[
  {
    "path": ".github/workflows/ci.yml",
    "content": "# This workflow will install Python dependencies, run tests and lint with a single version of Python\n# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions\n\nname: CI tests\n\non:\n  push:\n    branches: [ master ]\n  pull_request:\n    branches: [ master ]\n\njobs:\n  build:\n\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v2\n    - name: Set up Python 3.7\n      uses: actions/setup-python@v2\n      with:\n        python-version: 3.7\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi\n        pip install .\n        python setup.py install\n    - name: Test with pytest\n      run: |\n        pytest\n"
  },
  {
    "path": "CHANGELOG",
    "content": "v0.5, 2019/07 -- Initial release.\nv0.6, 2020/03 -- Support tensorflow 2.0, tf.keras and python3.\nv0.7, 2020/03 -- Enhancement of binary and ternary quantization.\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# How to Contribute\n\nWe'd love to accept your patches and contributions to this project. There are\njust a few small guidelines you need to follow.\n\n## Contributor License Agreement\n\nContributions to this project must be accompanied by a Contributor License\nAgreement. You (or your employer) retain the copyright to your contribution;\nthis simply gives us permission to use and redistribute your contributions as\npart of the project. Head over to <https://cla.developers.google.com/> to see\nyour current agreements on file or to sign a new one.\n\nYou generally only need to submit a CLA once, so if you've already submitted one\n(even if it was for a different project), you probably don't need to do it\nagain.\n\n## Code reviews\n\nAll submissions, including submissions by project members, require review. We\nuse GitHub pull requests for this purpose. Consult\n[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more\ninformation on using pull requests.\n\n## Community Guidelines\n\nThis project follows\n[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright 2019 The QKeras Authors.  All rights reserved.\n\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" 
shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include *.txt\nrecursive-include docs *.txt\n"
  },
  {
    "path": "README.md",
    "content": "# QKeras\n\n[github.com/google/qkeras](https://github.com/google/qkeras)\n\n## Introduction\n\nQKeras is a quantization extension to Keras that provides drop-in\nreplacement for some of the Keras layers, especially the ones that\ncreates parameters and activation layers, and perform arithmetic\noperations, so that we can quickly create a deep quantized version of\nKeras network.\n\nAccording to Tensorflow documentation, Keras is a high-level API to\nbuild and train deep learning models. It's used for fast prototyping,\nadvanced research, and production, with three key advantages:\n\n- User friendly\n\nKeras has a simple, consistent interface optimized for common use\ncases. It provides clear and actionable feedback for user errors.\n\n- Modular and composable\n\nKeras models are made by connecting configurable building blocks\ntogether, with few restrictions.\n\n- Easy to extend\n\nWrite custom building blocks to express new ideas for research. Create\nnew layers, loss functions, and develop state-of-the-art models.\n\nQKeras is being designed to extend the functionality of Keras using\nKeras' design principle, i.e. being user friendly, modular and\nextensible, adding to it being \"minimally intrusive\" of Keras native\nfunctionality.\n\nIn order to successfully quantize a model, users need to replace\nvariable creating layers (Dense, Conv2D, etc) by their counterparts\n(QDense, QConv2D, etc), and any layers that perform math operations\nneed to be quantized afterwards.\n\n## Publications\n\n- Claudionor N. Coelho Jr, Aki Kuusela, Shan Li, Hao Zhuang, Jennifer Ngadiuba, Thea Klaeboe Aarrestad, Vladimir Loncar, Maurizio Pierini, Adrian Alan Pol, Sioni Summers, \"Automatic heterogeneous quantization of deep neural networks for low-latency inference on the edge for particle detectors\", Nature Machine Intelligence (2021), https://www.nature.com/articles/s42256-021-00356-5\n\n- Claudionor N. 
Coelho Jr., Aki Kuusela, Hao Zhuang, Thea Aarrestad, Vladimir Loncar, Jennifer Ngadiuba, Maurizio Pierini, Sioni Summers, \"Ultra Low-latency, Low-area Inference Accelerators using Heterogeneous Deep Quantization with QKeras and hls4ml\", http://arxiv.org/abs/2006.10159v1\n\n- Erwei Wang, James J. Davis, Daniele Moro, Piotr Zielinski, Claudionor Coelho, Satrajit Chatterjee, Peter Y. K. Cheung, George A. Constantinides, \"Enabling Binary Neural Network Training on the Edge\", https://arxiv.org/abs/2102.04270\n\n## Layers Implemented in QKeras\n\n- QDense\n\n- QConv1D\n\n- QConv2D\n\n- QDepthwiseConv2D\n\n- QSeparableConv1D (depthwise + pointwise convolution, without\nquantizing the activation values after the depthwise step)\n\n- QSeparableConv2D (depthwise + pointwise convolution, without\nquantizing the activation values after the depthwise step)\n\n- QMobileNetSeparableConv2D (extended from MobileNet SeparableConv2D\nimplementation, quantizes the activation values after the depthwise step)\n\n- QConv2DTranspose\n\n- QActivation\n\n- QAdaptiveActivation\n\n- QAveragePooling2D (in fact, an AveragePooling2D stacked with a \nQActivation layer for quantization of the result)\n\n- QBatchNormalization (is still in its experimental stage, as we\nhave not seen the need to use this yet due to the normalization \nand regularization effects of stochastic activation functions.)\n\n- QOctaveConv2D\n\n- QSimpleRNN, QSimpleRNNCell\n\n- QLSTM, QLSTMCell\n\n- QGRU, QGRUCell\n\n- QBidirectional\n\nIt is worth noting that not all functionality is safe at this time to\nbe used with other high-level operations, such as with layer\nwrappers. For example, Bidirectional layer wrappers are used with\nRNNs.  
If this is required, we encourage users to use quantization\nfunctions invoked as strings instead of the actual functions as a way\nthrough this, but we may change that implementation in the future.\n\nA first attempt to create a safe mechanism in QKeras is the adoption\nof QActivation is a wrap-up that provides an encapsulation around the\nactivation functions so that we can save and restore the network\narchitecture, and duplicate them using Keras interface, but this\ninterface has not been fully tested yet.\n\n## Activation Layers Implemented in QKeras\n\n- smooth_sigmoid(x)\n\n- hard_sigmoid(x)\n\n- binary_sigmoid(x)\n\n- binary_tanh(x)\n\n- smooth_tanh(x)\n\n- hard_tanh(x)\n\n- quantized_bits(bits=8, integer=0, symmetric=0, keep_negative=1)(x)\n\n- bernoulli(alpha=1.0)(x)\n\n- stochastic_ternary(alpha=1.0, threshold=0.33)(x)\n\n- ternary(alpha=1.0, threshold=0.33)(x)\n\n- stochastic_binary(alpha=1.0)(x)\n\n- binary(alpha=1.0)(x)\n\n- quantized_relu(bits=8, integer=0, use_sigmoid=0, negative_slope=0.0)(x)\n\n- quantized_ulaw(bits=8, integer=0, symmetric=0, u=255.0)(x)\n\n- quantized_tanh(bits=8, integer=0, symmetric=0)(x)\n\n- quantized_po2(bits=8, max_value=-1)(x)\n\n- quantized_relu_po2(bits=8, max_value=-1)(x)\n\nThe stochastic_* functions, bernoulli as well as quantized_relu and\nquantized_tanh rely on stochastic versions of the activation\nfunctions. They draw a random number with uniform distribution from\n_hard_sigmoid of the input x, and result is based on the expected\nvalue of the activation function. Please refer to the papers if you\nwant to understand the underlying theory, or the documentation in\nqkeras/qlayers.py.\n\nThe parameters \"bits\" specify the number of bits for the quantization,\nand \"integer\" specifies how many bits of \"bits\" are to the left of the\ndecimal point. 
Finally, our experience in training networks with\nQSeparableConv2D, both quantized_bits and quantized_tanh that\ngenerates values between [-1, 1), required symmetric versions of the\nrange in order to properly converge and eliminate the bias.\n\nEvery time we use a quantization for weights and bias that can\ngenerate numbers outside the range [-1.0, 1.0], we need to adjust the\n*_range to the number. For example, if we have a\nquantized_bits(bits=6, integer=2) in a weight of a layer, we need to\nset the weight range to 2**2, which is equivalent to Catapult HLS\nac_fixed<6, 3, true>. Similarly, for quantization functions that accept an \nalpha parameter, we need to specify a range of alpha,\nand for po2 type of quantizers, we need to specify the range of\nmax_value.\n\n\n### Example\n\nSuppose you have the following network.\n\nAn example of a very simple network is given below in Keras.\n\n\n```python\nfrom keras.layers import *\n\nx = x_in = Input(shape)\nx = Conv2D(18, (3, 3), name=\"first_conv2d\")(x)\nx = Activation(\"relu\")(x)\nx = SeparableConv2D(32, (3, 3))(x)\nx = Activation(\"relu\")(x)\nx = Flatten()(x)\nx = Dense(NB_CLASSES)(x)\nx = Activation(\"softmax\")(x)\n```\n\nYou can easily quantize this network as follows:\n\n```python\nfrom keras.layers import *\nfrom qkeras import *\n\nx = x_in = Input(shape)\nx = QConv2D(18, (3, 3),\n        kernel_quantizer=\"stochastic_ternary\",\n        bias_quantizer=\"ternary\", name=\"first_conv2d\")(x)\nx = QActivation(\"quantized_relu(3)\")(x)\nx = QSeparableConv2D(32, (3, 3),\n        depthwise_quantizer=quantized_bits(4, 0, 1),\n        pointwise_quantizer=quantized_bits(3, 0, 1),\n        bias_quantizer=quantized_bits(3),\n        depthwise_activation=quantized_tanh(6, 2, 1))(x)\nx = QActivation(\"quantized_relu(3)\")(x)\nx = Flatten()(x)\nx = QDense(NB_CLASSES,\n        kernel_quantizer=quantized_bits(3),\n        bias_quantizer=quantized_bits(3))(x)\nx = QActivation(\"quantized_bits(20, 5)\")(x)\nx = 
Activation(\"softmax\")(x)\n```\n\nThe last QActivation is advisable if you want to compare results later on. \nPlease find more cases under the directory examples.\n\n\n## QTools\nThe purpose of QTools is to assist hardware implementation of the quantized\nmodel and model energy consumption estimation. QTools has two functions: data\ntype map generation and energy consumption estimation.\n\n- Data Type Map Generation:\nQTools automatically generate the data type map for weights, bias, multiplier,\nadder, etc. of each layer. The data type map includes operation type,\nvariable size, quantizer type and bits, etc. Input of the QTools is:\n1) a given quantized model;\n2) a list of input quantizers\nfor the model. Output of QTools json file that list the data type map of each\nlayer (stored in qtools_instance._output_dict)\nOutput methods include: qtools_stats_to_json, which is to output the data type\nmap to a json file; qtools_stats_print which is to print out the data type map.\n\n- Energy Consumption Estimation:\nAnother function of QTools is to estimate the model energy consumption in\nPico Joules (pJ). It provides a tool for QKeras users to quickly estimate\nenergy consumption for memory access and MAC operations in a quantized model\nderived from QKeras, especially when comparing power consumption of two models\nrunning on the same device.\n\nAs with any high-level model, it should be used with caution when attempting\nto estimate the absolute energy consumption of a model for a given technology,\nor when attempting to compare different technologies.\n\nThis tool also provides a measure for model tuning which needs to consider\nboth accuracy and model energy consumption. 
The energy cost provided by this\ntool can be integrated into a total loss function which combines energy\ncost and accuracy.\n\n- Energy Model:\nThe best work referenced by the literature on energy consumption was first\ncomputed by Horowitz M.: “1.1 computing’s energy problem (\nand what we can do about it)”; IEEE International Solid-State Circuits\nConference Digest of Technical Papers (ISSCC), 2014\n\nIn this work, the author attempted to estimate the energy\nconsumption for accelerators, and for 45 nm process, the data points he\npresented has since been used whenever someone wants to compare accelerator\nperformance. QTools energy consumption on a 45nm process is based on the\ndata published in this work.\n\n- Examples:\nExample of how to generate data type map can be found in qkeras/qtools/\nexamples/example_generate_json.py. Example of how to generate energy consumption\nestimation can be found in qkeras/qtools/examples/example_get_energy.py\n\n\n## AutoQKeras\n\nAutoQKeras allows the automatic quantization and rebalancing of deep neural\nnetworks by treating quantization and rebalancing of an existing deep neural\nnetwork as a hyperparameter search in Keras-Tuner using random search,\nhyperband or gaussian processes.\n\nIn order to contain the explosion of hyperparameters, users can group tasks by\npatterns, and perform distribute training using available resources.\n\nExtensive documentation is present in notebook/AutoQKeras.ipynb.\n\n\n## Related Work\n\nQKeras has been implemented based on the work of \"B.Moons et al. -\nMinimum Energy Quantized Neural Networks\", Asilomar Conference on\nSignals, Systems and Computers, 2017 and \"Zhou, S. et al. -\nDoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with\nLow Bitwidth Gradients,\" but the framework should be easily\nextensible. 
The original code from QNN can be found below.\n\nhttps://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\n\nQKeras extends QNN by providing a richer set of layers (including\nSeparableConv2D, DepthwiseConv2D, ternary and stochastic ternary\nquantizations), besides some functions to aid the estimation for the\naccumulators and conversion between non-quantized to quantized\nnetworks. Finally, our main goal is ease of use, so we attempt to make\nQKeras layers a true drop-in replacement for Keras, so that users can\neasily exchange non-quantized layers by quantized ones.\n\n### Acknowledgements\n\nPortions of QKeras were derived from QNN.\n\nhttps://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\n\nCopyright (c) 2017, Bert Moons where it applies\n\n"
  },
  {
    "path": "examples/example_act.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Example the usage of activation functions in qkeras.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport warnings\nimport numpy as np\n\nimport tensorflow as tf\nimport tensorflow.keras.backend as K\n\nfrom qkeras import binary\nfrom qkeras import bernoulli\nfrom qkeras import hard_sigmoid\nfrom qkeras import hard_tanh\nfrom qkeras import quantized_bits\nfrom qkeras import quantized_relu\nfrom qkeras import quantized_tanh\nfrom qkeras import quantized_po2\nfrom qkeras import quantized_relu_po2\nfrom qkeras import set_internal_sigmoid\nfrom qkeras import smooth_sigmoid\nfrom qkeras import smooth_tanh\nfrom qkeras import stochastic_binary\nfrom qkeras import stochastic_ternary\nfrom qkeras import ternary\n\n\ndef main():\n  # check the mean value of samples from stochastic_rounding for po2\n  np.random.seed(42)\n  count = 100000\n  val = 42\n  a = K.constant([val] * count)\n  b = quantized_po2(use_stochastic_rounding=True)(a)\n  res = np.sum(K.eval(b)) / count\n  print(res, \"should be close to \", val)\n  b = quantized_relu_po2(use_stochastic_rounding=True)(a)\n  res = np.sum(K.eval(b)) / count\n  print(res, \"should be close to \", val)\n  a = K.constant([-1] * count)\n  b = 
quantized_relu_po2(use_stochastic_rounding=True)(a)\n  res = np.sum(K.eval(b)) / count\n  print(res, \"should be all \", 0)\n\n  # non-stochastic rounding quantizer.\n  a = K.constant([-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0])\n  a = K.constant([0.194336])\n  print(\" a =\", K.eval(a).astype(np.float16))\n  print(\"qa =\", K.eval(quantized_relu(6,2)(a)).astype(np.float16))\n  print(\"ss =\", K.eval(smooth_sigmoid(a)).astype(np.float16))\n  print(\"hs =\", K.eval(hard_sigmoid(a)).astype(np.float16))\n  print(\"ht =\", K.eval(hard_tanh(a)).astype(np.float16))\n  print(\"st =\", K.eval(smooth_tanh(a)).astype(np.float16))\n  c = K.constant(np.arange(-1.5, 1.51, 0.3))\n  print(\" c =\", K.eval(c).astype(np.float16))\n  print(\"qb_111 =\", K.eval(quantized_bits(1,1,1)(c)).astype(np.float16))\n  print(\"qb_210 =\", K.eval(quantized_bits(2,1,0)(c)).astype(np.float16))\n  print(\"qb_211 =\", K.eval(quantized_bits(2,1,1)(c)).astype(np.float16))\n  print(\"qb_300 =\", K.eval(quantized_bits(3,0,0)(c)).astype(np.float16))\n  print(\"qb_301 =\", K.eval(quantized_bits(3,0,1)(c)).astype(np.float16))\n  c_1000 = K.constant(np.array([list(K.eval(c))] * 1000))\n  b = np.sum(K.eval(bernoulli()(c_1000)).astype(np.int32), axis=0) / 1000.0\n  print(\"       hs =\", K.eval(hard_sigmoid(c)).astype(np.float16))\n  print(\"    b_all =\", b.astype(np.float16))\n  T = 0.0\n  t = K.eval(stochastic_ternary(alpha=\"auto\")(c_1000))\n  for i in range(10):\n    print(\"stochastic_ternary({}) =\".format(i), t[i])\n  print(\"   st_all =\", np.round(\n      np.sum(t.astype(np.float32), axis=0).astype(np.float16) /\n      1000.0, 2).astype(np.float16))\n  print(\"  ternary =\", K.eval(ternary(threshold=0.5)(c)).astype(np.int32))\n  c = K.constant(np.arange(-1.5, 1.51, 0.3))\n  print(\" c =\", K.eval(c).astype(np.float16))\n  print(\" b_10 =\", K.eval(binary(1)(c)).astype(np.float16))\n  print(\"qr_10 =\", K.eval(quantized_relu(1,0)(c)).astype(np.float16))\n  print(\"qr_11 =\", 
K.eval(quantized_relu(1,1)(c)).astype(np.float16))\n  print(\"qr_20 =\", K.eval(quantized_relu(2,0)(c)).astype(np.float16))\n  print(\"qr_21 =\", K.eval(quantized_relu(2,1)(c)).astype(np.float16))\n  print(\"qr_101 =\", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))\n  print(\"qr_111 =\", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))\n  print(\"qr_201 =\", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))\n  print(\"qr_211 =\", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))\n  print(\"qt_200 =\", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))\n  print(\"qt_210 =\", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))\n  print(\"qt_201 =\", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))\n  print(\"qt_211 =\", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))\n  set_internal_sigmoid(\"smooth\"); print(\"with smooth sigmoid\")\n  print(\"qr_101 =\", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))\n  print(\"qr_111 =\", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))\n  print(\"qr_201 =\", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))\n  print(\"qr_211 =\", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))\n  print(\"qt_200 =\", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))\n  print(\"qt_210 =\", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))\n  print(\"qt_201 =\", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))\n  print(\"qt_211 =\", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))\n  set_internal_sigmoid(\"real\"); print(\"with real sigmoid\")\n  print(\"qr_101 =\", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))\n  print(\"qr_111 =\", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))\n  print(\"qr_201 =\", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))\n  print(\"qr_211 =\", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))\n  print(\"qt_200 =\", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))\n  print(\"qt_210 =\", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))\n  
print(\"qt_201 =\", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))\n  print(\"qt_211 =\", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))\n  set_internal_sigmoid(\"hard\")\n  print(\" c =\", K.eval(c).astype(np.float16))\n  print(\"q2_31 =\", K.eval(quantized_po2(3,1)(c)).astype(np.float16))\n  print(\"q2_32 =\", K.eval(quantized_po2(3,2)(c)).astype(np.float16))\n  print(\"qr2_21 =\", K.eval(quantized_relu_po2(2,1)(c)).astype(np.float16))\n  print(\"qr2_22 =\", K.eval(quantized_relu_po2(2,2)(c)).astype(np.float16))\n  print(\"qr2_44 =\", K.eval(quantized_relu_po2(4,1)(c)).astype(np.float16))\n\n  # stochastic rounding\n  c = K.constant(np.arange(-1.5, 1.51, 0.3))\n  print(\"q2_32_2 =\", K.eval(quantized_relu_po2(32,2)(c)).astype(np.float16))\n  b = K.eval(stochastic_binary()(c_1000)).astype(np.int32)\n  for i in range(5):\n    print(\"sbinary({}) =\".format(i), b[i])\n  print(\"sbinary =\", np.round(np.sum(b, axis=0) / 1000.0, 2).astype(np.float16))\n  print(\" binary =\", K.eval(binary()(c)).astype(np.int32))\n  print(\" c      =\", K.eval(c).astype(np.float16))\n  for i in range(10):\n    print(\" s_bin({}) =\".format(i),\n          K.eval(binary(use_stochastic_rounding=1)(c)).astype(np.int32))\n  for i in range(10):\n    print(\" s_po2({}) =\".format(i),\n          K.eval(quantized_po2(use_stochastic_rounding=1)(c)).astype(np.int32))\n  for i in range(10):\n    print(\n        \" s_relu_po2({}) =\".format(i),\n        K.eval(quantized_relu_po2(use_stochastic_rounding=1)(c)).astype(\n            np.int32))\n\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "examples/example_b2t.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements total/partial Binary to Thermometer decoder.\"\"\"\n\nimport numpy as np\nfrom qkeras import BinaryToThermometer\n\nif __name__ == \"__main__\":\n  np.random.seed(42)\n  x = np.array(range(8))\n  b = BinaryToThermometer(x, 2, 8)\n  print(b)\n  b = BinaryToThermometer(x, 2, 8, 1)\n  print(b)\n  b = BinaryToThermometer(x, 2, 8, 1, use_two_hot_encoding=1)\n  print(b)\n  b = BinaryToThermometer(x, 4, 8)\n  print(b)\n  b = BinaryToThermometer(x, 4, 8, 1)\n  print(b)\n  b = BinaryToThermometer(x, 4, 8, 1, use_two_hot_encoding=1)\n  print(b)\n  x = np.random.randint(0, 255, (100, 28, 28, 1))\n  print(x[0, 0, 0:5])\n  b = BinaryToThermometer(x, 8, 256, 0)\n  print(x.shape, b.shape)\n  print(b[0, 0, 0:5])\n  b = BinaryToThermometer(x, 8, 256, 1)\n  print(b[0, 0, 0:5])\n  x = np.random.randint(0, 255, (100, 28, 28, 2))\n  b = BinaryToThermometer(x, 8, 256, 0, 1)\n  print(x.shape, b.shape)\n  print(x[0, 0, 0, 0:2])\n  print(b[0, 0, 0, 0:8])\n  print(b[0, 0, 0, 8:16])\n"
  },
  {
    "path": "examples/example_cifar10_po2.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests qcore model with po2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nfrom collections import defaultdict\n\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import cifar10\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import *\nfrom tensorflow.keras.utils import to_categorical\nimport numpy as np\n\nfrom qkeras import *\n\nnp.random.seed(42)\n\nNB_EPOCH = 50\nBATCH_SIZE = 64\nVERBOSE = 1\nNB_CLASSES = 10\nOPTIMIZER = Adam(lr=0.0001)\nVALIDATION_SPLIT = 0.1\n\n(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n\nx_train = x_train.astype(\"float32\")\nx_test = x_test.astype(\"float32\")\n\nx_train /= 255.0\nx_test /= 255.0\n\nprint(x_train.shape[0], \"train samples\")\nprint(x_test.shape[0], \"test samples\")\n\nprint(y_train[0:10])\n\ny_train = to_categorical(y_train, NB_CLASSES)\ny_test = to_categorical(y_test, NB_CLASSES)\n\nx = x_in = Input(x_train.shape[1:], name=\"input\")\nx = QActivation(\"quantized_relu_po2(4,4)\", name=\"acti\")(x)\nx = QConv2D(\n    128, (3, 3),\n    strides=1,\n    kernel_quantizer=quantized_po2(4, 1),\n    bias_quantizer=quantized_po2(4, 4),\n    bias_range=4,\n    
name=\"conv2d_0_m\")(\n        x)\nx = QActivation(\"ternary()\", name=\"act0_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_0\")(x)\nx = QConv2D(\n    256, (3, 3),\n    strides=1,\n    kernel_quantizer=quantized_po2(4, 1),\n    bias_quantizer=quantized_po2(4, 4),\n    bias_range=4,\n    name=\"conv2d_1_m\")(\n        x)\nx = QActivation(\"quantized_relu(6,2)\", name=\"act1_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_1\")(x)\nx = QConv2D(\n    128, (3, 3),\n    strides=1,\n    kernel_quantizer=quantized_bits(4, 0, 1),\n    bias_quantizer=quantized_bits(4, 0, 1),\n    name=\"conv2d_2_m\")(\n        x)\nx = QActivation(\"quantized_relu(4,2)\", name=\"act2_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_2\")(x)\nx = Flatten()(x)\nx = QDense(\n    NB_CLASSES,\n    kernel_quantizer=quantized_ulaw(4, 0, 1),\n    bias_quantizer=quantized_bits(4, 0, 1),\n    name=\"dense\")(\n        x)\nx = Activation(\"softmax\", name=\"softmax\")(x)\n\nmodel = Model(inputs=[x_in], outputs=[x])\nmodel.summary()\n\nmodel.compile(\n    loss=\"categorical_crossentropy\", optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\nif int(os.environ.get(\"TRAIN\", 0)):\n\n  history = model.fit(\n      x_train, y_train, batch_size=BATCH_SIZE,\n      epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE,\n      validation_split=VALIDATION_SPLIT)\n\n  outputs = []\n  output_names = []\n\n  for layer in model.layers:\n    if layer.__class__.__name__ in [\n        \"QActivation\", \"Activation\", \"QDense\", \"QConv2D\", \"QDepthwiseConv2D\"\n    ]:\n      output_names.append(layer.name)\n      outputs.append(layer.output)\n\n  model_debug = Model(inputs=[x_in], outputs=outputs)\n\n  outputs = model_debug.predict(x_train)\n\n  print(\"{:30} {: 8.4f} {: 8.4f}\".format(\n      \"input\", np.min(x_train), np.max(x_train)))\n\n  for n, p in zip(output_names, outputs):\n    print(\"{:30} {: 8.4f} {: 8.4f}\".format(n, np.min(p), np.max(p)), end=\"\")\n    layer = model.get_layer(n)\n    for i, weights in 
enumerate(layer.get_weights()):\n      weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))\n      print(\" ({: 8.4f} {: 8.4f})\".format(np.min(weights), np.max(weights)),\n            end=\"\")\n    print(\"\")\n\n  score = model.evaluate(x_test, y_test, verbose=VERBOSE)\n  print(\"Test score:\", score[0])\n  print(\"Test accuracy:\", score[1])\n\nmodel.summary()\n\nprint_qstats(model)\n"
  },
  {
    "path": "examples/example_keras_to_qkeras.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests automatic conversion of keras model to qkeras.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom collections import defaultdict\n\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import Model\n\nfrom qkeras.estimate import print_qstats\nfrom qkeras.utils import model_quantize\nfrom qkeras.utils import quantized_model_dump\n\nx0 = x_in0 = Input((28, 28, 1), name=\"input0\")\nx1 = x_in1 = Input((28, 28, 1), name=\"input1\")\nx = Concatenate(name=\"concat\")([x0, x1])\nx = Conv2D(128, (3, 3), strides=1, name=\"conv2d_0_m\")(x)\nx = Activation(\"relu\", name=\"act0_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_0\")(x)\nx = Conv2D(256, (3, 3), strides=1, name=\"conv2d_1_m\")(x)\nx = Activation(\"relu\", name=\"act1_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_1\")(x)\nx = Conv2D(128, (3, 3), strides=1, name=\"conv2d_2_m\")(x)\nx = Activation(\"relu\", name=\"act2_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_2\")(x)\nx = Flatten()(x)\nx = Dense(10, name=\"dense\")(x)\nx = Activation(\"softmax\", name=\"softmax\")(x)\n\nmodel = Model(inputs=[x_in0, x_in1], outputs=[x])\nmodel.summary()\n\nq_dict = {\n    \"conv2d_0_m\": {\n        
\"kernel_quantizer\": \"binary()\",\n        \"bias_quantizer\": \"quantized_bits(4,0,1)\"\n    },\n    \"conv2d_1_m\": {\n        \"kernel_quantizer\": \"ternary()\",\n        \"bias_quantizer\": \"quantized_bits(4,0,1)\"\n    },\n    \"act2_m\": \"quantized_relu(6,2)\",\n    \"QActivation\": {\n        \"relu\": \"quantized_relu(4,0)\"\n    },\n    \"QConv2D\": {\n        \"kernel_quantizer\": \"quantized_bits(4,0,1)\",\n        \"bias_quantizer\": \"quantized_bits(4,0,1)\"\n    },\n    \"QDense\": {\n        \"kernel_quantizer\": \"quantized_bits(3,0,1)\",\n        \"bias_quantizer\": \"quantized_bits(3,0,1)\"\n    }\n}\n\nqmodel = model_quantize(model, q_dict, 4)\n\nqmodel.summary()\n\nprint_qstats(qmodel)\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nx_test_arr = [x_test[0:10,:], x_test[0:10,:]]\n\nquantized_model_dump(\n    qmodel, x_test_arr,\n    layers_to_dump=[\"input0\", \"input1\", \"act2_m\", \"act1_m\", \"act0_m\"])\n\n"
  },
  {
    "path": "examples/example_mnist.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"uses po2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nfrom collections import defaultdict\n\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.optimizers import SGD\nfrom tensorflow.keras.utils import to_categorical\n\nfrom qkeras import *\nfrom qkeras.utils import model_save_quantized_weights\n\n\nimport numpy as np\nimport tensorflow.compat.v1 as tf\n\nnp.random.seed(42)\n\nNB_EPOCH = 100\nBATCH_SIZE = 64\nVERBOSE = 1\nNB_CLASSES = 10\nOPTIMIZER = Adam(lr=0.0001, decay=0.000025)\nVALIDATION_SPLIT = 0.1\n\ntrain = 1\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nRESHAPED = 784\n\nx_test_orig = x_test\n\nx_train = x_train.astype(\"float32\")\nx_test = x_test.astype(\"float32\")\n\nx_train = x_train[..., np.newaxis]\nx_test = x_test[..., np.newaxis]\n\nx_train /= 256.0\nx_test /= 256.0\n\nprint(x_train.shape[0], \"train samples\")\nprint(x_test.shape[0], \"test 
samples\")\n\nprint(y_train[0:10])\n\ny_train = to_categorical(y_train, NB_CLASSES)\ny_test = to_categorical(y_test, NB_CLASSES)\n\nx = x_in = Input(\n    x_train.shape[1:-1] + (1,), name=\"input\")\nx = QConv2D(\n    32, (2, 2), strides=(2,2),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1),\n    name=\"conv2d_0_m\")(x)\nx = QActivation(\"quantized_relu(4,0)\", name=\"act0_m\")(x)\nx = QConv2D(\n    64, (3, 3), strides=(2,2),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1),\n    name=\"conv2d_1_m\")(x)\nx = QActivation(\"quantized_relu(4,0)\", name=\"act1_m\")(x)\nx = QConv2D(\n    64, (2, 2), strides=(2,2),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1),\n    name=\"conv2d_2_m\")(x)\nx = QActivation(\"quantized_relu(4,0)\", name=\"act2_m\")(x)\nx = Flatten()(x)\nx = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4,0,1),\n           bias_quantizer=quantized_bits(4,0,1),\n           name=\"dense\")(x)\nx_out = x\nx = Activation(\"softmax\", name=\"softmax\")(x)\n\nmodel = Model(inputs=[x_in], outputs=[x])\nmo = Model(inputs=[x_in], outputs=[x_out])\nmodel.summary()\n\nmodel.compile(\n    loss=\"categorical_crossentropy\", optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\nif train:\n\n  history = model.fit(\n      x_train, y_train, batch_size=BATCH_SIZE,\n      epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE,\n      validation_split=VALIDATION_SPLIT)\n\n  outputs = []\n  output_names = []\n\n  for layer in model.layers:\n    if layer.__class__.__name__ in [\"QActivation\", \"Activation\",\n                                  \"QDense\", \"QConv2D\", \"QDepthwiseConv2D\"]:\n      output_names.append(layer.name)\n      outputs.append(layer.output)\n\n  model_debug = Model(inputs=[x_in], outputs=outputs)\n\n  outputs = model_debug.predict(x_train)\n\n  print(\"{:30} {: 8.4f} {: 8.4f}\".format(\n      \"input\", np.min(x_train), np.max(x_train)))\n\n  
for n, p in zip(output_names, outputs):\n    print(\"{:30} {: 8.4f} {: 8.4f}\".format(n, np.min(p), np.max(p)), end=\"\")\n    layer = model.get_layer(n)\n    for i, weights in enumerate(layer.get_weights()):\n      weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))\n      print(\" ({: 8.4f} {: 8.4f})\".format(np.min(weights), np.max(weights)),\n            end=\"\")\n    print(\"\")\n\n  p_test = mo.predict(x_test)\n  p_test.tofile(\"p_test.bin\")\n\n  score = model.evaluate(x_test, y_test, verbose=VERBOSE)\n  print(\"Test score:\", score[0])\n  print(\"Test accuracy:\", score[1])\n\n  all_weights = []\n  model_save_quantized_weights(model)\n\n  for layer in model.layers:\n    for w, weights in enumerate(layer.get_weights()):\n      print(layer.name, w)\n      all_weights.append(weights.flatten())\n\n  all_weights = np.concatenate(all_weights).astype(np.float32)\n  print(all_weights.size)\n\n\nfor layer in model.layers:\n  for w, weight in enumerate(layer.get_weights()):\n    print(layer.name, w, weight.shape)\n\nprint_qstats(model)\n"
  },
  {
    "path": "examples/example_mnist_ae.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"uses po2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nfrom collections import defaultdict\n\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.optimizers import SGD\nfrom tensorflow.keras.utils import to_categorical\n\nfrom qkeras import *\nfrom qkeras.utils import model_save_quantized_weights\n\n\nimport numpy as np\nimport tensorflow.compat.v1 as tf\n\nnp.random.seed(42)\n\nNB_EPOCH = 100\nBATCH_SIZE = 64\nVERBOSE = 1\nNB_CLASSES = 10\nOPTIMIZER = Adam(lr=0.0001, decay=0.000025)\nVALIDATION_SPLIT = 0.1\n\ntrain = 1\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nRESHAPED = 784\n\nx_train = x_train.astype(\"float32\")\nx_test = x_test.astype(\"float32\")\n\nx_train = x_train[..., np.newaxis]\nx_test = x_test[..., np.newaxis]\n\nx_train /= 256.0\nx_test /= 256.0\n\nprint(x_train.shape[0], \"train samples\")\nprint(x_test.shape[0], \"test 
samples\")\n\nprint(y_train[0:10])\n\ny_train = to_categorical(y_train, NB_CLASSES)\ny_test = to_categorical(y_test, NB_CLASSES)\n\nx = x_in = Input(\n    x_train.shape[1:-1] + (1,))\nx = QConv2D(\n    32,\n    kernel_size=(3, 3),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx = QActivation(\"quantized_relu(4,0)\")(x)\nx = QConv2D(\n    16,\n    kernel_size=(3, 3),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx = QActivation(\"quantized_relu(4,0)\")(x)\nx = QConv2D(\n    8,\n    kernel_size=(3, 3),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx = QActivation(\"quantized_relu(4,0)\")(x)\nx = QConv2DTranspose(\n    8,\n    kernel_size=(3, 3),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx = QActivation(\"quantized_relu(4,0)\")(x)\nx = QConv2DTranspose(\n    16,\n    kernel_size=(3, 3),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx = QActivation(\"quantized_relu(4,0)\")(x)\nx = QConv2DTranspose(\n    32,\n    kernel_size=(3, 3),\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx = QActivation(\"quantized_relu(4,0)\")(x)\nx = QConv2D(\n    1,\n    kernel_size=(3, 3),\n    padding=\"same\",\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(4,0,1))(x)\nx_out = x\nx = Activation(\"sigmoid\")(x)\n\nmodel = Model(inputs=[x_in], outputs=[x])\nmo = Model(inputs=[x_in], outputs=[x_out])\nmodel.summary()\n\nmodel.compile(\n    loss=\"binary_crossentropy\", optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\nif train:\n\n  history = model.fit(\n      x_train, x_train, batch_size=BATCH_SIZE,\n      epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE,\n      validation_split=VALIDATION_SPLIT)\n\n  # Generate reconstructions\n  num_reco = 8\n  samples = x_test[:num_reco]\n  targets = 
y_test[:num_reco]\n  reconstructions = model.predict(samples)\n\n\nfor layer in model.layers:\n  for w, weight in enumerate(layer.get_weights()):\n    print(layer.name, w, weight.shape)\n\nprint_qstats(model)\n"
  },
  {
    "path": "examples/example_mnist_b2t.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests qcore model with BinaryToThermometer.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\n\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.optimizers import SGD\nfrom tensorflow.keras.utils import to_categorical\nimport numpy as np\n\nfrom qkeras import *\n\nnp.random.seed(42)\n\nNB_EPOCH = 20\nBATCH_SIZE = 32\nVERBOSE = 1\nNB_CLASSES = 10\nOPTIMIZER = Adam(lr=0.0001)\nN_HIDDEN = 100\nVALIDATION_SPLIT = 0.1\n\nT_CLASSES = 256\nT_WITH_RESIDUE = 0\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nRESHAPED = 784\n\nx_train = x_train.astype(\"float32\")\nx_test = x_test.astype(\"float32\")\n\nx_train = x_train[..., np.newaxis]\nx_test = x_test[..., np.newaxis]\n\nif T_CLASSES == 1:\n  x_train /= 256.0\n  x_test /= 256.0\n\nprint(x_train.shape[0], \"train samples\")\nprint(x_test.shape[0], \"test samples\")\n\nprint(y_train[0:10])\n\n# x_train = x_train[0:1000]\n# 
y_train = y_train[0:1000]\n# x_test = x_test[0:100]\n# y_test = y_test[0:100]\n\ny_train = to_categorical(y_train, NB_CLASSES)\ny_test = to_categorical(y_test, NB_CLASSES)\n\n# we ran out of memory here, so we split x_train/x_test into smaller groups\n\nx = x_in = Input(\n    x_train.shape[1:-1] + (T_CLASSES,), name=\"input\")\n\n# Number is represented as 1.bbb, where number of bits of bbb is\n# log2(256/T_CLASSES) if T_WITH_RESIDUE == 1\n\nbits = (\n    (T_WITH_RESIDUE == 1) * int(np.ceil(np.log2(256/T_CLASSES))) +\n    (T_CLASSES > 1)\n)\n\nprint(\"Input quantizer: quantized_relu({},{})\".format(bits, int(T_CLASSES > 1)))\nx = QActivation(\"quantized_relu({},{})\".format(bits, int(T_CLASSES > 1)))(x)\nx = QConv2D(\n    64, (3, 3), strides=1, padding=\"same\",\n    kernel_quantizer=quantized_po2(4,1),\n    bias_quantizer=quantized_bits(4,2,1),\n    bias_range=4,\n    name=\"conv2d_0_m\")(x)\nx = QActivation(\"quantized_relu(4,0)\", name=\"act0_m\")(x)\nx = MaxPooling2D(2,2,name=\"mp_0\")(x)\nx = QConv2D(\n    32, (3, 3), strides=1, padding=\"same\",\n    kernel_quantizer=stochastic_ternary(),\n    bias_quantizer=quantized_bits(8,5,1),\n    bias_range=32,\n    name=\"conv2d_1_m\")(x)\nx = QActivation(\"quantized_relu(4,0)\", name=\"act1_m\")(x)\nx = MaxPooling2D(2,2,name=\"mp_1\")(x)\nx = QConv2D(\n    16, (3, 3), strides=1, padding=\"same\",\n    kernel_quantizer=quantized_bits(4,0,1),\n    bias_quantizer=quantized_bits(8,5,1),\n    bias_range=32,\n    name=\"conv2d_2_m\")(x)\nx = QActivation(\"quantized_relu(6,2)\", name=\"act2_m\")(x)\nx = MaxPooling2D(2,2,name=\"mp_2\")(x)\nx = Flatten()(x)\nx = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4,0,1),\n           bias_quantizer=quantized_bits(4,0,1),\n           name=\"dense2\")(x)\nx = Activation(\"softmax\", name=\"softmax\")(x)\n\nmodel = Model(inputs=[x_in], outputs=[x])\nmodel.summary()\n\nmodel.compile(\n    loss=\"categorical_crossentropy\", optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\noutputs = 
[]\noutput_names = []\n\nfor layer in model.layers:\n  if layer.__class__.__name__ in [\"QActivation\", \"Activation\",\n                                  \"QDense\", \"QConv2D\", \"QDepthwiseConv2D\"]:\n    output_names.append(layer.name)\n    outputs.append(layer.output)\n\nmodel_debug = Model(inputs=[x_in], outputs=outputs)\n\nbatch_size = 1000 * BATCH_SIZE\nn_batches = x_train.shape[0] // batch_size\n\nif T_CLASSES > 1:\n  x_test = BinaryToThermometer(x_test, T_CLASSES, 256, T_WITH_RESIDUE)\n\nif int(os.environ.get(\"TRAIN\", 0)):\n\n  for i in range(NB_EPOCH):\n    for b in range(n_batches):\n\n      min_b = b * batch_size\n      max_b = (b + 1) * batch_size\n      if max_b > x_train.shape[0]:\n        max_b = x_train.shape[0]\n\n      if T_CLASSES > 1:\n        x = BinaryToThermometer(\n            x_train[min_b:max_b], T_CLASSES, 256, T_WITH_RESIDUE)\n      else:\n        x = x_train[min_b:max_b]\n\n      history = model.fit(\n          x, y_train[min_b:max_b], batch_size=BATCH_SIZE,\n          epochs=i+1, initial_epoch=i, verbose=VERBOSE,\n          validation_split=VALIDATION_SPLIT)\n\n  if T_CLASSES > 1:\n    x = BinaryToThermometer(x_train[0:100], T_CLASSES, 256, T_WITH_RESIDUE)\n  else:\n    x = x_train[0:100]\n\n  outputs = model_debug.predict(x)\n\n  print(\"{:30} {: 8.4f} {: 8.4f}\".format(\"input\", np.min(x), np.max(x)))\n  for n, p in zip(output_names, outputs):\n    print(\"{:30} {: 8.4f} {: 8.4f}\".format(n, np.min(p), np.max(p)), end=\"\")\n    layer = model.get_layer(n)\n    for i, weights in enumerate(layer.get_weights()):\n      weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))\n      print(\" ({: 8.4f} {: 8.4f})\".format(np.min(weights), np.max(weights)),\n            end=\"\")\n    print(\"\")\n\n  score = model.evaluate(x_test, y_test, verbose=VERBOSE)\n  print(\"Test score:\", score[0])\n  print(\"Test accuracy:\", score[1])\n\nprint_qstats(model)\n\nacc = analyze_accumulator_from_sample(model, x_test, 
mode=\"sampled\")\n\nprint(acc)\n"
  },
  {
    "path": "examples/example_mnist_bn.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests mnist batchnormalization used as learned scale factor.\"\"\"\n\n# to run, THRESHOLD=0.05 WITH_BN=1 EPOCHS=5 TRAIN=1 python example_mnist_bn.py\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom collections import defaultdict\nimport os\n\nimport numpy as np\nfrom six.moves import zip\nfrom tensorflow.keras import callbacks\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import *\nfrom tensorflow.keras.utils import to_categorical\n\nfrom qkeras import *\n\nnp.random.seed(42)\n\nTRAIN = 1\nNB_EPOCH = 2\nBATCH_SIZE = 64\nVERBOSE = 1\nNB_CLASSES = 10\nOPTIMIZER = Adam(lr=0.0001)\nVALIDATION_SPLIT = 0.1\nWITH_BN = 1\nTHRESHOLD = 0.1\n\n\nclass LearningRateAdjuster(callbacks.Callback):\n  def __init__(self):\n    self.learning_rate_factor = 1.0\n    pass\n\n  def on_epoch_end(self, epochs, logs):\n    max_variance = -1\n\n    for layer in self.model.layers:\n      if layer.__class__.__name__ in [\n          \"BatchNormalization\",\n          \"QBatchNormalization\"\n      ]:\n        variance = np.max(layer.get_weights()[-1])\n        if variance > 
max_variance:\n          max_variance = variance\n\n    if max_variance > 32 and self.learning_rate_factor < 100:\n      learning_rate = K.get_value(self.model.optimizer.learning_rate)\n      self.learning_rate_factor /= 2.0\n      print(\"***** max_variance is {} / lr is {} *****\".format(\n          max_variance, learning_rate))\n      K.eval(K.update(\n          self.model.optimizer.learning_rate, learning_rate / 2.0\n      ))\n\nlra = LearningRateAdjuster()\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nx_train = x_train.reshape(x_train.shape + (1,)).astype(\"float32\")\nx_test = x_test.reshape(x_test.shape + (1,)).astype(\"float32\")\n\nx_train /= 256.0\nx_test /= 256.0\n\nprint(x_train.shape[0], \"train samples\")\nprint(x_test.shape[0], \"test samples\")\n\nprint(y_train[0:10])\n\ny_train = to_categorical(y_train, NB_CLASSES)\ny_test = to_categorical(y_test, NB_CLASSES)\n\nx = x_in = Input(x_train.shape[1:], name=\"input\")\n#x = QActivation(\"quantized_relu_po2(4,1)\", name=\"acti\")(x)\nx = QConv2D(\n    128, (3, 3),\n    strides=1,\n    kernel_quantizer=ternary(threshold=THRESHOLD), #quantized_po2(4, 1),\n    bias_quantizer=quantized_bits(4,2,0) if not WITH_BN else None,\n    bias_range=4 if not WITH_BN else None,\n    use_bias=not WITH_BN,\n    name=\"conv2d_0_m\")(x)\nif WITH_BN:\n  x = QBatchNormalization(\n      gamma_quantizer=quantized_relu_po2(4,8),\n      variance_quantizer=quantized_relu_po2(6),\n      beta_quantizer=quantized_po2(4, 4),\n      gamma_range=8,\n      beta_range=4,\n      name=\"bn0\")(x)\nx = QActivation(\"quantized_relu(3,1)\", name=\"act0_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_0\")(x)\nx = QConv2D(\n    256, (3, 3),\n    strides=1,\n    kernel_quantizer=ternary(threshold=THRESHOLD), #quantized_bits(2,0,1),\n    bias_quantizer=quantized_bits(4,2,1) if not WITH_BN else None,\n    bias_range=4 if not WITH_BN else None,\n    use_bias=not WITH_BN,\n    name=\"conv2d_1_m\")(x)\nif WITH_BN:\n  x = 
QBatchNormalization(\n      gamma_quantizer=quantized_relu_po2(4,8),\n      variance_quantizer=quantized_relu_po2(6),\n      beta_quantizer=quantized_po2(4, 4),\n      gamma_range=8,\n      beta_range=4,\n      name=\"bn1\")(x)\nx = QActivation(\"quantized_relu(3,1)\", name=\"act1_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_1\")(x)\nx = QConv2D(\n    128, (3, 3),\n    strides=1,\n    kernel_quantizer=ternary(threshold=THRESHOLD), #quantized_bits(2,0,1),\n    bias_quantizer=quantized_bits(4,2,1) if not WITH_BN else None,\n    bias_range=4 if not WITH_BN else None,\n    use_bias=not WITH_BN,\n    name=\"conv2d_2_m\")(x)\nif WITH_BN:\n  x = QBatchNormalization(\n      gamma_quantizer=quantized_relu_po2(4,8),\n      variance_quantizer=quantized_relu_po2(6),\n      beta_quantizer=quantized_po2(4, 4),\n      gamma_range=8,\n      beta_range=4,\n      name=\"bn2\")(x)\nx = QActivation(\"quantized_relu(3,1)\", name=\"act2_m\")(x)\nx = MaxPooling2D(2, 2, name=\"mp_2\")(x)\nx = Flatten()(x)\nx = QDense(\n    NB_CLASSES,\n    kernel_quantizer=quantized_ulaw(4, 0, 1),\n    bias_quantizer=quantized_bits(4, 0, 1),\n    name=\"dense\")(\n        x)\nx = Activation(\"softmax\", name=\"softmax\")(x)\n\nmodel = Model(inputs=[x_in], outputs=[x])\nmodel.summary()\n\nmodel.compile(\n    loss=\"categorical_crossentropy\", optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\n\nif TRAIN:\n  history = model.fit(\n      x_train, y_train, batch_size=BATCH_SIZE,\n      epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE,\n      validation_split=VALIDATION_SPLIT,\n      callbacks=[]) #lra])\n\n  outputs = []\n  output_names = []\n\n  for layer in model.layers:\n    if layer.__class__.__name__ in [\n        \"QActivation\", \"QBatchNormalization\", \"Activation\", \"QDense\",\n        \"QConv2D\", \"QDepthwiseConv2D\"\n    ]:\n      output_names.append(layer.name)\n      outputs.append(layer.output)\n\n  model_debug = Model(inputs=[x_in], outputs=outputs)\n\n  outputs = model_debug.predict(x_train)\n\n  
print(\"{:30} {: 8.4f} {: 8.4f}\".format(\n      \"input\", np.min(x_train), np.max(x_train)))\n\n  for n, p in zip(output_names, outputs):\n    print(\"{:30} {: 8.4f} {: 8.4f}\".format(n, np.min(p), np.max(p)), end=\"\")\n    layer = model.get_layer(n)\n    for i, weights in enumerate(layer.get_weights()):\n      if layer.get_quantizers()[i]:\n        weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))\n      print(\" ({: 8.4f} {: 8.4f})\".format(np.min(weights), np.max(weights)),\n            end=\"\")\n    print(\"\")\n\n  score = model.evaluate(x_test, y_test, verbose=False)\n  print(\"Test score:\", score[0])\n  print(\"Test accuracy:\", score[1])\n\nprint_qstats(model)\n"
  },
  {
    "path": "examples/example_mnist_po2.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests qlayers model with po2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.utils import to_categorical\nimport numpy as np\n\nfrom qkeras import *   # pylint: disable=wildcard-import\n\nnp.random.seed(42)\n\nNB_EPOCH = 5\nBATCH_SIZE = 64\nVERBOSE = 1\nNB_CLASSES = 10\nOPTIMIZER = Adam(lr=0.0001, decay=0.000025)\nN_HIDDEN = 100\nVALIDATION_SPLIT = 0.1\n\nQUANTIZED = 1\nCONV2D = 1\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\nRESHAPED = 784\n\nx_train = x_train.astype(\"float32\")\nx_test = x_test.astype(\"float32\")\n\nx_train = x_train[..., np.newaxis]\nx_test = x_test[..., np.newaxis]\n\nx_train /= 256.0\nx_test /= 256.0\n\ntrain = False\n\nprint(x_train.shape[0], \"train samples\")\nprint(x_test.shape[0], \"test samples\")\n\nprint(y_train[0:10])\n\ny_train = to_categorical(y_train, NB_CLASSES)\ny_test = to_categorical(y_test, NB_CLASSES)\n\n# we ran out of 
memory here, so we split x_train/x_test into smaller groups\n\nx = x_in = Input(x_train.shape[1:-1] + (1,), name=\"input\")\nx = QActivation(\"quantized_relu_po2(4)\", name=\"acti\")(x)\nx = QConv2D(\n    32, (2, 2),\n    strides=(2, 2),\n    kernel_quantizer=quantized_po2(4, 1),\n    bias_quantizer=quantized_po2(4, 1),\n    name=\"conv2d_0_m\")(\n        x)\nx = QActivation(\"quantized_relu_po2(4,4)\", name=\"act0_m\")(x)\nx = QConv2D(\n    64, (3, 3),\n    strides=(2, 2),\n    kernel_quantizer=quantized_po2(4, 1),\n    bias_quantizer=quantized_po2(4, 1),\n    name=\"conv2d_1_m\")(\n        x)\nx = QActivation(\"quantized_relu_po2(4,4,use_stochastic_rounding=True)\",\n                name=\"act1_m\")(x)\nx = QConv2D(\n    64, (2, 2),\n    strides=(2, 2),\n    kernel_quantizer=quantized_po2(4, 1, use_stochastic_rounding=True),\n    bias_quantizer=quantized_po2(4, 1),\n    name=\"conv2d_2_m\")(\n        x)\nx = QActivation(\"quantized_relu(4,1)\", name=\"act2_m\")(x)\nx = Flatten()(x)\nx = QDense(\n    NB_CLASSES,\n    kernel_quantizer=quantized_bits(4, 0, 1),\n    bias_quantizer=quantized_bits(4, 0, 1),\n    name=\"dense\")(\n        x)\nx = Activation(\"softmax\", name=\"softmax\")(x)\n\nmodel = Model(inputs=[x_in], outputs=[x])\nmodel.summary()\n\nmodel.compile(\n    loss=\"categorical_crossentropy\", optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\nif train:\n  history = model.fit(\n      x_train, y_train, batch_size=BATCH_SIZE,\n      epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE,\n      validation_split=VALIDATION_SPLIT)\n\n  outputs = []\n  output_names = []\n\n  for layer in model.layers:\n    if layer.__class__.__name__ in [\n        \"QActivation\", \"Activation\", \"QDense\", \"QConv2D\", \"QDepthwiseConv2D\"\n    ]:\n      output_names.append(layer.name)\n      outputs.append(layer.output)\n\n  model_debug = Model(inputs=[x_in], outputs=outputs)\n\n  outputs = model_debug.predict(x_train)\n\n  print(\"{:30} {: 8.4f} {: 8.4f}\".format(\n      
\"input\", np.min(x_train), np.max(x_train)))\n\n  for n, p in zip(output_names, outputs):\n    print(\"{:30} {: 8.4f} {: 8.4f}\".format(n, np.min(p), np.max(p)), end=\"\")\n    layer = model.get_layer(n)\n    for i, weights in enumerate(layer.get_weights()):\n      weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))\n      print(\" ({: 8.4f} {: 8.4f})\".format(np.min(weights), np.max(weights)),\n            end=\"\")\n      print(\"\")\n\n  score = model.evaluate(x_test, y_test, verbose=VERBOSE)\n  print(\"Test score:\", score[0])\n  print(\"Test accuracy:\", score[1])\n\nmodel.summary()\n\nprint_qstats(model)\n"
  },
  {
    "path": "examples/example_mnist_prune.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Example of mnist model with pruning.\n   Adapted from TF model optimization example.\"\"\"\n\nimport tempfile\nimport numpy as np\n\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.models import Sequential\nfrom tensorflow.keras.models import save_model\nfrom tensorflow.keras.utils import to_categorical\n\nfrom qkeras import QActivation\nfrom qkeras import QDense\nfrom qkeras import QConv2D\nfrom qkeras import quantized_bits\nfrom qkeras.utils import load_qmodel\nfrom qkeras.utils import print_model_sparsity\n\nfrom tensorflow_model_optimization.python.core.sparsity.keras import prune\nfrom tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks\nfrom tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule\n\n\nbatch_size = 128\nnum_classes = 10\nepochs = 12\n\nprune_whole_model = True # Prune whole model or just specified layers\n\n\ndef build_model(input_shape):\n    x = x_in = Input(shape=input_shape, name=\"input\")\n    x = QConv2D(\n        32, (2, 2), strides=(2,2),\n        
kernel_quantizer=quantized_bits(4,0,1),\n        bias_quantizer=quantized_bits(4,0,1),\n        name=\"conv2d_0_m\")(x)\n    x = QActivation(\"quantized_relu(4,0)\", name=\"act0_m\")(x)\n    x = QConv2D(\n        64, (3, 3), strides=(2,2),\n        kernel_quantizer=quantized_bits(4,0,1),\n        bias_quantizer=quantized_bits(4,0,1),\n        name=\"conv2d_1_m\")(x)\n    x = QActivation(\"quantized_relu(4,0)\", name=\"act1_m\")(x)\n    x = QConv2D(\n        64, (2, 2), strides=(2,2),\n        kernel_quantizer=quantized_bits(4,0,1),\n        bias_quantizer=quantized_bits(4,0,1),\n        name=\"conv2d_2_m\")(x)\n    x = QActivation(\"quantized_relu(4,0)\", name=\"act2_m\")(x)\n    x = Flatten()(x)\n    x = QDense(num_classes, kernel_quantizer=quantized_bits(4,0,1),\n               bias_quantizer=quantized_bits(4,0,1),\n               name=\"dense\")(x)\n    x = Activation(\"softmax\", name=\"softmax\")(x)\n\n    model = Model(inputs=[x_in], outputs=[x])\n    return model\n\n\ndef build_layerwise_model(input_shape, **pruning_params):\n    return Sequential([\n        prune.prune_low_magnitude(\n            QConv2D(\n                32, (2, 2), strides=(2,2),\n                kernel_quantizer=quantized_bits(4,0,1),\n                bias_quantizer=quantized_bits(4,0,1),\n                name=\"conv2d_0_m\"),\n            input_shape=input_shape,\n            **pruning_params),\n        QActivation(\"quantized_relu(4,0)\", name=\"act0_m\"),\n        prune.prune_low_magnitude(\n            QConv2D(\n                64, (3, 3), strides=(2,2),\n                kernel_quantizer=quantized_bits(4,0,1),\n                bias_quantizer=quantized_bits(4,0,1),\n                name=\"conv2d_1_m\"),\n            **pruning_params),\n        QActivation(\"quantized_relu(4,0)\", name=\"act1_m\"),\n        prune.prune_low_magnitude(\n            QConv2D(\n                64, (2, 2), strides=(2,2),\n                kernel_quantizer=quantized_bits(4,0,1),\n                
bias_quantizer=quantized_bits(4,0,1),\n                name=\"conv2d_2_m\"),\n            **pruning_params),\n        QActivation(\"quantized_relu(4,0)\", name=\"act2_m\"),\n        Flatten(),\n        prune.prune_low_magnitude(\n            QDense(\n                num_classes, kernel_quantizer=quantized_bits(4,0,1),\n                bias_quantizer=quantized_bits(4,0,1),\n                name=\"dense\"),\n            **pruning_params),\n        Activation(\"softmax\", name=\"softmax\")\n  ])\n\n\ndef train_and_save(model, x_train, y_train, x_test, y_test):\n    model.compile(\n        loss=\"categorical_crossentropy\",\n        optimizer=\"adam\",\n        metrics=[\"accuracy\"])\n\n    # Print the model summary.\n    model.summary()\n\n    # Add a pruning step callback to peg the pruning step to the optimizer's\n    # step. Also add a callback to add pruning summaries to tensorboard\n    callbacks = [\n        pruning_callbacks.UpdatePruningStep(),\n        #pruning_callbacks.PruningSummaries(log_dir=tempfile.mkdtemp())\n        pruning_callbacks.PruningSummaries(log_dir=\"/tmp/mnist_prune\")\n    ]\n\n    model.fit(\n        x_train,\n        y_train,\n        batch_size=batch_size,\n        epochs=epochs,\n        verbose=1,\n        callbacks=callbacks,\n        validation_data=(x_test, y_test))\n    score = model.evaluate(x_test, y_test, verbose=0)\n    print(\"Test loss:\", score[0])\n    print(\"Test accuracy:\", score[1])\n\n    print_model_sparsity(model)\n\n    # Export and import the model. 
Check that accuracy persists.\n    _, keras_file = tempfile.mkstemp(\".h5\")\n    print(\"Saving model to: \", keras_file)\n    save_model(model, keras_file)\n    \n    print(\"Reloading model\")\n    with prune.prune_scope():\n        loaded_model = load_qmodel(keras_file)\n    score = loaded_model.evaluate(x_test, y_test, verbose=0)\n    print(\"Test loss:\", score[0])\n    print(\"Test accuracy:\", score[1])\n\n\ndef main():\n    # input image dimensions\n    img_rows, img_cols = 28, 28\n\n    # the data, shuffled and split between train and test sets\n    (x_train, y_train), (x_test, y_test) = mnist.load_data()\n\n    if K.image_data_format() == \"channels_first\":\n      x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n      x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n      input_shape = (1, img_rows, img_cols)\n    else:\n      x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)\n      x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)\n      input_shape = (img_rows, img_cols, 1)\n\n    x_train = x_train.astype(\"float32\")\n    x_test = x_test.astype(\"float32\")\n    x_train /= 255\n    x_test /= 255\n    print(\"x_train shape:\", x_train.shape)\n    print(x_train.shape[0], \"train samples\")\n    print(x_test.shape[0], \"test samples\")\n\n    # convert class vectors to binary class matrices\n    y_train = to_categorical(y_train, num_classes)\n    y_test = to_categorical(y_test, num_classes)\n\n    pruning_params = {\n        \"pruning_schedule\":\n            pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)\n    }\n    \n    if prune_whole_model:\n        model = build_model(input_shape)\n        model = prune.prune_low_magnitude(model, **pruning_params)\n    else:\n        model = build_layerwise_model(input_shape, **pruning_params)\n\n    train_and_save(model, x_train, y_train, x_test, y_test)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "examples/example_qdense.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests qdense model.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport argparse\n\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.utils import to_categorical\nimport numpy as np\n\nfrom qkeras import print_qstats\nfrom qkeras import QActivation\nfrom qkeras import QDense\nfrom qkeras import quantized_bits\nfrom qkeras import ternary\n\n\nnp.random.seed(42)\nOPTIMIZER = Adam()\nNB_EPOCH = 1\nBATCH_SIZE = 32\nVERBOSE = 1\nNB_CLASSES = 10\nN_HIDDEN = 100\nVALIDATION_SPLIT = 0.1\nRESHAPED = 784\n\n\ndef QDenseModel(weights_f, load_weights=False):\n  \"\"\"Construct QDenseModel.\"\"\"\n\n  x = x_in = Input((RESHAPED,), name=\"input\")\n  x = QActivation(\"quantized_relu(4)\", name=\"act_i\")(x)\n  x = QDense(N_HIDDEN, kernel_quantizer=ternary(),\n             bias_quantizer=quantized_bits(4, 0, 1), name=\"dense0\")(x)\n  x = QActivation(\"quantized_relu(2)\", name=\"act0\")(x)\n  x = QDense(\n      NB_CLASSES,\n      kernel_quantizer=quantized_bits(4, 0, 1),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      
name=\"dense2\")(\n          x)\n  x = Activation(\"softmax\", name=\"softmax\")(x)\n\n  model = Model(inputs=[x_in], outputs=[x])\n  model.summary()\n  model.compile(loss=\"categorical_crossentropy\",\n                optimizer=OPTIMIZER, metrics=[\"accuracy\"])\n\n  if load_weights and weights_f:\n    model.load_weights(weights_f)\n\n  print_qstats(model)\n  return model\n\n\ndef UseNetwork(weights_f, load_weights=False):\n  \"\"\"Use DenseModel.\n\n  Args:\n    weights_f: weight file location.\n    load_weights: load weights when it is True.\n  \"\"\"\n  model = QDenseModel(weights_f, load_weights)\n\n  batch_size = BATCH_SIZE\n  (x_train_, y_train_), (x_test_, y_test_) = mnist.load_data()\n\n  x_train_ = x_train_.reshape(60000, RESHAPED)\n  x_test_ = x_test_.reshape(10000, RESHAPED)\n  x_train_ = x_train_.astype(\"float32\")\n  x_test_ = x_test_.astype(\"float32\")\n\n  x_train_ /= 255\n  x_test_ /= 255\n\n  print(x_train_.shape[0], \"train samples\")\n  print(x_test_.shape[0], \"test samples\")\n\n  y_train_ = to_categorical(y_train_, NB_CLASSES)\n  y_test_ = to_categorical(y_test_, NB_CLASSES)\n\n  if not load_weights:\n    model.fit(\n        x_train_,\n        y_train_,\n        batch_size=batch_size,\n        epochs=NB_EPOCH,\n        verbose=VERBOSE,\n        validation_split=VALIDATION_SPLIT)\n\n    if weights_f:\n      model.save_weights(weights_f)\n\n  score = model.evaluate(x_test_, y_test_, verbose=VERBOSE)\n  print_qstats(model)\n  print(\"Test score:\", score[0])\n  print(\"Test accuracy:\", score[1])\n\n\ndef ParserArgs():\n  parser = argparse.ArgumentParser()\n  parser.add_argument(\"-l\", \"--load_weight\", default=\"0\",\n                      help=\"\"\"load weights directly from file.\n                            0 is to disable and train the network.\"\"\")\n  parser.add_argument(\"-w\", \"--weight_file\", default=None)\n  a = parser.parse_args()\n  return a\n\n\nif __name__ == \"__main__\":\n  args = ParserArgs()\n  lw = False if 
args.load_weight == \"0\" else True\n  UseNetwork(args.weight_file, load_weights=lw)\n"
  },
  {
    "path": "examples/example_qoctave.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"QOctave example.\"\"\"\nimport numpy as np\nimport sys\nfrom tensorflow.keras import activations\nfrom tensorflow.keras import initializers\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.utils import to_categorical\nfrom functools import partial\nfrom qkeras import *   # pylint: disable=wildcard-import\n\n\ndef create_model():\n  \"\"\"use qocatve in network.\"\"\"\n  kernel_initializer=initializers.he_normal(seed=42)\n\n  x = x_in = Input(shape=(256, 256, 3))\n\n  # Block 1\n  high, low = QOctaveConv2D(\n      32, (3, 3),\n      alpha=0.5,\n      strides=(2, 2),\n      padding='valid',\n      kernel_initializer=kernel_initializer,\n      bias_initializer=\"zeros\",\n      bias_quantizer=\"quantized_bits(4,1)\",\n      depthwise_quantizer=\"quantized_bits(4,1)\",\n      depthwise_activation=\"quantized_bits(6,2,1)\",\n      pointwise_quantizer=\"quantized_bits(4,1)\",\n      acc_quantizer=\"quantized_bits(16,7,1)\",\n      activation=\"quantized_relu(6,2)\",\n      use_separable=True,\n      name='block1_conv1')([x, None])\n\n  # Block 2\n  high, low = QOctaveConv2D(\n      64, (3, 3),\n      alpha=0.4,\n      strides=(2, 2),\n      
padding='same',\n      kernel_initializer=kernel_initializer,\n      bias_initializer=\"zeros\",\n      bias_quantizer=\"quantized_bits(4,1)\",\n      depthwise_quantizer=\"quantized_bits(4,1)\",\n      depthwise_activation=\"quantized_bits(6,2,1)\",\n      pointwise_quantizer=\"quantized_bits(4,1)\",\n      acc_quantizer=\"quantized_bits(16,7,1)\",\n      activation=\"quantized_relu(6,2)\",\n      use_separable=True,\n      name='block2_conv1')([high, low])\n\n  # Block 3\n  high, low = QOctaveConv2D(\n      64, (3, 3),\n      alpha=0.4,\n      strides=(2, 2),\n      padding='same',\n      kernel_initializer=kernel_initializer,\n      bias_initializer=\"zeros\",\n      bias_quantizer=\"quantized_bits(4,1)\",\n      depthwise_quantizer=\"quantized_bits(4,1)\",\n      depthwise_activation=\"quantized_bits(6,2,1)\",\n      pointwise_quantizer=\"quantized_bits(4,1)\",\n      acc_quantizer=\"quantized_bits(16,7,1)\",\n      activation=\"quantized_relu(6,2)\",\n      use_separable=True,\n      name='block3_conv1')([high, low])\n\n  high, low = QOctaveConv2D(\n      32, (3, 3),\n      alpha=0.4,\n      strides=(1, 1),\n      padding='same',\n      kernel_initializer=kernel_initializer,\n      bias_initializer='zeros',\n      bias_quantizer=\"quantized_bits(4,1)\",\n      depthwise_quantizer=\"quantized_bits(4,1)\",\n      depthwise_activation=\"quantized_bits(6,2,1)\",\n      pointwise_quantizer=\"quantized_bits(4,1)\",\n      acc_quantizer=\"quantized_bits(16,7,1)\",\n      activation=\"quantized_relu(6,2)\",\n      use_separable=True,\n      name='block3_conv2')([high, low])\n\n  high, low = QOctaveConv2D(\n      32, (3, 3),\n      alpha=0.3,\n      strides=(1, 1),\n      padding='same',\n      kernel_initializer=kernel_initializer,\n      bias_initializer='zeros',\n      bias_quantizer=\"quantized_bits(4,1)\",\n      depthwise_quantizer=\"quantized_bits(4,1)\",\n      depthwise_activation=\"quantized_bits(6,2,1)\",\n      pointwise_quantizer=\"quantized_bits(4,1)\",\n 
     acc_quantizer=\"quantized_bits(16,7,1)\",\n      activation=\"quantized_relu(6,2)\",\n      use_separable=True,\n      name='block3_conv3')([high, low])\n\n  x, _ = QOctaveConv2D(\n      32, (3, 3),\n      alpha=0.0,\n      strides=(2, 2),\n      padding='same',\n      kernel_initializer=kernel_initializer,\n      bias_initializer='zeros',\n      bias_quantizer=\"quantized_bits(4,1)\",\n      depthwise_quantizer=\"quantized_bits(4,1)\",\n      depthwise_activation=\"quantized_bits(6,2,1)\",\n      pointwise_quantizer=\"quantized_bits(4,1)\",\n      acc_quantizer=\"quantized_bits(16,7,1)\",\n      activation=\"quantized_relu(6,2)\",\n      use_separable=True,\n      name='block3_conv_down')([high, low])\n\n  # Upsample\n  x = UpSampling2D(size=(2, 2), data_format=\"channels_last\")(x)\n\n  x = QConv2D(\n      2, (2, 2),\n      strides=(1, 1),\n      kernel_initializer=kernel_initializer,\n      bias_initializer=\"ones\",\n      kernel_quantizer=quantized_bits(4, 0, 1),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      padding=\"same\",\n      name=\"conv_up\")(\n          x)\n\n  x = Activation(\"softmax\", name=\"softmax\")(x)\n  output = x\n\n  model = Model(x_in, output, name='qoctave_network')\n  return model\n\n\n# Create the model\ndef customLoss(y_true,y_pred):\n  log1 = 1.5 * y_true * K.log(y_pred + 1e-9) * K.pow(1-y_pred, 2)\n  log0 = 0.5 * (1 - y_true) * K.log((1 - y_pred) + 1e-9) * K.pow(y_pred, 2)\n  return (- K.sum(K.mean(log0 + log1, axis = 0)))\n\nif __name__ == '__main__':\n  model = create_model()\n  model.compile(optimizer=\"Adam\", loss=customLoss, metrics=['acc'])\n  model.summary(line_length=100)\n  print_qstats(model)\n"
  },
  {
    "path": "examples/example_ternary.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import  # Not necessary in a Python 3-only module\nfrom __future__ import division  # Not necessary in a Python 3-only module\nfrom __future__ import print_function  # Not necessary in a Python 3-only module\n\nfrom absl import app\nfrom absl import flags\nimport matplotlib\nimport numpy as np\n\nmatplotlib.use('TkAgg')\nimport matplotlib.pyplot as plt\n\n\nFLAGS = flags.FLAGS\n\n\ndef _stochastic_rounding(x, precision, resolution, delta):\n  \"\"\"Stochastic_rounding for ternary.\n\n  Args:\n    x:\n    precision: A float. 
The area we want to make this stochastic rounding.\n       [delta-precision, delta] [delta, delta+precision]\n    resolution: control the quantization resolution.\n    delta: the undiscountinued point (positive number)\n\n  Return:\n    A tensor with stochastic rounding numbers.\n  \"\"\"\n  delta_left = delta - precision\n  delta_right = delta + precision\n  scale = 1 / resolution\n  scale_delta_left = delta_left * scale\n  scale_delta_right = delta_right * scale\n  scale_2_delta = scale_delta_right - scale_delta_left\n  scale_x = x * scale\n  fraction = scale_x - scale_delta_left\n  # print(precision, scale, x[0], np.floor(scale_x[0]), scale_x[0], fraction[0])\n\n  # we use uniform distribution\n  random_selector = np.random.uniform(0, 1, size=x.shape) * scale_2_delta\n\n  # print(precision, scale, x[0], delta_left[0], delta_right[0])\n  # print('x', scale_x[0], fraction[0], random_selector[0], scale_2_delta[0])\n  # rounddown = fraction < random_selector\n  result = np.where(fraction < random_selector,\n                    scale_delta_left / scale,\n                    scale_delta_right / scale)\n  return result\n\n\ndef _ternary(x, sto=False):\n  m = np.amax(np.abs(x), keepdims=True)\n  scale = 2 * m / 3.0\n  thres = scale / 2.0\n  ratio = 0.1\n\n  if sto:\n    sign_bit = np.sign(x)\n    x = np.abs(x)\n    prec = x / scale\n    x = (\n        sign_bit * scale * _stochastic_rounding(\n            x / scale,\n            precision=0.3, resolution=0.01, # those two are all normalized.\n            delta=thres / scale))\n    # prec + prec *ratio)\n    # mm = np.amax(np.abs(x), keepdims=True)\n  return np.where(np.abs(x) < thres, np.zeros_like(x), np.sign(x))\n\n\ndef main(argv):\n  if len(argv) > 1:\n    raise app.UsageError('Too many command-line arguments.')\n\n  # x = np.arange(-3.0, 3.0, 0.01)\n  # x = np.random.uniform(-0.01, 0.01, size=1000)\n  x = np.random.uniform(-10.0, 10.0, size=1000)\n  # x = np.random.uniform(-1, 1, size=1000)\n  x = np.sort(x)\n  tr = 
np.zeros_like(x)\n  t = np.zeros_like(x)\n  iter_count = 500\n  for _ in range(iter_count):\n    y = _ternary(x)\n    yr = _ternary(x, sto=True)\n    t = t + y\n    tr = tr + yr\n\n  plt.plot(x, t/iter_count)\n  plt.plot(x, tr/iter_count)\n  plt.ylabel('mean (%s samples)' % iter_count)\n  plt.show()\n\n\nif __name__ == '__main__':\n  app.run(main)\n"
  },
  {
    "path": "experimental/lo/__init__.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Exports logic optimization module.\"\"\"\nfrom .utils import *  # pylint: disable=wildcard-import\nfrom .receptive import model_to_receptive_field\nfrom .conv2d import optimize_conv2d_logic\nfrom .dense import optimize_dense_logic\nfrom .optimizer import run_rf_optimizer\nfrom .optimizer import run_abc_optimizer\nfrom .optimizer import mp_rf_optimizer_func\nfrom .table import load\nfrom .compress import Compressor\nfrom .generate_rf_code import *\n# __version__ = \"0.5.0\"\n"
  },
  {
    "path": "experimental/lo/compress.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements faster version of set on multiple strings.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nclass Compressor:\n  \"\"\"Implements a hierarchical set class with better performance than a set.\"\"\"\n\n  def __init__(self, hash_only_input=False):\n    self.n_dict = {}\n    self.hash_only_input = hash_only_input\n\n  def add_entry(self, table_in, table_out=\"\"):\n    \"\"\"Adds entry (table_in, table_out) to the set.\"\"\"\n    line = (table_in, table_out)\n\n    if self.hash_only_input:\n      h_line = hash(table_in)\n    else:\n      h_line = hash(line)\n\n    if self.n_dict.get(h_line, None):\n      self.n_dict[h_line] = self.n_dict[h_line].union([line])\n    else:\n      self.n_dict[h_line] = set([line])\n\n  def has_entry(self, table_in, table_out=\"\"):\n    \"\"\"Checks if table_in is already stored in the set.\"\"\"\n\n    line = (table_in, table_out)\n\n    if self.hash_only_input:\n      h_line = hash(table_in)\n    else:\n      h_line = hash(line)\n\n    if not self.n_dict.get(h_line, None):\n      return None\n\n    set_h_line = self.n_dict[h_line]\n\n    for (ti, to) in set_h_line:\n      if table_in == ti:\n        return to\n\n    return None\n\n  def __call__(self):\n    for key in 
self.n_dict:\n      for line in self.n_dict[key]:\n        yield line\n\n"
  },
  {
    "path": "experimental/lo/conv2d.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements convolutional (?, h, w, c) facing input layer optimization.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport multiprocessing as mp\nimport os\nimport shutil\n\nfrom .compress import Compressor\nimport numpy as np\nimport six\nfrom tensorflow.keras.models import Model\nfrom .utils import get_padding_value\n\nDEBUG = int(os.getenv(\"DEBUG\", 0))\n\nOG_IS_SYMBOLIC = 0\n\n\ndef parallel_index_table(\n    p, ni, size, idx_height, idx_width, i_dict, o_dict,\n    kernel, strides, padding, generate_pla):\n  \"\"\"Processes the table in parallel and use espresso to optimize it.\"\"\"\n\n  print(\"... 
indexing table from {} to {} ({} => {})\".format(\n      ni, ni+size, p[0].shape, p[1].shape))\n\n  table_ins = []\n  table_ous = []\n\n  table_set = Compressor(hash_only_input=True)\n\n  if DEBUG:\n    table_set_line = {}\n\n  for n in range(size):\n\n    # we need to traverse the outputs to compute the input coordinates\n\n    for ho in idx_height:\n      min_hi = strides[0]*ho - 2*padding[0]\n      max_hi = strides[0]*ho - 2*padding[0] + kernel[0]\n\n      if min_hi < 0 or max_hi > p[0].shape[0]:\n        continue\n\n      for wo in idx_width:\n        min_wi = strides[1]*wo - 2*padding[1]\n        max_wi = strides[1]*wo - 2*padding[1] + kernel[1]\n\n        if min_wi < 0 or max_wi > p[0].shape[1]:\n          continue\n\n        i_values = p[0][n, min_hi:max_hi, min_wi:max_wi].flatten()\n\n        # o_values has dimension (1, 1, C_O)\n\n        o_values = p[1][n, ho, wo]\n\n        # if we generate a pla entry, we care about a list of\n        # bits. Otherwise, we care about a list of floating point\n        # values.\n\n        table_i = \"\".join([i_dict[v] for v in i_values])\n        table_o = \"\".join([o_dict[v] for v in o_values])\n\n        if generate_pla:\n          table_s = \"\".join([str(v) for v in table_i])\n          bit_str = table_s\n        else:\n          table_s = \",\".join([str(v) for v in table_i])\n          table_i = table_s\n          bit_str = \"\".join(i_dict[v] for v in i_values)\n        is_table_zero = bit_str != \"0\"*len(bit_str)\n\n        if table_set.has_entry(table_s) and not is_table_zero:\n\n          # if table is already stored, we do not store it again.\n          # from time to time, we may want to check if we have found\n          # diverging output values.\n\n          if DEBUG:\n\n            (table_o_old, (old_n, old_ho, old_wo)) = table_set_line[table_s]\n\n            if table_o != table_o_old:\n              print(\n                  \"contradicting outputs n={} old_n={} out_p={} out={}\".format(\n             
         (n, ho, wo), (old_n, old_ho, old_wo), table_o_old,\n                      table_o))\n              print(\" I:\", table_s)\n              print(\" I:\", i_values)\n              print(\"<<<\", table_o_old)\n              print(\">>>\", table_o)\n              return (None, None)\n\n          continue\n\n        # these are unique table entries\n\n        table_ins.append(table_i)\n        table_ous.append(table_o)\n\n        # we store this information in order to be able to debug\n        # and discard information.\n\n        table_set.add_entry(table_s)\n\n        if DEBUG:\n          table_set_line[table_s] = (table_o, (n, ho, wo))\n\n  print(\"... indexing table from {} to {} completed\".format(ni, ni+size))\n\n  return (table_ins, table_ous)\n\n\ndef parallel_compress_output_table(\n    filename, header, table_ins, table_ous, output_group, generate_pla,\n    n_bits_og, o, o_bits):\n  \"\"\"Processes in parallel compression of table and writes it to a disk.\"\"\"\n\n  f = open(filename, \"w\")\n\n  f.write(\"\".join(header))\n\n  c = Compressor()\n\n  for n in range(len(table_ins)):\n    for og in range(output_group):\n\n      if output_group > 1:\n        if generate_pla:\n          if OG_IS_SYMBOLIC:\n            og_l = [\"0\"] * n_bits_og\n            og_l[n_bits_og - 1 - og] = \"1\"\n            og_b = \"\".join(og_l)\n            table_i_suffix = \" \" + og_b\n          else:\n            og_b = bin(og)[2:]\n            table_i_suffix = \" \" + \"0\" * (n_bits_og - len(og_b)) + og_b\n        else:\n          table_i_suffix = \",\" + str(og)\n      else:\n        table_i_suffix = \"\"\n      table_i = table_ins[n] + table_i_suffix\n      table_o = table_ous[n][(o+og)*o_bits:(o+og+1)*o_bits]\n\n      if generate_pla:\n        c.add_entry(table_i + \" \" + table_o)\n      else:\n        c.add_entry(table_i + \",\" + str(table_o[0]))\n\n  for line in c():\n    f.write(\"{}\\n\".format(line[0]))\n\n  if generate_pla:\n    f.write(\".e\\n\")\n\n  
f.close()\n\n  print(\"... file {} generated\".format(filename))\n\n\ndef optimize_conv2d_logic(\n    model, i_name, o_name, x_train,\n    i_dict=None, o_dict=None,\n    kernel=None, strides=None, padding=None,\n    output_group=1, samples=2000,\n    randomize=None, generate_pla=True, prefix=\"\"):\n  \"\"\"Generates table for logic synthesis for conv2d or conv2d-like shape.\n\n  Generates table in either espresso format or csv format to be optimized\n  for logic synthesis. The parameters kernel, strides and padding usually\n  do not require any values, unless we want to embed maxpooling layer or\n  multiple convolutional layers between i_name and o_name. In that case,\n  we require the user to compute the proper kernel, strides, and padding\n  that will correspond to the combined layer, as Keras and tensorflow do not\n  provide a way to compute the receptive field between two layers.\n\n  Arguments:\n    model: Keras model\n    i_name: name of convolutional layer (input to this layer must be\n      quantized).\n    o_name: name of quantized output layer.\n    x_train: training set to be used to dump table.\n    i_dict: dictionary of floating point values to encoding for inputs.\n    o_dict: dictionary of floating point values to encoding for outputs.\n    kernel: kernel size, to be specified if we want to override convolution\n      kernel.\n    strides: strides, to be specified if we want to override first convolution\n      strides.\n    padding: padding, to be specified if we want to override first convolution\n      padding.\n    output_group: by default, we compute one PE per channel output. 
The user\n      can override that by specifying how many output channels should be\n      bundled into the same PE.\n    samples: how many images from x_train should be sampled when generating the\n      tables.\n    randomize: if specified, it should be the number of coordinates within the\n      same image we will use to derive the convolution table.\n    generate_pla: if true, we generate table in pla format. Otherwise, we\n      generate a csv file.\n    prefix: prefix name to create directory.\n\n  Returns:\n    list of files generated.\n  \"\"\"\n\n  # if no i_dict or no o_dict, we do not know how to encode, so we generate\n  # csv file.\n\n  if not i_dict or not o_dict:\n    generate_pla = False\n\n  # extract layer from i_name and o_name\n\n  i_layer = model.get_layer(i_name)\n  o_layer = model.get_layer(o_name)\n\n  # if kernel is not specified, use the kernel size from i_layer\n\n  if not kernel:\n    kernel = i_layer.kernel_size\n\n  # if strides is not specified, use the strides from i_layer\n\n  if not strides:\n    strides = i_layer.strides\n\n  # if padding is not specified, use the padding from i_layer\n\n  if not padding:\n    padding = i_layer.padding\n\n  # for conv2d, we want a list for kernel, strides and padding\n\n  if not isinstance(kernel, list) and not isinstance(kernel, tuple):\n    kernel = [kernel, kernel]\n\n  if not isinstance(strides, list) and not isinstance(strides, tuple):\n    strides = [strides, strides]\n\n  if not isinstance(padding, list) and not isinstance(padding, tuple):\n    padding = [padding, padding]\n\n  # compute the padding value\n\n  padding[0] = get_padding_value(padding[0], kernel[0])\n  padding[1] = get_padding_value(padding[1], kernel[1])\n\n  # resample inputs\n\n  skip = min(2000, samples)\n\n  indexes = np.array(range(x_train.shape[0]))\n  np.random.shuffle(indexes)\n  x_train = x_train[indexes[:samples]]\n\n  # we want to create a smaller model that from inputs generate\n  # i_layer.output + o_layer.output 
tensors, so that we can predict\n  # its values.\n\n  outputs = []\n\n  x = i_layer.input\n  y = o_layer.output\n\n  if not isinstance(x, list):\n    x = [x]\n\n  outputs = x + [y]\n\n  mo = Model(inputs=model.inputs, outputs=outputs)\n  p = mo.predict(x_train)\n\n  # in csv mode, each entry has \"1\" value, for PLA,\n  # we encode the floating point into multiple bits.\n\n  if not generate_pla:\n    i_bits = 1\n    # i_dict = {v:v for v in i_dict.keys()}\n  else:\n    i_bits = len(six.next(six.itervalues(i_dict)))\n\n  if not generate_pla:\n    o_bits = 1\n    # o_dict = {v:v for v in o_dict.keys()}\n  else:\n    o_bits = len(six.next(six.itervalues(o_dict)))\n\n  # if randomize is specified, we will sample sqrt(randomize)\n  # from each image, as the conv2d performs the filter everywhere\n  # in the image. Because the same image may contain a lot of\n  # reduntant information, we may want to restrict the number of\n  # samples.\n\n  if randomize:\n    idx_height = np.random.choice(\n        p[-1].shape[1],\n        int(np.round(np.sqrt(randomize))))\n\n    idx_width = np.random.choice(\n        p[-1].shape[2],\n        int(np.round(np.sqrt(randomize))))\n  else:\n    idx_height = range(p[-1].shape[1])\n    idx_width = range(p[-1].shape[2])\n\n  # this is just to inspect that the inputs and outputs are really quantized.\n\n  print(\"inputs:\")\n  for i in range(len(x)):\n    print(i, np.min(p[i]), np.max(p[i]))\n  print(\"outputs:\")\n  print(np.min(p[-1]), np.max(p[-1]))\n\n  # i_size and o_size are the channel sizes of the inputs and outputs\n\n  o_size = y.shape[-1]\n  i_size = p[0].shape[-1]\n\n  if generate_pla:\n    suffix = \"pla\"\n  else:\n    suffix = \"csv\"\n\n  prefix = prefix + \"/\" if prefix else \"\"\n\n  # lets try to remove the directory and create a new one\n\n  try:\n    shutil.rmtree(prefix + i_layer.name + \".\" + suffix)\n  except OSError:\n    pass\n\n  try:\n    os.makedirs(prefix + i_layer.name + \".\" + suffix)\n  except OSError:\n    
pass\n\n  table_ins = list()\n  table_ous = list()\n\n  print(\"...indexing inputs\")\n\n  # for each image in sampled x_train\n\n  # on Intel processors, mp.cpu_count() returns number of threads\n\n  number_of_processes = mp.cpu_count() // 2\n  pool = mp.Pool(number_of_processes)\n\n  results = []\n\n  for n in range(0, x_train.shape[0], skip):\n\n    res = pool.apply_async(\n        parallel_index_table,\n        args=((p[0][n:n+skip], p[1][n:n+skip]), n, skip, idx_height,\n              idx_width, i_dict, o_dict, kernel, strides, padding,\n              generate_pla))\n    results.append(res)\n\n  pool.close()\n  pool.join()\n\n  all_pools = [res.get(timeout=1) for res in results]\n\n  table_ins = sum([ap[0] for ap in all_pools], [])\n  table_ous = sum([ap[1] for ap in all_pools], [])\n\n  # input and output size\n\n  ni = len(table_ins[0])\n  no = len(table_ous[0])\n\n  print(\"... generating tables {} outputs, {} entries\".format(\n      o_size, len(table_ins)))\n\n  # this step should be very fast\n\n  files = []\n\n  if OG_IS_SYMBOLIC:\n    if output_group > 1:\n      n_bits_og = output_group\n    else:\n      n_bits_og = 1\n  else:\n    if output_group == 2:\n      n_bits_og = 1\n    else:\n      n_bits_og = int(np.ceil(np.log2(output_group)))\n\n  # sometimes linux get very grumpy with too many files opened.\n  # let's limit to 20.\n\n  number_of_processes = min(20, mp.cpu_count() // 2)\n  pool = mp.Pool(number_of_processes)\n\n  for o in range(0, o_size, output_group):\n\n    filename = \"{}{}.{}/{}_{}.raw.{}\".format(\n        prefix, i_name, suffix, i_name, o, suffix)\n\n    files.append(filename)\n\n    header = []\n\n    if generate_pla:\n      header.append(\".i {}\\n\".format(ni + n_bits_og))\n      header.append(\".o {}\\n\".format(no // o_size))\n      header.append(\".type fr\\n\")\n\n      if OG_IS_SYMBOLIC and output_group > 1:\n        header.append(\".mv {} {} {} {}\\n\".format(\n            3, ni, n_bits_og, no // o_size))\n\n      # let's 
generate some labels\n\n      header.append(\".ob \" + \" \".join([\n          \"o_\" + str(o) + \"_\" + str(o_bits - 1 - v)\n          for v in range(o_bits)]) + \"\\n\")\n\n      i_names = []\n\n      # name is i_<channel>_<kernel_row>_<kernel_col>_bit\n\n      assert ni == (i_size * kernel[0] * kernel[1] * i_bits)\n\n      for channel in range(i_size):\n        for row in range(kernel[0]):\n          for col in range(kernel[1]):\n            for bit in range(i_bits):\n              i_names.append(\"i_{}_{}_{}_{}\".format(\n                  channel, row, col, (i_bits - 1 - bit)))\n\n      # if we are grouping multiple channels, these will be the inputs\n\n      for c in range(n_bits_og):\n        i_names.append(\"og_{}\".format(n_bits_og - 1 - c))\n\n      header.append(\".ilb \" + \" \".join(i_names) + \"\\n\")\n\n    pool.apply_async(\n        parallel_compress_output_table,\n        args=((filename, header, table_ins, table_ous, output_group,\n               generate_pla, n_bits_og, o, o_bits)))\n\n  pool.close()\n  pool.join()\n\n  return files\n"
  },
  {
    "path": "experimental/lo/dense.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Implements dense (?, features) fancing input layer optimization.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport multiprocessing as mp\nimport os\nimport shutil\n\nfrom .compress import Compressor\nimport numpy as np\nimport six\nfrom tensorflow.keras.models import Model\n\nDEBUG = int(os.getenv(\"DEBUG\", 0))\n\nOG_IS_SYMBOLIC = 0\n\n\ndef parallel_index_table(\n    p, ni, size, i_dict, o_dict, generate_pla):\n  \"\"\"Processes the table in parallel and use espresso to optimize it.\"\"\"\n\n  print(\"... indexing table from {} to {} ({} => {})\".format(\n      ni, ni+size, p[0].shape, p[1].shape))\n\n  table_ins = []\n  table_ous = []\n\n  table_set = Compressor(hash_only_input=True)\n\n  if DEBUG:\n    table_set_line = {}\n\n  for n in range(size):\n\n    i_values = p[0][n].flatten()\n    o_values = p[1][n].flatten()\n\n    # if we generate a pla entry, we care about a list of\n    # bits. 
Otherwise, we care about a list of floating point\n    # values.\n\n    table_i = \"\".join([i_dict[v] for v in i_values])\n    table_o = \"\".join([o_dict[v] for v in o_values])\n\n    if generate_pla:\n      table_s = \"\".join([str(v) for v in table_i])\n      bit_str = table_s\n    else:\n      table_s = \",\".join([str(v) for v in table_i])\n      table_i = table_s\n      bit_str = \"\".join(str(i_dict[v]) for v in i_values)\n    is_table_zero = bit_str != \"0\"*len(bit_str)\n\n    if table_set.has_entry(table_s) and not is_table_zero:\n\n      # if table is already stored, we do not store it again.\n      # from time to time, we may want to check if we have found\n      # diverging output values.\n\n      if DEBUG:\n\n        (table_o_old, old_n) = table_set_line[table_s]\n\n        if table_o != table_o_old:\n          print(\"contradicting outputs n={} old_n={} out_p={} out={}\".format(\n              n, old_n, table_o_old, table_o))\n          print(\" I:\", table_s)\n          print(\" I:\", i_values)\n          print(\"<<<\", table_o_old)\n          print(\">>>\", table_o)\n          return (None, None)\n\n      continue\n\n    # these are unique table entries\n\n    table_ins.append(table_i)\n    table_ous.append(table_o)\n\n    # we store this information in order to be able to debug\n    # and discard information.\n\n    table_set.add_entry(table_s)\n\n    if DEBUG:\n      table_set_line[table_s] = (table_o, n)\n\n  print(\"... 
indexing table from {} to {} completed\".format(ni, ni+size))\n\n  return (table_ins, table_ous)\n\n\ndef parallel_compress_output_table(\n    filename, header, table_ins, table_ous, output_group, generate_pla,\n    n_bits_og, o, o_bits):\n  \"\"\"Processes in parallel compression of table and writes it to a disk.\"\"\"\n\n  f = open(filename, \"w\")\n\n  f.write(\"\".join(header))\n\n  c = Compressor()\n\n  for n in range(len(table_ins)):\n    for og in range(output_group):\n\n      if output_group > 1:\n        if generate_pla:\n          if OG_IS_SYMBOLIC:\n            og_l = [\"0\"] * n_bits_og\n            og_l[n_bits_og - 1 - og] = \"1\"\n            og_b = \"\".join(og_l)\n            table_i_suffix = \" \" + og_b\n          else:\n            og_b = bin(og)[2:]\n            table_i_suffix = \" \" + \"0\"*(n_bits_og - len(og_b)) + og_b\n        else:\n          table_i_suffix = \",\" + str(og)\n      else:\n        table_i_suffix = \"\"\n      table_i = table_ins[n] + table_i_suffix\n      table_o = table_ous[n][(o+og)*o_bits:(o+og+1)*o_bits]\n\n      if generate_pla:\n        c.add_entry(table_i + \" \" + table_o)\n      else:\n        c.add_entry(table_i + \",\" + str(table_o[0]))\n\n  for line in c():\n    f.write(\"{}\\n\".format(line[0]))\n\n  if generate_pla:\n    f.write(\".e\\n\")\n  f.close()\n\n\ndef optimize_dense_logic(\n    model, i_name, o_name, x_train, i_dict, o_dict,\n    output_group=1, samples=2000,\n    generate_pla=True, prefix=\"\"):\n\n  \"\"\"Generates table for logic synthesis for dense or flattened layer.\n\n  Generates table in either espresso format or csv format to be optimized\n  for logic synthesis.\n\n  Arguments:\n    model: Keras model\n    i_name: name of convolutional layer (input to this layer must be\n      quantized).\n    o_name: name of quantized output layer.\n    x_train: training set to be used to dump table.\n    i_dict: dictionary of floating point values to encoding for inputs.\n    o_dict: dictionary of 
floating point values to encoding for outputs.\n    output_group: by default, we compute one PE per channel output. The user\n      can override that by specifying how many output channels should be\n      bundled into the same PE.\n    samples: how many images from x_train should be sampled when generating the\n      tables.\n    generate_pla: if true, we generate table in pla format. Otherwise, we\n      generate a csv file.\n    prefix: prefix name to create a directory.\n  Returns:\n    list of files generated.\n  \"\"\"\n\n  i_layer = model.get_layer(i_name)\n  o_layer = model.get_layer(o_name)\n\n  # resample inputs\n\n  skip = min(2000, samples)\n\n  indexes = np.array(range(x_train.shape[0]))\n  np.random.shuffle(indexes)\n\n  x_train = x_train[indexes[:samples]]\n\n  outputs = []\n\n  x = i_layer.input\n  y = o_layer.output\n\n  if not isinstance(x, list):\n    x = [x]\n\n  outputs = x + [y]\n\n  mo = Model(inputs=model.inputs, outputs=outputs)\n  p = mo.predict(x_train)\n\n  # in csv mode, each entry has \"1\" value, for PLA,\n  # we encode the floating point into multiple bits.\n\n  if not generate_pla:\n    i_bits = 1\n    # i_dict = {v:v for v in i_dict.keys()}\n  else:\n    i_bits = len(six.next(six.itervalues(i_dict)))\n\n  if not generate_pla:\n    o_bits = 1\n    # o_dict = {v:v for v in o_dict.keys()}\n  else:\n    o_bits = len(six.next(six.itervalues(o_dict)))\n\n  print(\"inputs:\")\n  for i in range(len(x)):\n    print(i, np.min(p[i]), np.max(p[i]))\n  print(\"outputs:\")\n  print(0, np.min(p[-1]), np.max(p[-1]))\n\n  o_size = y.shape[-1]\n  i_size = p[0].shape[-1]\n\n  if generate_pla:\n    suffix = \"pla\"\n  else:\n    suffix = \"csv\"\n\n  prefix = prefix + \"/\" if prefix else \"\"\n\n  # lets try to remove the directory and create a new one\n\n  try:\n    shutil.rmtree(prefix + i_layer.name + \".\" + suffix)\n  except OSError:\n    pass\n\n  try:\n    os.makedirs(prefix + i_layer.name + \".\" + suffix)\n  except OSError:\n    pass\n\n  
print(\"...indexing inputs\")\n\n  # for each image in sampled x_train\n\n  # on Intel processors, mp.cpu_count() returns number of threads\n\n  number_of_processes = mp.cpu_count() // 2\n  pool = mp.Pool(number_of_processes)\n\n  results = []\n\n  for n in range(0, x_train.shape[0], skip):\n\n    res = pool.apply_async(\n        parallel_index_table,\n        args=((p[0][n:n+skip], p[1][n:n+skip]), n, skip, i_dict, o_dict,\n              generate_pla))\n    results.append(res)\n\n  pool.close()\n  pool.join()\n\n  all_pools = [res.get(timeout=1) for res in results]\n\n  table_ins = sum([ap[0] for ap in all_pools], [])\n  table_ous = sum([ap[1] for ap in all_pools], [])\n\n  # input and output size\n\n  ni = len(table_ins[0])\n  no = len(table_ous[0])\n\n  print(\"... generating tables {} outputs, {} entries\".format(\n      o_size, len(table_ins)))\n\n  # this step should be very fast\n\n  files = []\n\n  if OG_IS_SYMBOLIC:\n    if output_group > 1:\n      n_bits_og = output_group\n    else:\n      n_bits_og = 1\n  else:\n    if output_group == 2:\n      n_bits_og = 1\n    else:\n      n_bits_og = int(np.ceil(np.log2(output_group)))\n\n  # sometimes linux get very grumpy with too many files opened.\n  # let's limit to 20.\n\n  number_of_processes = min(20, mp.cpu_count() // 2)\n  pool = mp.Pool(number_of_processes)\n\n  for o in range(0, o_size, output_group):\n\n    filename = \"{}{}.{}/{}_{}.raw.{}\".format(\n        prefix, i_name, suffix, i_name, o, suffix)\n\n    files.append(filename)\n\n    header = []\n\n    if generate_pla:\n      header.append(\".i {}\\n\".format(ni + n_bits_og))\n      header.append(\".o {}\\n\".format(no // o_size))\n      header.append(\".type fr\\n\")\n\n      if OG_IS_SYMBOLIC and output_group > 1:\n        header.append(\".mv {} {} {} {}\\n\".format(\n            3, ni, n_bits_og, no // o_size))\n\n      # let's generate some labels\n\n      header.append(\".ob \" + \" \".join([\n          \"o_\" + str(o) + \"_\" + str(o_bits - 1 - 
v)\n          for v in range(o_bits)]) + \"\\n\")\n\n      i_names = []\n\n      # name is i_<features>_bit\n\n      assert ni == (i_size * i_bits)\n\n      for feature in range(i_size):\n        for bit in range(i_bits):\n          i_names.append(\"i_{}_{}\".format(\n              feature, (i_bits - 1 - bit)))\n\n      # if we are grouping multiple channels, these will be the inputs\n\n      for c in range(n_bits_og):\n        i_names.append(\"og_{}\".format(n_bits_og - 1 - c))\n\n      header.append(\".ilb \" + \" \".join(i_names) + \"\\n\")\n\n    pool.apply_async(\n        parallel_compress_output_table,\n        args=((filename, header, table_ins, table_ous, output_group,\n               generate_pla, n_bits_og, o, o_bits)))\n\n  pool.close()\n  pool.join()\n\n  return files\n\n\n"
  },
  {
    "path": "experimental/lo/generate_rf_code.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Generates expressions for random trees.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\n\nimport numpy as np\n\nDEBUG = int(os.environ.get(\"DEBUG\", 0))\nPRINT_DEBUG = int(os.environ.get(\"PRINT_DEBUG\", 0))\n\n\ndef gen_random_tree_regressor(\n    tree, code, bits, o_bits, o_decimal_digits, o_is_neg, bdd, offset, is_cc=True):\n  \"\"\"Generates HLS friendly C++ code for random tree regressor.\n\n  Generates HLS friendly C++ code for Catapult.\n\n  Arguments:\n    tree: decision tree regressor from SkLearn.\n    code: list of code lines to be append to.\n    bits: list containing number of bits for each of the inputs.\n    o_bits: number of bits for output.\n    o_decimal_digits: number of decimal digits (right of the decimal point\n        of o_bits for approximation of regressor in RandomTreeRegressor.\n    o_is_neg: True or 1 if output can be negative.\n    bdd: we actually try to cache entries (i,v,n1,n0) entries so that if\n        they appear again, we reuse previously computed nodes.\n    offset: each variable created in this function call is incremented by\n        offset.\n    is_cc: if True, generates C++, else Verilog.\n\n  Returns:\n    Tuple containing last variable name and current 
number of variables.\n\n  \"\"\"\n\n  # extract information from tree\n\n  n_nodes = tree.node_count\n  children_left = tree.children_left\n  children_right = tree.children_right\n  feature = tree.feature\n  threshold = tree.threshold\n  values = np.copy(tree.value)\n\n  o_suffix = \"\"\n  if DEBUG:\n    o_type = \"float\"\n  elif is_cc:\n    o_type = \"ac_fixed<{},{},{}>\".format(\n        o_bits + o_decimal_digits,\n        o_bits + o_is_neg,\n        o_is_neg)\n  else:\n    o_sign = \" signed\" if o_is_neg else \"\"\n    if o_bits + o_decimal_digits > 1:\n      o_suffix = \"[{}:0]\".format(o_bits + o_decimal_digits - 1)\n    o_type = \"wire\" + o_sign + \" \" + o_suffix\n\n\n  def round_digits(x, decimal_digits):\n    \"\"\"Rounds to decimal_digits to the right of the decimal point.\"\"\"\n\n    if DEBUG:\n      return x\n    factor = (1 << decimal_digits) * 1.0\n    x = x * factor\n    return np.round(x) / factor\n\n  is_leaves = np.zeros(shape=n_nodes, dtype=bool)\n\n  stack = [(0, -1)]\n\n  while stack:\n    node_id, parent_depth = stack.pop()\n\n    if children_left[node_id] != children_right[node_id]:\n      stack.append((children_left[node_id], parent_depth+1))\n      stack.append((children_right[node_id], parent_depth+1))\n    else:\n      is_leaves[node_id] = True\n      values[node_id] = round_digits(tree.value[node_id], o_decimal_digits)\n      if (\n          values[node_id].flatten()[0] != tree.value[node_id].flatten()[0] and\n          DEBUG\n      ):\n        print(node_id, values[node_id].flatten()[0],\n              tree.value[node_id].flatten()[0])\n\n  v_name = {}\n  n_vars = offset\n\n  bdd = {}\n\n  def round_value_to_int(x):\n    v = hex(int(np.round(x * (1 << (o_decimal_digits)))))\n    if is_cc:\n      if DEBUG:\n        return str(x)\n      else:\n        return x\n      #v + \" /* {} */\".format(x)\n    else:\n      return (\n          str(o_bits + o_decimal_digits) + \"'h\" + v[2:] + \" /* {} */\".format(x)\n      )\n\n  if 
is_leaves[0]:\n    v_name[0] = round_value_to_int(values[0].flatten()[0])\n    code.append(\"  {} n_{} = {};\".format(o_type, n_vars, v_name[0]))\n    last_var = \"n_{}\".format(n_vars)\n    n_vars += 1\n  else:\n    for i in range(n_nodes-1, -1, -1):\n      if is_leaves[i]:\n        continue\n\n      if v_name.get(children_left[i], None) is not None:\n        n1 = v_name[children_left[i]]\n      elif is_leaves[children_left[i]]:\n        n1 = round_value_to_int(values[children_left[i]].flatten()[0])\n        v_name[children_left[i]] = n1\n      else:\n        n1 = \"n_\" + str(n_vars)\n        n_vars += 1\n        v_name[children_left[i]] = n1\n        raise ValueError((children_left[i], n1, is_leaves[children_left[i]]))\n\n      if v_name.get(children_right[i], None) is not None:\n        n0 = v_name[children_right[i]]\n      elif is_leaves[children_right[i]]:\n        n0 = round_value_to_int(values[children_right[i]].flatten()[0])\n        v_name[children_right[i]] = n0\n      else:\n        n0 = \"n_\" + str(n_vars)\n        n_vars += 1\n        v_name[children_right[i]] = n0\n        raise ValueError((children_right[i], n0, is_leaves[children_right[i]]))\n\n      if v_name.get(i, None) is not None:\n        n = v_name[i]\n        last_var = v_name[i]\n      elif bdd.get((feature[i], threshold[i], n1, n0), None) is not None:\n        n = bdd[(feature[i], threshold[i], n1, n0)]\n        v_name[i] = n\n        last_var = n\n      elif n1 == n0:\n        # store intermediate results so that we can build a dag, not a tree\n        bdd[(feature[i], threshold[i], n1, n0)] = n1\n        v_name[i] = n1\n        last_var = n1\n      else:\n        n = \"n_\" + str(n_vars)\n        n_vars += 1\n        v_name[i] = n\n        # store intermediate results so that we can build a dag, not a tree\n        bdd[(feature[i], threshold[i], n1, n0)] = n\n        t = int(threshold[i])\n        if bits[feature[i]] == 1:\n          if t == 0:\n            n1, n0 = n0, n1\n          
code.append(\n              \"  {} {} = (i_{}) ? {} : {}; // x_{} {}\".format(\n                  o_type, v_name[i], feature[i], n1, n0, i,\n                  threshold[i]))\n        else:\n          code.append(\n              \"  {} {} = (i_{} <= {}) ? {} : {}; // x_{} {}\".format(\n                  o_type, v_name[i], feature[i], t, n1, n0, i,\n                  threshold[i]))\n        last_var = v_name[i]\n\n  return (last_var, n_vars)\n\n\ndef entry_to_hex(entry, max_value, size, is_cc):\n  \"\"\"Converts class instance to hexa number.\"\"\"\n\n  e_vector = [np.power(max_value+1, i) for i in range(len(entry)-1, -1, -1)]\n  entry = np.array(entry)\n  v = hex(np.sum(entry * e_vector))\n\n  if is_cc:\n    return v\n  else:\n    return str(size) + \"'h\" + v[2:] + \" /* {} */\".format(entry)\n\n\ndef gen_random_tree_classifier(\n    tree, code, bits, bdd, max_value, values_rom, offset, is_cc=True):\n  \"\"\"Generates C++ or Verilog friendly code for random tree classifier.\n\n  Generates HLS Catapult friendly code or RTL in Verilog for random tree\n  classifier from SkLearn.\n\n  Arguments:\n    tree: RandomTreeClassifier from sklearn.\n    code: list of strings containing code generated.\n    bits: list containing number of bits for each of the inputs.\n    bdd: we actually try to cache entries (i,v,n1,n0) entries so that if\n        they appear again, we reuse previously computed nodes.\n    max_value: random tree classifiers returns vector of classes with the\n        number of instances found in the terminal leaf node. 
This variable\n        specifies a clipping factor for each class type so that we have\n        a bounded problem to synthesize.\n    values_rom: to save space in classifier, we store class values in\n        values_rom.\n    offset: each variable created in this function call is incremented by\n        offset.\n    is_cc: if True, generates C++ code; otherwise, Verilog.\n\n  Returns:\n    Tuple containing last variable name and current number of variables.\n  \"\"\"\n\n  # extract information from tree\n\n  n_nodes = tree.node_count\n  children_left = tree.children_left\n  children_right = tree.children_right\n  feature = tree.feature\n  threshold = tree.threshold\n\n  values = {}\n\n  is_leaves = np.zeros(shape=n_nodes, dtype=bool)\n\n  stack = [(0, -1)]\n\n  rom_l = []\n\n  use_rom = max_value >= 7\n\n  n_classes = len(tree.value[0].flatten())\n\n  max_bits = int(np.ceil(np.log2(max_value + 1)))\n\n  while stack:\n    node_id, parent_depth = stack.pop()\n\n    if children_left[node_id] != children_right[node_id]:\n      stack.append((children_left[node_id], parent_depth+1))\n      stack.append((children_right[node_id], parent_depth+1))\n    else:\n      # is leaf node\n      is_leaves[node_id] = True\n      # get tree node output\n      p_input_tuple = tree.value[node_id].flatten().astype(np.int32)\n      max_input_value = np.max(p_input_tuple)\n      min_input_value = np.min(p_input_tuple)\n      # if max_value == 1, only keep top ones\n      if max_value == 1:\n        input_tuple = (p_input_tuple == max_input_value).astype(np.int32)\n        tree.value[node_id] = (tree.value[node_id] == max_input_value).astype(\n            tree.value[node_id].dtype)\n      else: # if max_value <= 3:\n        # SKLearn classifier computes probability for each entry instead of\n        # suming them all. 
We should do the same.\n        max_input_value = np.sum(p_input_tuple)\n        min_input_value = 0\n        # Just update tree.value to number so that we can compare accuracy of\n        # quantization later.\n        tree.value[node_id] = np.round(\n            max_value *\n            (tree.value[node_id] - min_input_value) /\n            (max_input_value - min_input_value))\n        input_tuple = tree.value[node_id].flatten()\n      input_tuple = tuple(list(input_tuple.astype(np.int32)))\n\n      # stores values in rom - we will use rom to store values if use_rom is\n      # true.\n      if values_rom.get(input_tuple, None) is None:\n        values_rom[input_tuple] = len(values_rom)\n        rom_l.append(input_tuple)\n        if DEBUG:\n          print(values_rom[input_tuple], input_tuple)\n\n      if use_rom:\n        values[node_id] = values_rom[input_tuple]\n      else:\n        values[node_id] = entry_to_hex(\n            input_tuple, max_value, max_bits * n_classes, is_cc)\n\n  # t_bits: entry type\n  # l_bits: table line type\n  if use_rom:\n    t_bits = int(np.ceil(np.log2(len(values_rom))))\n    l_bits = max_bits * n_classes\n  else:\n    t_bits = max_bits * n_classes\n\n  # we only store the index here, as we read from a rom\n  if is_cc:\n    if DEBUG:\n      t_type = \"int\"\n    else:\n      t_type = \"ac_int<{},false>\".format(t_bits)\n  else:\n    t_type = \"wire [{}:0]\".format(t_bits-1)\n\n  v_name = {}\n  n_vars = offset\n\n  bdd = {}\n\n  if is_leaves[0]:\n    v_name[0] = t_type + \"(\" + str(values[0]) + \")\"\n    code.append(\"  {} n_{} = {};\".format(\n        t_type, n_vars, values[0]))\n    last_var = \"n_{}\".format(n_vars)\n    n_vars += 1\n  else:\n    for i in range(n_nodes-1, -1, -1):\n      if is_leaves[i]:\n        continue\n\n      if v_name.get(children_left[i], None) is not None:\n        n1 = v_name[children_left[i]]\n      elif is_leaves[children_left[i]]:\n        if is_cc:\n          n1 = t_type + \"(\" + 
str(values[children_left[i]]) + \")\"\n        else:\n          n1 = str(values[children_left[i]])\n        v_name[children_left[i]] = n1\n      else:\n        n1 = \"n_\" + str(n_vars)\n        n_vars += 1\n        v_name[children_left[i]] = n1\n        raise ValueError((children_left[i], n1, is_leaves[children_left[i]]))\n\n      if v_name.get(children_right[i], None) is not None:\n        n0 = v_name[children_right[i]]\n      elif is_leaves[children_right[i]]:\n        if is_cc:\n          n0 = t_type + \"(\" + str(values[children_right[i]]) + \")\"\n        else:\n          n0 = str(values[children_right[i]])\n        v_name[children_right[i]] = n0\n      else:\n        n0 = \"n_\" + str(n_vars)\n        n_vars += 1\n        v_name[children_right[i]] = n0\n        raise ValueError((children_right[i], n0, is_leaves[children_right[i]]))\n\n      if v_name.get(i, None) is not None:\n        n = v_name[i]\n        last_var = v_name[i]\n      elif bdd.get((feature[i], threshold[i], n1, n0), None) is not None:\n        n = bdd[(feature[i], threshold[i], n1, n0)]\n        v_name[i] = n\n        last_var = n\n      elif n1 == n0:\n        # store intermediate results so that we can build a dag, not a tree\n        bdd[(feature[i], threshold[i], n1, n0)] = n1\n        v_name[i] = n1\n        last_var = n1\n      else:\n        n = \"n_\" + str(n_vars)\n        n_vars += 1\n        v_name[i] = n\n        # store intermediate results so that we can build a dag, not a tree\n        bdd[(feature[i], threshold[i], n1, n0)] = n\n        t = int(threshold[i])\n        if bits[feature[i]] == 1:\n          if t == 0:\n            n1, n0 = n0, n1\n          code.append(\n              \"  {} {} = (i_{}) ? {} : {}; // x_{} {}\".format(\n                  t_type, v_name[i], feature[i], n1, n0, i,\n                  threshold[i]))\n        else:\n          code.append(\n              \"  {} {} = (i_{} <= {}) ? 
{} : {}; // x_{} {}\".format(\n                  t_type, v_name[i], feature[i], t, n1, n0, i,\n                  threshold[i]))\n        last_var = v_name[i]\n\n  if use_rom:\n    if is_cc:\n      if DEBUG:\n        l_type = \"int\"\n      else:\n        l_type = \"ac_int<{},false>\".format(l_bits)\n\n      code.append(\"  {} {}_rom[{}]\".format(l_type, last_var, len(values_rom)) +\n                  \" {\")\n      for i in range(len(values_rom)):\n        code_s = \"    \" + entry_to_hex(rom_l[i], max_value, l_bits, is_cc)\n        if i < len(values_rom)-1:\n          code_s = code_s + \",\"\n        code.append(code_s)\n      code.append(\"  };\")\n\n    else:\n      l_type = \"wire [{}:0]\".format(l_bits - 1)\n      code.append(\"  function [{}:0] {}_rom;\".format(l_bits-1, last_var))\n      code.append(\"  input [{}:0] address;\".format(t_bits-1))\n      code.append(\"  begin\")\n      code.append(\"    case (address)\")\n      for i in range(len(values_rom)):\n        code.append(\"    {}'d{}: {}_rom = {};\".format(\n            l_bits, i, last_var, entry_to_hex(rom_l[i], max_value, l_bits, is_cc)))\n      code.append(\"    default: {}_rom = 0;\".format(last_var))\n      code.append(\"    endcase\")\n      code.append(\"  end\")\n      code.append(\"  endfunction\")\n\n    code.append(\"  {} v_{} = {}_rom[{}];\".format(\n        l_type, last_var, last_var, last_var))\n\n    last_var = \"v_\" + last_var\n\n  return last_var, n_vars\n\n\ndef gen_random_forest(\n    rf, name, bits, is_neg, o_bits, o_is_neg, is_regressor=True,\n    is_top_level=False, is_cc=True):\n  \"\"\"Generates HLS based C++ or SystemVerilog code for random forest.\"\"\"\n\n  # TODO(nunescoelho): need to take care of multiple outputs for classifier.\n  # we can get better result if we do not look at the winning classifier,\n  # but sum how many of them appear in each classifier for leaf nodes.\n\n  bdd = {}\n  values_rom = {}\n  offset = 0\n  code = []\n\n  max_value = (1 << 
int(os.environ.get(\"MAX_BITS\",1))) - 1\n  decimal_digits = int(os.environ.get(\"MAX_BITS\", 5))\n\n  assert max_value > 0\n\n  o_list = []\n  for i in range(len(rf.estimators_)):\n    tree = rf.estimators_[i].tree_\n    code.append(\"  //----- TREE {}\".format(i))\n    if is_regressor:\n      last_var, offset = gen_random_tree_regressor(\n          tree, code, bits, o_bits, decimal_digits, o_is_neg, bdd, offset, is_cc)\n    else:\n      values_rom = {}\n      last_var, offset = gen_random_tree_classifier(\n          tree, code, bits, bdd, max_value, values_rom, offset, is_cc)\n\n    o_list.append(last_var)\n\n  if is_cc:\n    header = [\n        \"#include <ac_int.h>\",\n        \"#include <ac_fixed.h>\",\n        \"#include <iostream>\",\n        \"using namespace std;\",\n        \"//#define _PRINT_DEBUG_\",\n        \"#define PB(n) cout << #n << \\\":\\\" << n << endl;\",\n        \"#define PS(n) \\\\\",\n        \"  cout << #n << \\\":\\\" << n.to_double() << \\\" \\\"; \\\\\",\n        \"  for(int i=n.width-1; i>=0; i--) cout << n[i]; cout << endl;\"\n    ]\n\n    if DEBUG:\n      header = header + [\n          \"static inline float round_even(float x) {\",\n          \"  int x_int = truncf(x);\",\n          \"  float x_dec = x - x_int;\",\n          \"  if ((x_dec == 0.5) && (x_int % 2 == 0)) {\",\n          \"    return truncf(x);\",\n          \"  } else {\",\n          \"    return truncf(x + 0.5);\"\n          \"  }\",\n          \"}\"\n      ]\n      if is_top_level:\n        header.append(\"#pragma hls_design top\")\n      header.append(\"void {}(int in[{}], int &out)\".format(\n          name, np.sum(bits), o_bits) + \" {\")\n    else:\n      n_bits = int(np.ceil(np.log2(len(o_list))))\n      header = header + [\n          \"static inline ac_int<{},{}> round_even(ac_fixed<{},{},{}> x)\".format(\n              o_bits, o_is_neg,\n              n_bits + o_bits + decimal_digits, n_bits + o_bits + o_is_neg,\n              o_is_neg\n          ) + \" {\",\n 
         \"  bool x_int_is_even = x[{}] == 0;\".format(decimal_digits + n_bits),\n          \"  bool x_frac_is_0_5 = x[{}] && (x.slc<{}>(0) == 0);\".format(\n              n_bits + decimal_digits-1, n_bits + decimal_digits-1),\n          \"  if (x_frac_is_0_5 && x_int_is_even) {\",\n          \"    return x.slc<{}>({});\".format(o_bits, n_bits + decimal_digits),\n          \"  } else {\",\n          \"    ac_int<{},{}> r = x.slc<{}>({}) + 1;\".format(\n              o_bits + 1, o_is_neg,\n              o_bits + 1, n_bits + decimal_digits - 1),\n          \"    return r.slc<{}>(1);\".format(o_bits + 1),\n          #\"    return (x + ac_fixed<{},{},{}>({})).slc<{}>({});\".format(\n          #    n_bits + o_bits + decimal_digits, n_bits + o_bits + o_is_neg,\n          #    o_is_neg, 1<<(n_bits+decimal_digits-1),\n          #    o_bits, n_bits + decimal_digits),\n          #    #o_is_neg, len(o_list)/2, o_bits, n_bits + decimal_digits),\n          \"  }\",\n          \"}\"\n      ]\n      if is_top_level:\n        header.append(\"#pragma hls_design top\")\n      header.append(\"void {}(ac_int<{},0> in, ac_int<{},{}> &out)\".format(\n          name, np.sum(bits), o_bits, o_is_neg) + \" {\")\n  else:\n    n_bits = int(np.ceil(np.log2(len(o_list))))\n    i_decl = \"  input [{}:0] in;\".format(np.sum(bits)-1)\n    o_sign = \"signed \" if o_is_neg else \"\"\n    o_decl = \"  output \" + o_sign + \"[{}:0] out;\".format(o_bits-1)\n    header = [\n        \"module \" + name + \"(in, out);\",\n        i_decl,\n        o_decl,\n        \"\",\n        \"  function {}[{}:0] round_even;\".format(o_sign, o_bits),\n        \"  input {}[{}:0] x;\".format(o_sign, n_bits + o_bits + decimal_digits - 1),\n        \"  reg x_int_is_even;\",\n        \"  reg x_frac_is_0_5;\",\n        \"  reg {}[{}:0] round_sum;\".format(o_sign, o_bits + 1),\n        \"  begin\",\n        \"    x_int_is_even = x[{}] == 0;\".format(decimal_digits + n_bits),\n        \"    x_frac_is_0_5 = x[{}] && (x[{}:0] == 
0);\".format(\n            n_bits + decimal_digits-1, n_bits + decimal_digits - 2),\n        \"    if (x_frac_is_0_5 && x_int_is_even)\",\n        \"      round_even = x[{}:{}];\".format(\n            n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits),\n        \"    else\",\n        \"    begin\",\n        \"      round_sum = x[{}:{}] + 1;\".format(\n            n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits - 1),\n        \"      round_even = round_sum[{}:1];\".format(o_bits + 1),\n        \"    end\",\n        #\"      round_even = (x + {})[{}:{}];\".format(\n        #    #(1 << (n_bits + decimal_digits - 1)),\n        #    n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits),\n        \"  end\",\n        \"  endfunction\"\n    ]\n\n\n  all_bits = np.sum(bits)\n  sum_i = 0\n  for i in range(bits.shape[0]):\n    if is_cc:\n      if bits[i] > 1:\n        if DEBUG:\n          header.append(\"  int i_{} = in[{}];\".format(i, i))\n        else:\n          header.append(\"  ac_int<{},{}> i_{} = in.slc<{}>({});\".format(\n              bits[i], is_neg[i], i, bits[i], sum_i))\n      else:\n        header.append(\"  bool i_{} = in[{}];\".format(i, sum_i))\n    else:\n      if bits[i] == 1:\n        header.append(\"  wire i_{} = in[{}];\".format(i, all_bits - sum_i - 1))\n      else:\n        header.append(\"  wire i_{}[{}:0] = in[{}:{}];\".format(\n            i, bits[i], sum_i + bits[i] - 1, all_bits - sum_i - 1))\n    sum_i += bits[i]\n\n  footer = []\n\n  if is_regressor:\n    n_bits = int(np.ceil(np.log2(len(o_list))))\n    assert 1 << n_bits == len(o_list)\n\n    if is_cc:\n\n      if DEBUG:\n        tmp_type = \"float\"\n      else:\n        tmp_type = \"ac_fixed<{},{},{}>\".format(\n            n_bits + o_bits + decimal_digits, n_bits + o_bits + o_is_neg,\n            o_is_neg)\n      avg_o = \"  {} o_tmp = {};\".format(tmp_type, \" + \".join(o_list))\n\n      # rnd_o = \"  o_tmp += {}({});\".format(tmp_type, len(o_list)/2)\n\n 
     if DEBUG:\n        out = \"  out = round_even(o_tmp / {});\".format(len(o_list))\n      else:\n        out = \"  out = round_even(o_tmp);\"\n\n      footer.append(\"  #ifdef _PRINT_DEBUG_\")\n      for o_name in o_list:\n        footer.append(\"  PS({});\".format(o_name))\n      footer.append(\"  #endif\")\n      closing = \"}\"\n\n    else:\n      tmp_sign = \"signed \" if o_is_neg else \"\"\n      avg_o = \"  wire \" + tmp_sign + \"[{}:0] o_tmp = {};\".format(\n          n_bits + o_bits + decimal_digits - 1, \" + \".join(o_list))\n\n      for n in o_list:\n        footer.append(\"  // always @({}) $display(\\\"{} = %f (%b)\\\", {} / 32.0, {});\".format(n,n,n,n))\n      footer.append(\"  // always @(o_tmp) $display(\\\"o_tmp = %b\\\", o_tmp);\")\n\n      out = \"  assign out = round_even(o_tmp);\"\n\n      closing = \"endmodule\"\n\n    footer = footer + [avg_o, out, closing]\n\n  else:\n\n    assert not o_is_neg\n\n    footer = []\n\n    o_suffix = \"\"\n    if DEBUG:\n      o_type = \"int\"\n    elif is_cc:\n      o_type = \"ac_int<{},{}>\".format(o_bits, o_is_neg)\n    else:\n      o_sign = \" signed\" if o_is_neg else \"\"\n      o_suffix = \"[{}:0]\".format(o_bits)\n      o_type = \"wire\" + o_sign + \" \" + o_suffix\n\n    if is_cc:\n      n_classes = 1 << o_bits\n      max_bits = int(np.ceil(np.log2(max_value + 1)))\n      log2_o_list = int(np.ceil(np.log2(len(o_list))))\n      if DEBUG:\n        log2_o_type = \"int\"\n      else:\n        log2_o_type = \"ac_int<{},false>\".format(log2_o_list + max_bits)\n      sum_v = (\n          \"  {} sum[{}] = \".format(\n              log2_o_type, 1 << o_bits) + \"{\" +\n          \",\".join(\"0\" * (1 << o_bits)) + \"};\"\n      )\n      footer = [sum_v]\n      for o_name in o_list:\n        for i in range(n_classes):\n          if DEBUG:\n            footer.append(\"  sum[{}] += ({} >> {}) & {};\".format(\n                i, o_name, (n_classes - i) * max_bits - max_bits,\n                hex((1 << max_bits) - 
1)))\n          else:\n            footer.append(\"  sum[{}] += {}.slc<{}>({});\".format(\n                i, o_name, max_bits, (n_classes - i) * max_bits - max_bits))\n        debug_print = []\n        for i in range(n_classes):\n          debug_print.append(\"{}.slc<{}>({}).to_string(AC_DEC)\".format(\n              o_name, max_bits, (n_classes - i) * max_bits - max_bits))\n        footer_s = (\n            \"  cout << \\\"{} \\\" <<\".format(o_name) +\n            \" << \\\" \\\" << \".join(debug_print) + \" << endl;\"\n        )\n        footer.append(\"  #ifdef _PRINT_DEBUG_\")\n        footer.append(footer_s)\n        footer.append(\"  #endif\")\n      footer.append(\"  {} max_tmp = sum[0];\".format(log2_o_type))\n      footer.append(\"  {} max_id = 0;\".format(o_type))\n      footer.append(\"  for(int i=1; i<{}; i++)\".format(1 << o_bits))\n      footer.append(\n        \"    if (sum[i] >= max_tmp) { max_tmp = sum[i]; max_id = i; }\")\n      out = \"  out = max_id;\"\n\n      footer.append(out)\n      footer += [\"}\"]\n    else:\n      n_classes = 1 << o_bits\n      max_bits = int(np.ceil(np.log2(max_value + 1)))\n      log2_o_list = int(np.ceil(np.log2(len(o_list))))\n      log2_o_type = \"wire [{}:0]\".format(log2_o_list + max_bits)\n      footer = []\n      for i in range(n_classes):\n        code_s = \"  {} sum_{} = \".format(log2_o_type, i)\n        code_term = []\n        for o_name in o_list:\n          code_term.append(\"{}[{}:{}]\".format(\n              o_name, (n_classes - i) * max_bits, (n_classes - i) * max_bits - max_bits))\n        code_s += \" + \".join(code_term) + \";\"\n        footer.append(code_s)\n        footer.append(\"  // always @(sum_{}) $display(\\\"sum_{} = %d\\\", sum_{});\".format(\n            i, i, i))\n      footer.append(\"  reg [{}:0] max_tmp;\".format(\n          log2_o_list + max_bits - 1))\n      footer.append(\"  reg [{}:0] max_id;\".format(o_bits-1))\n      footer.append(\"  integer i;\")\n      footer.append(\"  
always @(\" +\n                    \" or \".join(\n                        [\"sum_\" + str(i) for i in range(n_classes)]) + \")\")\n      footer.append(\"  begin\")\n      footer.append(\"    max_tmp = sum_0; max_id = 0;\")\n      for i in range(1, n_classes):\n        footer.append(\n            \"    if (sum_{} >= max_tmp) begin max_tmp = sum_{}; max_id = {}; end\".format(\n                i, i, i))\n      footer.append(\"  end\")\n      footer.append(\"  assign out = max_id;\")\n      footer.append(\"endmodule\")\n\n  return header + code + footer\n\n\ndef gen_testbench_sv(rf, name, bits, is_neg, o_bits, o_is_neg, x, y, p, code):\n  code.append(\"module tb;\")\n  x_0, x_1 = x.shape\n  x_0_log2 = int(np.ceil(np.log2(x_0)))\n  code.append(\"reg [{}:0] x_rom[{}:0];\".format(x_1-1, x_0-1))\n  code.append(\"initial $readmemb(\\\"x.rom\\\", x_rom, 0, {});\".format(x_0-1))\n  with open(\"x.rom\", \"w\") as f:\n    for i in range(len(x)):\n      f.write(\"\".join([str(int(v)) for v in x[i]]) + \"\\n\")\n\n  o_sign = \"signed \" if o_is_neg else \"\"\n  o_type = o_sign + \"[{}:0]\".format(o_bits - 1)\n  code.append(\"reg {} y_rom[{}:0];\".format(o_type,x_0-1))\n  code.append(\"reg {} p_rom[{}:0];\".format(o_type,x_0-1))\n  with open(\"y.rom\",\"w\") as f:\n    for i in range(len(y)):\n      f.write(hex(int(y[i]))+ \"\\n\")\n  with open(\"p.rom\",\"w\") as f:\n    for i in range(len(y)):\n      f.write(hex(int(p[i]))+ \"\\n\")\n  code.append(\"initial $readmemh(\\\"y.rom\\\", y_rom, 0, {});\".format(x_0-1))\n  code.append(\"initial $readmemh(\\\"p.rom\\\", p_rom, 0, {});\".format(x_0-1))\n  code.append(\"integer i;\")\n  code.append(\"integer cnt;\")\n  code.append(\"reg [{}:0] in;\".format(x_1-1))\n  code.append(\"wire {} out;\".format(o_type))\n  code.append(\"{} {}(in, out);\".format(name, name))\n  code.append(\"initial\")\n  code.append(\"begin\")\n  code.append(\"  cnt = 0;\")\n  code.append(\"  in = x_rom[i];\")\n  code.append(\"  for (i=0; i<{}; 
i=i+1)\".format(x_0))\n  code.append(\"  begin\")\n  code.append(\"    in = x_rom[i];\")\n  code.append(\"    #1000;\")\n  code.append(\"    if (p_rom[i] != out && y_rom[i] != out)\")\n  code.append(\"    begin\")\n  code.append(\"      $display(\\\"%d: %b y=%d p=%d -> %d\\\", i, x_rom[i], y_rom[i], p_rom[i], out);\")\n  code.append(\"    end\")\n  code.append(\"    else\")\n  code.append(\"    begin\")\n  code.append(\"      cnt = cnt + 1;\")\n  code.append(\"    end\")\n  code.append(\"  end\")\n  code.append(\"  $display(\\\"acc = %f\\\", 100.0 * cnt / {});\".format(x_0))\n  code.append(\"end\")\n  code.append(\"endmodule\")\n\n\ndef gen_testbench_cc(rf, name, bits, is_neg, o_bits, o_is_neg, x, y, p, code):\n  code.append(\"int x[{}][{}] = \".format(*x.shape) + \"{\")\n  for i in range(len(x)):\n    code_s = \"  {\" + \",\".join([str(int(v)) for v in x[i]]) + \"}\"\n    if i < len(x) - 1:\n      code_s = code_s + \",\"\n    code.append(code_s)\n  code.append(\"};\")\n  code_s = (\n      \"int y[{}] = \".format(y.shape[0]) + \"{\" +\n      \",\".join([str(int(v)) for v in y]) + \"};\"\n  )\n  code.append(code_s)\n  code_s = (\n      \"int p[{}] = \".format(p.shape[0]) + \"{\" +\n      \",\".join([str(int(v)) for v in p]) + \"};\"\n  )\n  code.append(code_s)\n\n  code.append(\"int main()\")\n  code.append(\"{\")\n  code.append(\"  double acc = 0.0;\")\n  if DEBUG:\n    code.append(\"  int in[{}];\".format(x.shape[1]))\n    code.append(\"  int out;\")\n  else:\n    code.append(\"  ac_int<{},0> in;\".format(x.shape[1]))\n    code.append(\"  ac_int<{},{}> out;\".format(o_bits, o_is_neg))\n\n  code.append(\"  for (int i=0; i<{}; i++)\".format(x.shape[0]) + \"{\")\n  code.append(\"    for (int j=0; j<{}; j++) in[j] = x[i][j];\".format(\n      x.shape[1]))\n  code.append(\"    {}(in, out);\".format(name))\n  code.append(\"    if (p[i] != out && y[i] != out) {\")\n  code.append(\"      cout << i << \\\": \\\";\")\n  code.append(\"      for (int j=0; j<{}; j++) cout << 
in[j];\".format(\n      x.shape[1]))\n  if DEBUG:\n    code.append(\"      cout << \\\" y=\\\" << y[i] << \\\" p=\\\" << p[i] << \\\" \\\" << out << endl;\")\n    code.append(\"    }\")\n    code.append(\"    acc += (y[i] == out);\")\n  else:\n    code.append(\"      cout << \\\" y=\\\" << y[i] << \\\" p=\\\" << p[i] << \\\" \\\" << out.to_int() << endl;\")\n    code.append(\"      #ifdef _PRINT_DEBUG_\")\n    code.append(\"        exit(1);\")\n    code.append(\"      #endif\")\n    code.append(\"    }\")\n    code.append(\"    acc += (y[i] == out.to_int());\")\n  code.append(\"  }\")\n  code.append(\"  cout << \\\"acc = \\\" << 100.0 * acc  / {} << endl;\".format(\n      x.shape[0]))\n  code.append(\"}\")\n\n"
  },
  {
    "path": "experimental/lo/optimizer.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements random forest or logic otimizer function.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport multiprocessing as mp\nimport os\nimport pickle\nimport random\nimport shutil\nimport subprocess\nimport sys\nimport time\nimport warnings\n\nimport numpy as np\nimport six\n\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.ensemble import RandomForestRegressor\n\nfrom .compress import Compressor\nfrom .generate_rf_code import gen_random_forest\nfrom .table import load\n\n\ndef file_compress(fin, fout):\n  \"\"\"Compresses table using hash set.\"\"\"\n  c = Compressor()\n  n_lines = 0\n  for line in open(fin):\n    n_lines += 1\n    line = line.strip()\n    c.add_entry(line)\n\n  f = open(fout, \"w\")\n  n_compressed = 0\n  for line in c():\n    n_compressed += 1\n    f.write(line + \"\\n\")\n  f.close()\n  print(\"... 
random forrest for {} reduced from {} to {} entries\".format(\n      os.path.basename(fin), n_lines, n_compressed))\n\n\ndef mp_rf_optimizer_func(fn_tuple):\n  \"\"\"Executes in parallel creation of random forrest creation.\"\"\"\n\n  fn, flags, file_suffix = fn_tuple\n\n  n_trees = flags[\"n_trees\"]\n  is_regressor = flags[\"is_regressor\"]\n  sample_size = flags[\"sample_size\"]\n  n_features = flags[\"n_features\"]\n  max_depth = flags[\"max_depth\"]\n\n  if not file_suffix:\n    file_suffix = \"none\"\n\n  path_split = fn.split(\"/\")\n  path = \"/\".join(path_split[:-1]) + \"/\"\n  fn_split = path_split[-1].split(\".\")\n  # o_file = path + \".\".join(fn_split[0:-2] + [fn_split[-1]])\n  cv_file = path + \".\".join(fn_split[0:-2] + [file_suffix])\n  rfb_file = path + \".\".join(fn_split[0:-2] + [\"rb\", \"bin\"])\n\n  # let's compress the table first to make the job easier for random forest.\n  # compression can usually achieve a ratio of 50x or more.\n\n  # compress(fn, o_file)\n  train = load(fn)\n\n  n_features = \"auto\" if not n_features else float(n_features)\n\n  # min_size = 1\n\n  if max_depth:\n    max_depth = int(max_depth)\n\n  print(\"... 
creating random forrest for \" + os.path.basename(fn) + \" with \" +\n        str(sample_size) + \" samples\")\n\n  if is_regressor:\n    rf = RandomForestRegressor(\n        n_estimators=n_trees,\n        max_depth=max_depth,\n        # min_samples_split=2,\n        # min_samples_leaf=min_size,\n        max_features=n_features,\n        # max_leaf_nodes=100,\n        # oob_score=True,\n        # warm_start=True,\n        bootstrap=True,\n        random_state=42,\n        n_jobs=1)\n  else:\n    rf = RandomForestClassifier(\n        n_estimators=n_trees,\n        max_depth=max_depth,\n        # min_samples_split=2,\n        # min_samples_leaf=min_size,\n        max_features=n_features,\n        # max_leaf_nodes=100,\n        # oob_score=True,\n        # warm_start=True,\n        bootstrap=True,\n        random_state=42,\n        n_jobs=1)\n\n  if sample_size and train.shape[0] >= 10000:\n    sample_size = int(sample_size)\n    np.random.seed(42)\n    idx = np.random.choice(train.shape[0], train.shape[0], replace=False)\n\n    x = train[idx[sample_size:], 0:-1]\n    y = train[idx[sample_size:], -1]\n\n    x_test = train[idx[0:sample_size], 0:-1]\n    y_test = train[idx[0:sample_size], -1]\n  else:\n    x = train[:, 0:-1]\n    y = train[:, -1]\n\n    x_test = x\n    y_test = y\n\n  estimators = []\n  with warnings.catch_warnings():\n    warnings.simplefilter(\"ignore\")\n    rf.fit(x, y)\n\n  func_name = fn_split[0]\n\n  bits = np.ceil(\n      np.log2(\n          np.abs(\n              np.amax(x, axis=0) -\n              np.amin(x, axis=0) + 1))).astype(np.int32)\n  is_neg = (np.amin(x, axis=0) < 0).astype(np.int8)\n\n  o_bits = np.ceil(\n      np.log2(\n          np.abs(\n              np.amax(y, axis=0) -\n              np.amin(y, axis=0) + 1))).astype(np.int32)\n  o_is_neg = (np.amin(y, axis=0) < 0).astype(np.int8)\n\n  rf.bits = bits\n  rf.is_neg = is_neg\n  rf.o_bits = o_bits\n  rf.o_is_neg = o_is_neg\n\n  code = gen_random_forest(\n      rf, func_name, bits, 
is_neg, o_bits, o_is_neg,\n      is_regressor=is_regressor, is_top_level=False,\n      is_cc=file_suffix == \"cc\")\n\n  open(cv_file, \"w\").write(\"\\n\".join(code))\n\n  p = 1.0 * np.round(rf.predict(x_test))\n\n  dy = np.max(train[:, -1]) - np.min(train[:, -1])\n\n  error = np.sum(np.abs(y_test - p)) / (1.0 * p.shape[0] * dy)\n  score = np.sum(y_test == p) / p.shape[0]\n\n  print(\"y:\", np.max(y_test), y_test[0:30].astype(np.int32))\n  print(\"p:\", np.max(p), p[0:30].astype(np.int32))\n\n  print(\"... model {} with score of {:.2f}% and error of {:.2f}%\".format(\n      func_name, 100.0*score, 100.0*error))\n\n  print(\"... saving model in {}\".format(rfb_file))\n  pickle.dump(rf, open(rfb_file, \"wb\"))\n  return rfb_file\n\n\ndef mp_abc_optimizer_func(fn):\n  \"\"\"Performs espresso and abc optimization on a single espresso input.\"\"\"\n\n  fn_split = fn.split(\".\")\n  o_file = \".\".join(fn_split[0:-2] + [fn_split[-1]])\n  v_file = \".\".join(fn_split[0:-2] + [\"v\"])\n  b_file = \".\".join(fn_split[0:-2] + [\"blif\"])\n\n  print(\"...running espresso in \" + fn)\n\n  espresso_flags = os.environ.get(\"ESPRESSO_FLAGS\", \"-Dexpand\")\n\n  cmd = \"espresso {} {} > {}\".format(fn, espresso_flags, o_file)\n\n  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)\n\n  output = output.strip()\n  if output:\n    print(output)\n    sys.stdout.flush()\n\n  # check if network is empty\n\n  for line in open(o_file):\n    line = line.strip()\n    if line[0:2] == \".p\":\n      terms = int(line[2:])\n      # empty : espresso optimized away all the logic\n      if terms == 0:\n        shutil.copyfile(fn, o_file)\n      break\n\n  print(\"...running abc in \" + o_file)\n\n  abc_flags = os.environ.get(\"ABC_FLAGS\", \"\")\n\n  abc_flags_list = abc_flags.split(\";\") if abc_flags else []\n\n  abc_cmds_list = (\n      [\"read_pla \" + o_file] + abc_flags_list +\n      [\"strash\",\n       \"dc2\",\n       \"strash\",\n       \"if -K 3\",\n       
\"write_verilog \" + v_file,\n       \"write_blif \" + b_file\n       ])\n\n  abc_cmds = \";\".join(abc_cmds_list)\n\n  cmd = \"abc -c '\" + abc_cmds + \"'\"\n\n  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)\n\n  output = output.strip()\n  if output:\n    print(output)\n    sys.stdout.flush()\n\n  print(\"...generated \" + v_file)\n\n\ndef run_abc_optimizer(files):\n  \"\"\"Implements logic optimizer using espresso/abc.\"\"\"\n\n  # intel processors sometimes return number of threads, not processors\n\n  cpus = mp.cpu_count() // 2\n\n  start_time = time.time()\n  pool = mp.Pool(cpus)\n  pool.map(mp_abc_optimizer_func, files)\n  pool.close()\n  print(\"Optimizer ran in {} seconds.\".format(time.time() - start_time))\n\n\ndef run_rf_optimizer(files, flags, file_suffix=\"cc\"):\n  \"\"\"Implements random forest main optimizer.\"\"\"\n\n  # intel processors sometimes return number of threads, not processors\n\n  cpus = mp.cpu_count() // 2\n\n  start_time = time.time()\n  pool = mp.Pool(cpus)\n  pool.map(mp_rf_optimizer_func, zip(\n      files, [flags]*len(files), [file_suffix]*len(files)))\n  pool.close()\n  print(\"Optimizer ran in {} seconds.\".format(time.time() - start_time))\n\n  # generates header file\n\n  # .../.../.../conv2d_0_m.csv/conv2d_0_m_0.csv\n  #\n  # returns conv2d_0_m for module_name\n\n  module_name = files[0].split(\"/\")[-2].split(\".\")[0]\n\n  path_split = files[0].split(\"/\")\n  path = \"/\".join(path_split[:-1]) + \"/\"\n  fn_split = path_split[-1].split(\".\")\n  rfb_file = path + \".\".join(fn_split[0:-2] + [\"rb\", \"bin\"])\n\n  rf = pickle.load(open(rfb_file, \"rb\"))\n\n  f = open(path + module_name + \".\" + file_suffix, \"w\")\n\n  if file_suffix == \"cc\":\n    f.write(\"#include <ac_int.h>\\n\\n\")\n\n  modules = []\n\n  for fn in files:\n    path_split = fn.split(\"/\")\n    path = \"/\".join(path_split[:-1]) + \"/\"\n    fn_split = path_split[-1].split(\".\")\n    v_file = \".\".join(fn_split[0:-2] + 
[file_suffix])\n\n    func_name = fn_split[0]\n\n    if file_suffix == \"v\":\n      f.write(\"'include \\\"\" + v_file + \"\\\"\\n\")\n    else:\n      f.write(\"#include \\\"\" + v_file + \"\\\"\\n\")\n\n    modules.append(func_name)\n\n  f.write(\"\\n\\n\")\n\n  if file_suffix == \"v\":\n    f.write(\"module \" + module_name + \"(\")\n    f.write(\"input [\" + str(np.sum(rf.bits)-1) + \":0] in, \")\n    o_sign = \" signed \" if rf.o_is_neg else \"\"\n    f.write(\"output \" + o_sign + \"[\" + str(len(modules)*rf.o_bits-1) +\n            \":0] out);\\n\")\n  else:\n    f.write(\"void \" + module_name + \"(\")\n    f.write(\"ac_int<\" + str(np.sum(rf.bits)) + \",false> in, \")\n    f.write(\"ac_int<\" + str(len(modules)*rf.o_bits) + \",\" +\n            (\"true\" if rf.o_is_neg else \"false\") +\n            \"> &out)\\n\")\n    f.write(\"{\\n\")\n\n  for o in range(len(modules)):\n    if file_suffix == \"v\":\n      f.write(\"  wire \" + (\"signed \" if rf.o_is_neg else \"\") +\n              \"[\" + str(rf.bits[-1]-1) + \":0] \"\n              \"o_\" + str(o) + \";\\n\")\n      f.write(\"  \" + modules[o] + \"(in, o_\" + str(o) + \");\\n\")\n      f.write(\"  assign out[\" + str(rf.o_bits*(o+1)-1) + \":\" +\n              str(rf.bits[-1]*o) + \"] = o_\" + str(o) + \";\\n\")\n    else:\n      f.write(\"  ac_int<\" + str(rf.o_bits) + \",\" +\n              (\"true\" if rf.o_is_neg else \"false\") +\n              \"> o_\" + str(o) + \"; \" + modules[o] +\n              \"(in, o_\" + str(o) + \"); out.set_slc<\" +\n              str(rf.o_bits) + \">(\" +\n              str(rf.o_bits*o) + \",\" +\n              \"o_\" + str(o) + \");\\n\")\n\n  if file_suffix == \"cc\":\n    f.write(\"}\")\n\n  f.close()\n"
  },
  {
    "path": "experimental/lo/random_forest/__init__.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom .utils import load\nfrom .utils import load_csv\nfrom .utils import load_pla\n# from .random_forest import RandomForest\n# from .random_tree import RandomTree\n"
  },
  {
    "path": "experimental/lo/random_forest/gen_random_tree.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Generates expressions for random trees.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.tree import DecisionTreeRegressor\n\ndef gen_random_tree_cc(tree):\n  n_nodes = tree.node_count\n  children_left = tree.children_left\n  children_right = tree.children_right\n  feature = tree.feature\n  threshold = tree.threshold\n\n  node_depth = np.zeros(shape=n_nodes, dtype=np.int64)\n  is_leaves = np.zeros(shape=n_nodes, dtype=bool)\n\n  stack = [(0, -1)]\n\n  while (len(stack) > 0):\n    node_id, parent_depth = stack.pop()\n    node_depth[node_id] = parent_depth + 1\n\n    if children_left[node_id] != children_right[node_id]:\n      stack.append((chidren_left[node_id], parent_depth+1))\n      stack.append((children_right[node_id], parent_depth+1))\n    else:\n      is_leaves[node_id] = True\n\n  for i in range(n_nodes):\n    if is_leaves[i]:\n      print(\"{}n_{} leaf node.\".format(\"  \"*node_depth[i], i))\n    else:\n      print(\"{}n_{} (i_{} <= {}) ? n_{} : n_{}\".format(\n          \"  \"*node_depth[i], i, feature[i], threshold[i],\n          children_left[i], children_right[i]))\n"
  },
  {
    "path": "experimental/lo/random_forest/parser.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Parses PLA format usig ply.\"\"\"\nfrom ply import yacc\nfrom ply import lex\nimport numpy as np\n\n_1 = 1\n_0 = 2\n_X = 3\n_U = 0\n\nNOT = {_0: _1, _1: _0, _X: _U, _U: _U}\n\nclass PLA:\n  def __init__(self):\n    self.pla_i = []\n    self.pla_o = []\n\npla = PLA()\n\ntokens = [\n  \"I\",\n  \"O\",\n  \"MV\",\n  \"ILB\",\n  \"OB\",\n  \"P\",\n  \"L\",\n  \"E\",\n  \"TYPE\",\n  \"SYMBOL\",\n  \"NUMBER\",\n  \"NEWLINE\"\n]\n\nt_ignore = \" \\t|\"\nt_I = r\"\\.[iI]\"\nt_O = r\"\\.[oO]\"\nt_MV = r\"\\.[mM][vV]\"\nt_ILB = r\"\\.[iI][lL][bB]\"\nt_OB = r\"\\.[oO][bB]\"\nt_P = r\"\\.[pP]\"\nt_L = r\"\\.[lL]\"\nt_E = r\"\\.[eE]\"\nt_TYPE = r\"\\.type\"\nt_SYMBOL = r\"[a-zA-Z_][a-zA-Z0-9_\\<\\>\\-\\$]*\"\n\ndef t_NUMBER(t):\n  r\"[\\d\\-]+\"\n  return t\n\ndef t_NEWLINE(t):\n  r\"\\n+\"\n  t.lexer.lineno += t.value.count(\"\\n\")\n  return t\n\ndef t_error(t):\n  print(\"Illegal character '{}'\".format(t.value))\n  t.lexer.skip(1)\n\nlex.lex()\n\ndef p_pla(p):\n  \"\"\"pla : pla_declarations pla_table pla_end\"\"\"\n\ndef p_pla_declarations(p):\n  \"\"\"pla_declarations : pla_declarations pla_declaration\n                      | pla_declaration\"\"\"\n\ndef p_pla_declaration(p):\n  \"\"\"pla_declaration : I NUMBER NEWLINE\n                     | O NUMBER NEWLINE\n                     | P 
NUMBER NEWLINE\n                     | MV number_list NEWLINE\n                     | ILB symbol_list NEWLINE\n                     | OB symbol_list NEWLINE\n                     | L NUMBER symbol_list NEWLINE\n                     | TYPE SYMBOL NEWLINE\n  \"\"\"\n  token = p[1].lower()\n  if token == \".i\":\n    pla.ni = int(p[2])\n  elif token == \".o\":\n    pla.no = int(p[2])\n  elif token == \".mv\":\n    pla.mv = [int(v) for v in p[2]]\n  elif token == \".ilb\":\n    pla.ilb = p[2]\n  elif token == \".ob\":\n    pla.ob = p[2]\n  elif token == \".l\":\n    pla.label = p[2]\n  elif token == \".type\":\n    pla.set_type = p[2]\n\n\ndef p_pla_table(p):\n  \"\"\"pla_table : pla_table number_symbol_list NEWLINE\n               | number_symbol_list NEWLINE\"\"\"\n  if len(p[1:]) == 3:\n    line = \"\".join(p[2])\n  else:\n    line = \"\".join(p[1])\n\n  assert hasattr(pla, \"ni\") and hasattr(pla, \"no\")\n\n  # right now we only process binary functions\n\n  line = [_1 if v == \"1\" else _0 if v == \"0\" else _X for v in line]\n\n  pla.pla_i.append(line[0:pla.ni])\n  pla.pla_o.append(line[pla.ni:])\n\n\ndef p_pla_end(p):\n  \"\"\"pla_end : E opt_new_line\"\"\"\n  pass\n\n\ndef p_opt_new_line(p):\n  \"\"\"opt_new_line : NEWLINE\n                  |\n  \"\"\"\n  pass\n\n\ndef p_number_list(p):\n  \"\"\"number_list : number_list NUMBER\n                 | NUMBER\n  \"\"\"\n  if len(p[1:]) == 2:\n    p[0] = p[1] + [p[2]]\n  else:\n    p[0] = [p[1]]\n\n\ndef p_symbol_list(p):\n  \"\"\"symbol_list : symbol_list SYMBOL\n                 | SYMBOL\n  \"\"\"\n  if len(p[1:]) == 2:\n    p[0] = p[1] + [p[2]]\n  else:\n    p[0] = [p[1]]\n\n\ndef p_number_symbol_list(p):\n  \"\"\"number_symbol_list : number_symbol_list number_or_symbol\n                        | number_or_symbol\n  \"\"\"\n  if len(p[1:]) == 2:\n    p[0] = p[1] + [p[2]]\n  else:\n    p[0] = [p[1]]\n\n\ndef p_number_or_symbol(p):\n  \"\"\"number_or_symbol : NUMBER\n                      | SYMBOL\n  \"\"\"\n  
p[0] = p[1]\n\n\ndef p_error(p):\n  print(\"Error text at {}\".format(p)) #p.value))\n\nyacc.yacc()\n\ndef get_tokens(fn):\n  lex.input(\"\".join(open(fn).readlines()))\n  return lex.token\n\ndef parse(fn):\n  yacc.parse(\"\".join(open(fn).readlines()))\n\n  pla.pla_i = np.array(pla.pla_i)\n  pla.pla_o = np.array(pla.pla_o)\n\n  return pla\n"
  },
  {
    "path": "experimental/lo/random_forest/random_forest.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Creates a random forest to generate hardware for it.\"\"\"\n\nimport numpy as np\nimport pickle\nimport os\n\nfrom .random_tree import RandomTree\n\ndef fit_parallel(max_depth, min_size, sample, mask_stuck_at_values):\n\n  tree = RandomTree(max_depth, min_size)\n  tree.fit(sample, mask_stuck_at_values)\n\n  return tree\n\n\nclass RandomForest:\n  def __init__(\n      self, max_depth, min_size, n_trees, use_mean=False,\n      sample_size=None):\n    self.max_depth = max_depth\n    self.min_size = min_size\n    self.use_mean = use_mean\n    self.sample_size = sample_size\n    self.n_trees = n_trees\n    self.inputs = None\n    self.bits = None\n    self.is_neg = None\n\n    self.trees = None\n\n  @staticmethod\n  def save(model, filename):\n    \"\"\"Saves model to disk.\"\"\"\n    print(\"... saving model in {}\".format(filename))\n    f = open(filename, \"wb\")\n    pickle.dump(model, f)\n    f.close()\n\n\n  @staticmethod\n  def load(filename):\n    \"\"\"Loads model from disk.\"\"\"\n    print(\"... 
loading model from {}\".format(filename))\n    f = open(filename, \"rb\")\n    random_forest = pickle.load(f)\n    f.close()\n\n    return random_forest\n\n\n  def subsample(self, dataset):\n    \"\"\"Subsamples dataset if we do not want to use entire dataset.\"\"\"\n    sample_idx = np.random.choice(\n        dataset.shape[0], self.sample_size, replace=True)\n    sample = dataset[sample_idx,...]\n    return sample\n\n\n  def fit(self, dataset, verbose=False):\n    \"\"\"Fits random tree to model.\"\"\"\n    self.inputs = dataset.shape[1]-1\n    self.bits = np.ceil(\n        np.log2(\n            np.abs(\n                np.amax(dataset, axis=0) -\n                np.amin(dataset, axis=0)))).astype(np.int32)\n    self.is_neg = (np.amin(dataset, axis=0) < 0).astype(np.int8)\n\n    self.trees = []\n\n    for i in range(self.n_trees):\n      if verbose:\n        print(\"... creating tree {}\".format(i))\n\n      # as subsample is an expensive operation, we will only perform it if it\n      # reduces the dataset substantially\n\n      if self.sample_size and self.sample_size < 0.3 * dataset.shape[0]:\n        if verbose:\n          print(\"... 
generated subsample of size {}\".format(self.sample_size))\n        sample = self.subsample(dataset)\n      else:\n        sample = dataset\n\n      self.trees.append(fit_parallel(\n          self.max_depth, self.min_size, sample, True))\n\n\n  def predict_row(self, row):\n    \"\"\"Predicts output for single row.\"\"\"\n    result = [tree.predict_row(row) for tree in self.trees]\n    if self.use_mean:\n      return int(np.round(np.mean(result)))\n    else:\n      return max(set(result), key=result.count)\n\n\n  def predict(self, data):\n    \"\"\"Predicts class based on data.\"\"\"\n\n    assert self.trees is not None\n\n    return np.array([self.predict_row(data[i]) for i in range(data.shape[0])])\n\n\n  def gen_code(self, filename, func_name):\n    \"\"\"Generates code for model.\"\"\"\n\n    assert self.bits is not None\n\n    vd_list = []\n    n_vars = 0\n    for tree in self.trees:\n      vd_list.append(tree.gen_code(n_vars))\n      n_vars += len(vd_list[-1])\n\n    # checks the type by the suffix\n\n    is_v = filename.split(\".\")[-1] == \"v\"\n\n    assert self.inputs\n\n    f = open(filename, \"w\")\n\n    i_bits = np.sum(self.bits[:-1])\n    o_bits = self.bits[-1]\n    o_sign = self.is_neg[-1]\n\n    if is_v:\n      f.write(\"module {}(input [{}:0] i, output [{}:0] o);\\n\".format(\n          func_name, i_bits-1, o_bits-1))\n    else:\n      f.write(\"#include<ac_int.h>\\n\\n\")\n      f.write(\"void {}(ac_int<{},false> i, ac_int<{},{}> &o)\\n\".format(\n          func_name, i_bits, o_bits, o_sign))\n      f.write(\"{\\n\")\n\n\n    # write function headline\n    s_in_line = []\n\n    i_bits = self.bits[0]\n    i_sign = self.is_neg[0]\n\n    if is_v:\n      i_datatype = \"  wire {}[{}:0] \".format(\n          \"signed \" if i_sign else \"\", i_bits-1)\n    else:\n      i_datatype = \"  ac_int<{},{}> \".format(i_bits, i_sign)\n\n    len_s = len(i_datatype)\n\n    for i in range(self.inputs):\n      if is_v:\n        s = (\n            \"i_\" + str(i) + \" 
= \" + \"i[\" + str(i_bits*(i+1)-1) + \":\" +\n            str(i_bits*i) + \"]\"\n        )\n      else:\n        s = (\n            \"i_\" + str(i) + \" = \" + \"i.slc<\" + str(i_bits) + \">(\" +\n            str(i_bits*i) + \")\"\n        )\n      if (\n          len_s + len(s) + 2 > 70 or i_bits != self.bits[i] or\n          i_sign != self.is_neg[i]\n      ):\n        f.write(i_datatype + \", \".join(s_in_line) + \";\\n\")\n\n        s_in_line = []\n        if is_v:\n          i_datatype = \"  wire {}[{}:0] \".format(\n              \"signed \" if i_sign else \"\", i_bits-1)\n        else:\n          i_datatype = \"  ac_int<{},{}> \".format(i_bits, i_sign)\n\n        len_s = len(i_datatype)\n\n      s_in_line.append(s)\n      len_s += len(s) + 2\n\n    if s_in_line:\n      f.write(i_datatype + \", \".join(s_in_line) + \";\\n\")\n\n    if is_v:\n      o_datatype = \"  wire {}[{}:0] \".format(\n          \"signed \" if o_sign else \"\", o_bits)\n    else:\n      o_datatype = \"  ac_int<{},{}> \".format(o_bits, o_sign)\n\n    o_list = []\n    for i in range(len(vd_list)):\n      for v in vd_list[i]:\n        if is_v:\n          f.write(o_datatype + v + \" = \" + vd_list[i][v] + \";\\n\")\n        else:\n          f.write(o_datatype + v + \" = \" + vd_list[i][v] + \";\\n\")\n      f.write(\"\\n\")\n      o_list.append(v)\n\n    assert len(o_list) <= 3\n\n    if is_v:\n      f.write(\"  assign \")\n    else:\n      f.write(\"  \")\n\n    if len(o_list) == 1:\n      f.write(\"o = \" + o_list[0] + \";\")\n    elif len(o_list) == 2:\n      cond = \"( \" + o_list[0] + \" == \" + o_list[1] + \" ) \"\n      n1 = o_list[0]\n      n0 = \"( ( \" + \" + \".join(o_list) + \" ) >> 1 )\"\n      f.write(\"o = \" + cond + \"? 
\" + n1 + \": \" + n0)\n    elif len(o_list) == 3:\n      cond = (\n          \"( \" +\n          \"( \" + \" == \".join(o_list[0:2]) + \" )?\" + o_list[0] + \":\" +\n          \"( \" + \" == \".join(o_list[1:]) + \" )?\" + o_list[1] + \":\" +\n          \"( \" + \" == \".join([o_list[0], o_list[2]]) + \" )?\" + o_list[0] +\n          \":\" + \"( \" + \" < \".join(o_list[0:2]) + \" ) ?\" +\n          \"( ( \" + \" < \".join(o_list[1:]) + \" ) ?\" + o_list[1] + \":\" +\n          o_list[2] + \" ) : \" +\n          \"( ( \" + \" < \".join([o_list[0], o_list[2]]) + \" ) ?\" + o_list[0] +\n          \":\" + o_list[2] + \" )\"\n      )\n      f.write(\"o = \" + cond + \";\\n\")\n    if is_v:\n      f.write(\"endmodule\")\n    else:\n      f.write(\"}\")\n\n    f.close()\n"
  },
  {
    "path": "experimental/lo/random_forest/random_tree.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements Random Forest for quantized netlist.\"\"\"\n\nfrom csv import reader\nfrom math import sqrt\nimport os\nimport pprint\nfrom random import seed\nfrom random import randrange\nimport sys\n\nimport numpy as np\nfrom .parser import parse, _X, _0, _1\n\nclass RandomTree:\n  def __init__(self, max_depth, min_size):\n    self.min_size = min_size\n    self.max_depth = max_depth\n    self.n_features = None\n\n  def split_into_groups(self, index, value, dataset):\n    mask_l = dataset[:, index] < value\n    mask_r = np.logical_not(mask_l)\n    left = dataset[mask_l,...]\n    right = dataset[mask_r,...]\n    return left, right\n\n  def gini_index(self, groups, classes):\n    # count all samples at split point\n    n_instances = float(sum([len(group) for group in groups]))\n    # sum weighted Gini index for each group\n    gini = 0.0\n    for group in groups:\n      size = float(len(group))\n      # avoid divide by zero\n      if size == 0:\n        continue\n      score = 0.0\n      # score the group based on the score for each class\n      for class_val in classes:\n        p = np.array([np.sum(group[:, -1] == class_val) / size\n                      for class_val in classes])\n        score += np.sum(np.power(p, 2))\n\n      # weight the group score by its relative size\n      
gini += (1.0 - score) * (size / n_instances)\n    return gini\n\n  def select_best_split(self, dataset):\n    class_values = list(set(list(dataset[:,-1].flatten())))\n\n    b_index, b_value, b_score, b_groups = 9999, 9999, 9999, None\n\n    # because several of the entries may be don't cares, we will select the\n    # whole set and restrict to only the ones that are not don't cares\n\n    features = list(\n        np.random.choice(len(dataset[0])-1, self.n_features, p=self.probs,\n                         replace=False))\n\n    for index in features:\n      assert self.mask[index] == True\n      b_values = list(set(list(dataset[:, index])))\n      for b in b_values:\n        groups = self.split_into_groups(index, b, dataset)\n        gini = self.gini_index(groups, class_values)\n        if gini < b_score:\n          b_index, b_value, b_score, b_groups = index, b, gini, groups\n\n    return {'index': b_index, 'value': b_value, 'groups': b_groups}\n\n  def select_terminal(self, group):\n    outcomes = list(group[:,-1].flatten())\n    return max(set(outcomes), key=outcomes.count)\n\n  def split_node(self, node, depth):\n    left, right = node['groups']\n    del(node['groups'])\n\n    # check for a no split\n    if left.shape[0] == 0:\n      node['left'] = node['right'] = self.select_terminal(right)\n      return\n    elif right.shape[0] == 0:\n      node['left'] = node['right'] = self.select_terminal(left)\n      return\n\n    # check for max depth\n    if depth >= self.max_depth:\n      node['left'], node['right'] = (self.select_terminal(left),\n                                     self.select_terminal(right))\n      return\n\n    # process left child\n    if len(set(list(\n        left[:, -1].flatten()))) == 1 or left.shape[0] <= self.min_size:\n      node['left'] = self.select_terminal(left)\n    else:\n      node['left'] = self.select_best_split(left)\n      self.split_node(node['left'], depth + 1)\n\n    # process right child\n    if len(set(list(\n        
right[:, -1].flatten()))) == 1 or right.shape[0] <= self.min_size:\n      node['right'] = self.select_terminal(right)\n    else:\n      node['right'] = self.select_best_split(right)\n      self.split_node(node['right'], depth+1)\n\n  def create_mask(self, dataset):\n    self.mask = np.amin(dataset, axis=0) != np.amax(dataset, axis=0)\n\n  def fit(self, dataset, mask_stuck_at_values=False):\n    if mask_stuck_at_values:\n      self.create_mask(dataset)\n    else:\n      self.mask = np.ones(dataset.shape[1])\n\n    self.probs = self.mask[:-1].astype(np.float32) / np.sum(self.mask[:-1])\n\n    if not self.n_features:\n      self.n_features = int(np.sqrt(dataset.shape[1] - 1))\n\n    self.root = self.select_best_split(dataset)\n    self.split_node(self.root, 1)\n\n  def predict_internal(self, node, data):\n    if data[node['index']] < node['value']:\n      if isinstance(node['left'], dict):\n        return self.predict_internal(node['left'], data)\n      else:\n        return node['left']\n    else:\n      if isinstance(node['right'], dict):\n        return self.predict_internal(node['right'], data)\n      else:\n        return node['right']\n\n\n  def predict_row(self, row):\n    return self.predict_internal(self.root, row)\n\n\n  def predict(self, data):\n    return np.array(self.predict_row(data[i]) for i in range(data.shape[0]))\n\n  def gen_code_internal(self, node, var_dict, n_offset):\n    # traverse left\n    cond = '( i_' + str(node['index']) + ' < ' + str(node['value']) + ' )'\n    if isinstance(node['left'], dict):\n      n0 = self.gen_code_internal(node['left'], var_dict, n_offset)\n    else:\n      n0 = str(node['left'])\n\n    if isinstance(node['right'], dict):\n      n1 = self.gen_code_internal(node['right'], var_dict, n_offset)\n    else:\n      n1 = str(node['right'])\n\n    index = len(var_dict) + n_offset\n    r = 'n_' + str(index)\n    stmt = cond + '? 
' + n0 + ' : ' + n1\n    var_dict[r] = stmt\n\n    return r\n\n  def gen_code(self, n_offset=0):\n    var_dict = {}\n\n    self.gen_code_internal(self.root, var_dict, n_offset)\n\n    return var_dict\n"
  },
  {
    "path": "experimental/lo/random_forest/utils.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Reads and processes tables of PLAs and CSVs.\"\"\"\n\nfrom csv import reader\nfrom math import sqrt\nimport os\nimport pprint\nfrom random import seed\nfrom random import randrange\nimport sys\n\nimport numpy as np\nfrom .parser import parse, _X, _0, _1\n\n\ndef str_column_to_float(dataset, column):\n  \"\"\"Converts string column to float.\"\"\"\n  for row in dataset:\n    row[column] = float(row[column].strip())\n\ndef str_column_to_int(dataset, column):\n  \"\"\"Converts string column to int.\"\"\"\n  for row in dataset:\n    row[column] = int(row[column].strip())\n\ndef str_column_to_number(dataset, column):\n  \"\"\"Converts output to integer if possible or float.\"\"\"\n\n  class_values = [row[column] for row in dataset]\n  unique = set(class_values)\n  lookup = dict()\n  is_symbolic = False\n  for value in unique:\n    try:\n      # try int first\n      lookup[value] = int(value)\n    except ValueError:\n      try:\n        # if it fails, try float\n        lookup[value] = float(value)\n      except ValueError:\n        # if it fails, it is symbolic\n        is_symbolic = True\n        break\n\n  # best we can do is to assign unique numbers to the classes\n  if is_symbolic:\n    for i, value in enumerate(unique):\n      lookup[value] = i\n\n  # convert output to unique 
number\n  for row in dataset:\n    row[column] = lookup[row[column]]\n\n  return lookup\n\n\ndef load_csv(filename):\n  \"\"\"Loads CSV file.\"\"\"\n  dataset = list()\n  with open(filename, 'r') as file:\n    csv_reader = reader(file)\n    for row in csv_reader:\n      if not row:\n        continue\n      dataset.append(row)\n\n  # converts data to int's\n  for i in range(0, len(dataset[0])-1):\n    str_column_to_int(dataset, i)\n\n  # converts output to int or float\n  str_column_to_number(dataset, len(dataset[0])-1)\n  dataset = np.array(dataset)\n\n  return dataset\n\n\ndef load_pla(filename):\n  \"\"\"Loads PLA file.\"\"\"\n  dataset = list()\n  pla = parse(filename)\n  for i,o in zip(pla.pla_i, pla.pla_o):\n    i_s = [1 if v == _1 else 0 if v == _0 else 0 for v in i]\n    o_s = [sum([(1 << (len(o)-1-oo)) if o[oo] == _1 else 0\n                for oo in range(len(o))])]\n    dataset.append(i_s + o_s)\n  dataset = np.array(dataset)\n  return dataset\n\n\ndef load(filename):\n  \"\"\"Loads and decides if we will load PLA or CSV file based on suffix.\"\"\"\n\n  suffix_split = filename.split(\".\")\n\n  if suffix_split[-1] == \"pla\":\n    print(\"... loading pla\")\n    dataset = load_pla(filename)\n  else:\n    dataset = load_csv(filename)\n  return dataset\n\n"
  },
  {
    "path": "experimental/lo/receptive.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport math\n\nfrom .utils import get_padding_value\n\n\ndef print_rf(layer_name, x):\n  print(\"Layer {}:\".format(layer_name))\n  print(\n      \"\\theight/width: {}\\n\\tstride: {}\\n\\teq_kernel_size: {}\\n\\tstart: {}\\n\".format(\n          *x)\n  )\n\n\ndef rf_computation_for_layer(layer, layer_in):\n  k, s, p = layer\n  n_in, j_in, r_in, start_in = layer_in\n\n  n_out = int(math.floor((n_in + 2*p - k)/s)) + 1\n\n  if s == 1 and p == 1:\n    n_out = n_in\n\n  actual_p = (n_out-1)*s - n_in + k\n  p_r = math.ceil(actual_p/2)\n  p_l = math.floor(actual_p/2)\n\n  j_out = j_in * s\n\n  r_out = r_in + (k-1)*j_in\n\n  start_out = start_in + (int((k-1)/2) - p_l) * j_in\n\n  return n_out, j_out, r_out, start_out\n\n\ndef model_to_receptive_field(model, i_name, o_name):\n  layers_h = []\n  layers_w = []\n\n  i_layer = model.get_layer(i_name)\n  o_layer = model.get_layer(o_name)\n\n  # right now this only works for sequential layers\n\n  i_index = model.layers.index(i_layer)\n  o_index = model.layers.index(o_layer)\n\n  for i in range(i_index, o_index+1):\n    k_h, k_w = (1, 1)\n    s_h, s_w = (1, 1)\n    p_h, p_w = (0, 0)\n\n    if hasattr(model.layers[i], \"kernel_size\"):\n   
   kernel = model.layers[i].kernel_size\n\n      if isinstance(kernel, int):\n        kernel = [kernel, kernel]\n\n      k_h, k_w = kernel[0], kernel[1]\n\n    if hasattr(model.layers[i], \"strides\"):\n      strides = model.layers[i].strides\n\n      if isinstance(strides, int):\n        strides = [strides, strides]\n\n      s_h, s_w = strides[0], strides[1]\n\n    if hasattr(model.layers[i], \"padding\"):\n      padding = model.layers[i].padding\n\n      if isinstance(padding, str):\n        padding = [padding, padding]\n\n      p_h = get_padding_value(padding[0], k_h)\n      p_w = get_padding_value(padding[1], k_w)\n\n    layers_h.append((k_h, s_h, p_h))\n    layers_w.append((k_w, s_w, p_w))\n\n  x_h = (i_layer.input.shape[1], 1, 1, 0.5)\n  x_w = (i_layer.input.shape[2], 1, 1, 0.5)\n\n  for l_h, l_w in zip(layers_h, layers_w):\n    x_h = rf_computation_for_layer(l_h, x_h)\n    x_w = rf_computation_for_layer(l_w, x_w)\n\n  strides = (x_h[1], x_w[1])\n  kernel = (x_h[2], x_w[2])\n  padding = (\"valid\", \"valid\")\n\n  return (strides, kernel, padding)\n\n"
  },
  {
    "path": "experimental/lo/table/__init__.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom .utils import load\nfrom .utils import load_csv\nfrom .utils import load_pla\n"
  },
  {
    "path": "experimental/lo/table/parser.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Parses PLA format using ply.\"\"\"\nfrom ply import yacc\nfrom ply import lex\nimport numpy as np\n\n_1 = 1\n_0 = 2\n_X = 3\n_U = 0\n\nNOT = {_0: _1, _1: _0, _X: _U, _U: _U}\n\nclass PLA:\n  def __init__(self):\n    self.pla_i = []\n    self.pla_o = []\n\npla = PLA()\n\ntokens = [\n  \"I\",\n  \"O\",\n  \"MV\",\n  \"ILB\",\n  \"OB\",\n  \"P\",\n  \"L\",\n  \"E\",\n  \"TYPE\",\n  \"SYMBOL\",\n  \"NUMBER\",\n  \"NEWLINE\"\n]\n\nt_ignore = \" \\t|\"\nt_I = r\"\\.[iI]\"\nt_O = r\"\\.[oO]\"\nt_MV = r\"\\.[mM][vV]\"\nt_ILB = r\"\\.[iI][lL][bB]\"\nt_OB = r\"\\.[oO][bB]\"\nt_P = r\"\\.[pP]\"\nt_L = r\"\\.[lL]\"\nt_E = r\"\\.[eE]\"\nt_TYPE = r\"\\.type\"\nt_SYMBOL = r\"[a-zA-Z_][a-zA-Z0-9_\\<\\>\\-\\$]*\"\n\ndef t_NUMBER(t):\n  r\"[\\d\\-]+\"\n  return t\n\ndef t_NEWLINE(t):\n  r\"\\n+\"\n  t.lexer.lineno += t.value.count(\"\\n\")\n  return t\n\ndef t_error(t):\n  print(\"Illegal character '{}'\".format(t.value))\n  t.lexer.skip(1)\n\nlex.lex()\n\ndef p_pla(p):\n  \"\"\"pla : pla_declarations pla_table pla_end\"\"\"\n\ndef p_pla_declarations(p):\n  \"\"\"pla_declarations : pla_declarations pla_declaration\n                      | pla_declaration\"\"\"\n\ndef p_pla_declaration(p):\n  \"\"\"pla_declaration : I NUMBER NEWLINE\n                     | O NUMBER NEWLINE\n                     | P 
NUMBER NEWLINE\n                     | MV number_list NEWLINE\n                     | ILB symbol_list NEWLINE\n                     | OB symbol_list NEWLINE\n                     | L NUMBER symbol_list NEWLINE\n                     | TYPE SYMBOL NEWLINE\n  \"\"\"\n  token = p[1].lower()\n  if token == \".i\":\n    pla.ni = int(p[2])\n  elif token == \".o\":\n    pla.no = int(p[2])\n  elif token == \".mv\":\n    pla.mv = [int(v) for v in p[2]]\n  elif token == \".ilb\":\n    pla.ilb = p[2]\n  elif token == \".ob\":\n    pla.ob = p[2]\n  elif token == \".l\":\n    pla.label = p[2]\n  elif token == \".type\":\n    pla.set_type = p[2]\n\n\ndef p_pla_table(p):\n  \"\"\"pla_table : pla_table number_symbol_list NEWLINE\n               | number_symbol_list NEWLINE\"\"\"\n  if len(p[1:]) == 3:\n    line = \"\".join(p[2])\n  else:\n    line = \"\".join(p[1])\n\n  assert hasattr(pla, \"ni\") and hasattr(pla, \"no\")\n\n  # right now we only process binary functions\n\n  line = [_1 if v == \"1\" else _0 if v == \"0\" else _X for v in line]\n\n  pla.pla_i.append(line[0:pla.ni])\n  pla.pla_o.append(line[pla.ni:])\n\n\ndef p_pla_end(p):\n  \"\"\"pla_end : E opt_new_line\"\"\"\n  pass\n\n\ndef p_opt_new_line(p):\n  \"\"\"opt_new_line : NEWLINE\n                  |\n  \"\"\"\n  pass\n\n\ndef p_number_list(p):\n  \"\"\"number_list : number_list NUMBER\n                 | NUMBER\n  \"\"\"\n  if len(p[1:]) == 2:\n    p[0] = p[1] + [p[2]]\n  else:\n    p[0] = [p[1]]\n\n\ndef p_symbol_list(p):\n  \"\"\"symbol_list : symbol_list SYMBOL\n                 | SYMBOL\n  \"\"\"\n  if len(p[1:]) == 2:\n    p[0] = p[1] + [p[2]]\n  else:\n    p[0] = [p[1]]\n\n\ndef p_number_symbol_list(p):\n  \"\"\"number_symbol_list : number_symbol_list number_or_symbol\n                        | number_or_symbol\n  \"\"\"\n  if len(p[1:]) == 2:\n    p[0] = p[1] + [p[2]]\n  else:\n    p[0] = [p[1]]\n\n\ndef p_number_or_symbol(p):\n  \"\"\"number_or_symbol : NUMBER\n                      | SYMBOL\n  \"\"\"\n  
p[0] = p[1]\n\n\ndef p_error(p):\n  print(\"Error text at {}\".format(p)) #p.value))\n\nyacc.yacc()\n\ndef get_tokens(fn):\n  lex.input(\"\".join(open(fn).readlines()))\n  return lex.token\n\ndef parse(fn):\n  yacc.parse(\"\".join(open(fn).readlines()))\n\n  pla.pla_i = np.array(pla.pla_i)\n  pla.pla_o = np.array(pla.pla_o)\n\n  return pla\n"
  },
  {
    "path": "experimental/lo/table/utils.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Reads and processes tables of PLAs and CSVs.\"\"\"\n\nfrom csv import reader\nfrom csv import QUOTE_NONNUMERIC\nfrom math import sqrt\nimport os\nimport pprint\nfrom random import seed\nfrom random import randrange\nimport sys\n\nimport numpy as np\nfrom .parser import parse, _X, _0, _1\n\n\ndef str_column_to_float(dataset, column):\n  \"\"\"Converts string column to float.\"\"\"\n  for row in dataset:\n    row[column] = float(row[column].strip())\n\ndef str_column_to_int(dataset, column, d_values):\n  \"\"\"Converts string column to int.\"\"\"\n  for row in dataset:\n    v = int(row[column].strip())\n    row[column] = v if not d_values else d_values[v]\n\ndef str_column_to_number(dataset, column):\n  \"\"\"Converts output to integer if possible or float.\"\"\"\n\n  class_values = [row[column] for row in dataset]\n  unique = set(class_values)\n  lookup = dict()\n  is_symbolic = False\n  for value in unique:\n    try:\n      # try int first\n      lookup[value] = int(value)\n    except ValueError:\n      try:\n        # if it fails, try float\n        lookup[value] = float(value)\n      except ValueError:\n        # if it fails, it is symbolic\n        is_symbolic = True\n        break\n\n  # best we can do is to assign unique numbers to the classes\n  if is_symbolic:\n    for i, 
value in enumerate(unique):\n      lookup[value] = i\n\n  # convert output to unique number\n  for row in dataset:\n    row[column] = lookup[row[column]]\n\n  return lookup\n\n\ndef int2bin(v, bits):\n  str_v = format((v & ((1<<bits)-1)), \"#0\" + str(bits+2) + \"b\")[2:]\n  return [int(b) for b in str_v]\n\n\ndef load_csv(filename):\n  \"\"\"Loads CSV file.\"\"\"\n  dataset = list()\n\n  with open(filename, 'r') as file:\n    csv_reader = reader(file, quoting=QUOTE_NONNUMERIC)\n    for row in csv_reader:\n      if not row:\n        continue\n      dataset.append(row)\n      #dataset.append([int(v) for v in row])\n\n  return np.array(dataset)\n\n\ndef load_pla(filename):\n  \"\"\"Loads PLA file.\"\"\"\n  dataset = list()\n  pla = parse(filename)\n  for i,o in zip(pla.pla_i, pla.pla_o):\n    i_s = [1 if v == _1 else 0 if v == _0 else 0 for v in i]\n    o_s = [sum([(1 << (len(o)-1-oo)) if o[oo] == _1 else 0\n                for oo in range(len(o))])]\n    dataset.append(i_s + o_s)\n  dataset = np.array(dataset)\n  return dataset\n\n\ndef load(filename):\n  \"\"\"Loads and decides if we will load PLA or CSV file based on suffix.\"\"\"\n\n  suffix_split = filename.split(\".\")\n\n  if suffix_split[-1] == \"pla\":\n    print(\"... loading pla\")\n    dataset = load_pla(filename)\n  else:\n    dataset = load_csv(filename)\n  return dataset\n\n"
  },
  {
    "path": "experimental/lo/utils.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Computes padding and quantization dictionary values.\"\"\"\n\nimport numpy as np\n\n\ndef get_padding_value(padding, kernel):\n  \"\"\"Returns padding value for kernel.\"\"\"\n\n  if padding == \"valid\":\n    return 0\n  elif padding == \"same\":\n    return kernel // 2\n  elif padding == \"full\":\n    return kernel - 1\n\n  raise ValueError(\"accepted paddings are 'valid', 'same' or 'full', found \" +\n                   padding)\n\n\ndef get_quantized_bits_dict(bits, ibits, sign=False, mode=\"bin\"):\n  \"\"\"Returns map from floating values to bit encoding.\"\"\"\n\n  o_dict = {}\n\n  n_bits = bits\n\n  for b in range(1 << (bits - sign)):\n    v = (1.0 * b) * (1 << ibits) / (1 << bits)\n    if mode == \"bin\":\n      b_str = bin(b)[2:]\n      b_str = \"0\" * (n_bits - len(b_str)) + b_str\n    else:  # mode == \"dec\":\n      b_str = str(b)\n\n    o_dict[v] = b_str\n\n    if b > 0 and sign:\n      if mode == \"bin\":\n        b_str = bin(-b & ((1 << n_bits) - 1))[2:]\n      else:  # mode == \"dec\"\n        b_str = str(-b)\n\n      o_dict[-v] = b_str\n\n  if sign:\n    v = (1.0 * (1 << (bits - sign))) * (1 << ibits) / (1 << bits)\n    if mode == \"bin\":\n      b_str = bin(-(1 << (bits - sign)) & ((1 << bits) - 1))[2:]\n    else:\n      b_str = str(-(1 << (bits - sign)))\n    
o_dict[-v] = b_str\n  return o_dict\n\n\ndef get_quantized_po2_dict(\n    bits, max_exp, sign=False, make_smaller_zero=True, mode=\"bin\"):\n  \"\"\"Returns map from floating values to bit encoding.\"\"\"\n\n  # if make_smaller_zero we will make sure smaller number is 000...0\n\n  # mode = \"bin\" |-> make_smaller_zero\n\n  assert mode != \"bin\" or  make_smaller_zero\n\n  o_dict = {}\n\n  if max_exp > 0:\n    v = 1.0\n    if mode == \"bin\":\n      b_str = \"0\" * bits\n    else:\n      b_str = \"1\"\n\n    o_dict[v] = b_str\n\n    if sign:\n      v = -1.0\n      if mode == \"bin\":\n        b_str = \"1\" + \"0\"*(bits-sign)\n      else:\n        b_str = \"-1\"\n\n      o_dict[v] = b_str\n\n  for b in range(1, 1<<(bits - sign - 1)):\n    v = np.power(2.0, -b)\n    if mode == \"bin\":\n      b_sign = \"0\" if sign else \"\"\n      b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:]\n    else:\n      b_str = str(v)\n    o_dict[v] = b_str\n\n    if b <= max_exp:\n      v = np.power(2.0, b)\n      if mode == \"bin\":\n        b_str = bin(b)[2:]\n        b_str = b_sign + \"0\"*(bits - sign - len(b_str)) + b_str\n      else:\n        b_str = str(v)\n      o_dict[v] = b_str\n\n    if sign:\n      v = -np.power(2.0, -b)\n      if mode == \"bin\":\n        b_sign = \"1\" if sign else \"\"\n        b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:]\n      else:\n        b_str = str(v)\n      o_dict[v] = b_str\n\n      if b <= max_exp:\n        v = -np.power(2.0, b)\n        if mode == \"bin\":\n          b_str = bin(b)[2:]\n          b_str = b_sign + \"0\"*(bits - sign - len(b_str)) + b_str\n        else:\n          b_str = str(v)\n        o_dict[v] = b_str\n\n  b = 1 << (bits - sign - 1)\n  v = np.power(2.0, -b)\n  if mode == \"bin\":\n    b_sign = \"0\" if sign else \"\"\n    b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:]\n  else:\n    b_str = str(v)\n  o_dict[v] = b_str\n\n  smaller_mask = b_str\n\n  if sign:\n    v = 
-np.power(2.0, -b)\n    if mode == \"bin\":\n      b_sign = \"1\" if sign else \"\"\n      b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:]\n    else:\n      b_str = str(v)\n    o_dict[v] = b_str\n\n  def invert_bit(bit, mask):\n    \"\"\"Inverts bits if mask is 1.\"\"\"\n\n    if mask == \"0\":\n      return bit\n    else:\n      return \"0\" if bit == \"1\" else \"1\"\n\n  if mode == \"bin\":\n    if make_smaller_zero:\n      for v in o_dict:\n        o_dict[v] = \"\".join(\n            invert_bit(bit, mask_bit)\n            for bit, mask_bit in zip(o_dict[v], smaller_mask))\n  else:\n    keys_sorted = list(sorted(o_dict.keys()))\n    if make_smaller_zero:\n      min_positive_key = min([abs(v) for v in keys_sorted])\n      min_positive_index = keys_sorted.index(min_positive_key)\n    else:\n      min_positive_index = 0\n    for i, k in enumerate(keys_sorted):\n      o_dict[k] = str(i - min_positive_index)\n\n  return o_dict\n\n\ndef get_ternary_dict(mode=\"bin\"):\n  \"\"\"Returns map from floating values to bit encoding.\"\"\"\n\n  if mode == \"bin\":\n    return {-1.0: \"11\", 0.0: \"00\", 1.0: \"01\"}\n  else:\n    return {-1.0: \"-1\", 0.0: \"0\", 1.0: \"1\"}\n\n\ndef get_binary_dict(symmetric=False, mode=\"bin\"):\n  \"\"\"Returns map from floating values to bit encoding.\"\"\"\n\n  if mode == \"bin\":\n    if symmetric:\n      return {-1.0: \"10\", 1.0: \"01\"}\n    else:\n      return {0.0: \"0\", 1.0: \"1\"}\n  else:\n    if symmetric:\n      return {-1.0: \"-1\", 1.0: \"1\"}\n    else:\n      return {0.0: \"0\", 1.0: \"1\"}\n"
  },
  {
    "path": "notebook/AutoQKeras.ipynb",
    "content": "{\n \"cells\": [\n   {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"##### Copyright 2020 Google LLC\\n\",\n    \"#\\n\",\n    \"#\\n\",\n    \"# Licensed under the Apache License, Version 2.0 (the \\\"License\\\");\\n\",\n    \"# you may not use this file except in compliance with the License.\\n\",\n    \"# You may obtain a copy of the License at\\n\",\n    \"#\\n\",\n    \"# https://www.apache.org/licenses/LICENSE-2.0\\n\",\n    \"#\\n\",\n    \"# Unless required by applicable law or agreed to in writing, software\\n\",\n    \"# distributed under the License is distributed on an \\\"AS IS\\\" BASIS,\\n\",\n    \"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\\n\",\n    \"# See the License for the specific language governing permissions and\\n\",\n    \"# limitations under the License.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"QC9sVuNrzT-f\"\n   },\n   \"source\": [\n    \"# Introduction\\n\",\n    \"\\n\",\n    \"In this notebook, we show how to quantize a model using AutoQKeras.\\n\",\n    \"\\n\",\n    \"As usual, let's first make sure we are using Python 3.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 51\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 926,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840345558,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"0sY-O2IfzdB3\",\n    \"outputId\": \"1c5a4e7a-1003-4b56-a30a-ca6bc196f18b\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"import sys\\n\",\n    \"print(sys.version)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   
\"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"6V7FxYH0zfY0\"\n   },\n   \"source\": [\n    \"Now, let's load some packages we will need to run AutoQKeras.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"wuVqOAcbz3Go\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"import warnings\\n\",\n    \"warnings.filterwarnings(\\\"ignore\\\")\\n\",\n    \"\\n\",\n    \"import json\\n\",\n    \"import pprint\\n\",\n    \"import numpy as np\\n\",\n    \"import six\\n\",\n    \"import tempfile\\n\",\n    \"import tensorflow.compat.v2 as tf\\n\",\n    \"# V2 Behavior is necessary to use TF2 APIs before TF2 is default TF version internally.\\n\",\n    \"tf.enable_v2_behavior()\\n\",\n    \"from tensorflow.keras.optimizers import *\\n\",\n    \"\\n\",\n    \"from qkeras.autoqkeras import *\\n\",\n    \"from qkeras import *\\n\",\n    \"from qkeras.utils import model_quantize\\n\",\n    \"from qkeras.qtools import run_qtools\\n\",\n    \"from qkeras.qtools import settings as qtools_settings\\n\",\n    \"\\n\",\n    \"from tensorflow.keras.utils import to_categorical\\n\",\n    \"import tensorflow_datasets as tfds\\n\",\n    \"\\n\",\n    \"print(\\\"using tensorflow\\\", tf.__version__)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Let's define `get_data` and `get_model` as you may not have stand alone access to examples directory inside autoqkeras.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_data(dataset_name, fast=False):\\n\",\n    \"  \\\"\\\"\\\"Returns dataset from tfds.\\\"\\\"\\\"\\n\",\n    \"  ds_train = tfds.load(name=dataset_name, split=\\\"train\\\", batch_size=-1)\\n\",\n    \"  ds_test = tfds.load(name=dataset_name, split=\\\"test\\\", batch_size=-1)\\n\",\n    
\"\\n\",\n    \"  dataset = tfds.as_numpy(ds_train)\\n\",\n    \"  x_train, y_train = dataset[\\\"image\\\"].astype(np.float32), dataset[\\\"label\\\"]\\n\",\n    \"\\n\",\n    \"  dataset = tfds.as_numpy(ds_test)\\n\",\n    \"  x_test, y_test = dataset[\\\"image\\\"].astype(np.float32), dataset[\\\"label\\\"]\\n\",\n    \"\\n\",\n    \"  if len(x_train.shape) == 3:\\n\",\n    \"    x_train = x_train.reshape(x_train.shape + (1,))\\n\",\n    \"    x_test = x_test.reshape(x_test.shape + (1,))\\n\",\n    \"\\n\",\n    \"  x_train /= 256.0\\n\",\n    \"  x_test /= 256.0\\n\",\n    \"\\n\",\n    \"  x_mean = np.mean(x_train, axis=0)\\n\",\n    \"\\n\",\n    \"  x_train -= x_mean\\n\",\n    \"  x_test -= x_mean\\n\",\n    \"\\n\",\n    \"  nb_classes = np.max(y_train) + 1\\n\",\n    \"  y_train = to_categorical(y_train, nb_classes)\\n\",\n    \"  y_test = to_categorical(y_test, nb_classes)\\n\",\n    \"\\n\",\n    \"  print(x_train.shape[0], \\\"train samples\\\")\\n\",\n    \"  print(x_test.shape[0], \\\"test samples\\\")\\n\",\n    \"  return (x_train, y_train), (x_test, y_test)\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from tensorflow.keras.initializers import *\\n\",\n    \"from tensorflow.keras.layers import *\\n\",\n    \"from tensorflow.keras.models import Model\\n\",\n    \"from tensorflow.keras.optimizers import *\\n\",\n    \"\\n\",\n    \"class ConvBlockNetwork(object):\\n\",\n    \"  \\\"\\\"\\\"Creates Convolutional block type of network.\\\"\\\"\\\"\\n\",\n    \"\\n\",\n    \"  def __init__(\\n\",\n    \"      self,\\n\",\n    \"      shape,\\n\",\n    \"      nb_classes,\\n\",\n    \"      kernel_size,\\n\",\n    \"      filters,\\n\",\n    \"      dropout_rate=0.0,\\n\",\n    \"      with_maxpooling=True,\\n\",\n    \"      with_batchnorm=True,\\n\",\n    \"      kernel_initializer=\\\"he_normal\\\",\\n\",\n    \"      
bias_initializer=\\\"zeros\\\",\\n\",\n    \"      use_separable=False,\\n\",\n    \"      use_xnornet_trick=False,\\n\",\n    \"      all_conv=False\\n\",\n    \"  ):\\n\",\n    \"    \\\"\\\"\\\"Creates class.\\n\",\n    \"\\n\",\n    \"    Args:\\n\",\n    \"      shape: shape of inputs.\\n\",\n    \"      nb_classes: number of output classes.\\n\",\n    \"      kernel_size: kernel_size of network.\\n\",\n    \"      filters: sizes of filters (if entry is a list, we create a block).\\n\",\n    \"      dropout_rate: dropout rate if > 0.\\n\",\n    \"      with_maxpooling: if true, use maxpooling.\\n\",\n    \"      with_batchnorm: with BatchNormalization.\\n\",\n    \"      kernel_initializer: kernel_initializer.\\n\",\n    \"      bias_initializer: bias and beta initializer.\\n\",\n    \"      use_separable: if \\\"dsp\\\", do conv's 1x3 + 3x1. If \\\"mobilenet\\\",\\n\",\n    \"        use MobileNet separable convolution. If False or \\\"none\\\", perform single\\n\",\n    \"        conv layer.\\n\",\n    \"      use_xnornet_trick: use bn+act after max pool to enable binary\\n\",\n    \"        to avoid saturation to largest value.\\n\",\n    \"      all_conv: if true, implements all convolutional network.\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    self.shape = shape\\n\",\n    \"    self.nb_classes = nb_classes\\n\",\n    \"    self.kernel_size = kernel_size\\n\",\n    \"    self.filters = filters\\n\",\n    \"    self.dropout_rate = dropout_rate\\n\",\n    \"    self.with_maxpooling = with_maxpooling\\n\",\n    \"    self.with_batchnorm = with_batchnorm\\n\",\n    \"    self.kernel_initializer = kernel_initializer\\n\",\n    \"    self.bias_initializer = bias_initializer\\n\",\n    \"    self.use_separable = use_separable\\n\",\n    \"    self.use_xnornet_trick = use_xnornet_trick\\n\",\n    \"    self.all_conv = all_conv\\n\",\n    \"\\n\",\n    \"  def build(self):\\n\",\n    \"    \\\"\\\"\\\"Builds model.\\\"\\\"\\\"\\n\",\n    \"    x = x_in = 
Input(self.shape, name=\\\"input\\\")\\n\",\n    \"    for i in range(len(self.filters)):\\n\",\n    \"      if len(self.filters) > 1:\\n\",\n    \"        name_suffix_list = [str(i)]\\n\",\n    \"      else:\\n\",\n    \"        name_suffix_list = []\\n\",\n    \"      if not isinstance(self.filters[i], list):\\n\",\n    \"        filters = [self.filters[i]]\\n\",\n    \"      else:\\n\",\n    \"        filters = self.filters[i]\\n\",\n    \"      for j in range(len(filters)):\\n\",\n    \"        if len(filters) > 1:\\n\",\n    \"          name_suffix = \\\"_\\\".join(name_suffix_list + [str(j)])\\n\",\n    \"        else:\\n\",\n    \"          name_suffix = \\\"_\\\".join(name_suffix_list)\\n\",\n    \"        if self.use_separable == \\\"dsp\\\":\\n\",\n    \"          kernels = [(1, self.kernel_size), (self.kernel_size, 1)]\\n\",\n    \"        else:\\n\",\n    \"          kernels = [(self.kernel_size, self.kernel_size)]\\n\",\n    \"        for k, kernel in enumerate(kernels):\\n\",\n    \"          strides = 1\\n\",\n    \"          if (\\n\",\n    \"              not self.with_maxpooling and j == len(filters)-1 and\\n\",\n    \"              k == len(kernels)-1\\n\",\n    \"          ):\\n\",\n    \"            strides = 2\\n\",\n    \"          if self.use_separable == \\\"dsp\\\":\\n\",\n    \"            kernel_suffix = (\\n\",\n    \"                \\\"\\\".join([str(k) for k in kernel]) + \\\"_\\\" + name_suffix)\\n\",\n    \"          elif self.use_separable == \\\"mobilenet\\\":\\n\",\n    \"            depth_suffix = (\\n\",\n    \"                \\\"\\\".join([str(k) for k in kernel]) + \\\"_\\\" + name_suffix)\\n\",\n    \"            kernel_suffix = \\\"11_\\\" + name_suffix\\n\",\n    \"          else:\\n\",\n    \"            kernel_suffix = name_suffix\\n\",\n    \"          if self.use_separable == \\\"mobilenet\\\":\\n\",\n    \"            x = DepthwiseConv2D(\\n\",\n    \"                kernel,\\n\",\n    \"                
padding=\\\"same\\\", strides=strides,\\n\",\n    \"                use_bias=False,\\n\",\n    \"                name=\\\"conv2d_dw_\\\" + depth_suffix)(x)\\n\",\n    \"            if self.with_batchnorm:\\n\",\n    \"              x = BatchNormalization(name=\\\"conv2d_dw_bn_\\\" + depth_suffix)(x)\\n\",\n    \"            x = Activation(\\\"relu\\\", name=\\\"conv2d_dw_act_\\\" + depth_suffix)(x)\\n\",\n    \"            kernel = (1, 1)\\n\",\n    \"            strides = 1\\n\",\n    \"          x = Conv2D(\\n\",\n    \"              filters[j], kernel,\\n\",\n    \"              strides=strides, use_bias=not self.with_batchnorm,\\n\",\n    \"              padding=\\\"same\\\",\\n\",\n    \"              kernel_initializer=self.kernel_initializer,\\n\",\n    \"              bias_initializer=self.bias_initializer,\\n\",\n    \"              name=\\\"conv2d_\\\" + kernel_suffix)(x)\\n\",\n    \"          if not (\\n\",\n    \"              self.with_maxpooling and self.use_xnornet_trick and\\n\",\n    \"              j == len(filters)-1 and k == len(kernels)-1\\n\",\n    \"          ):\\n\",\n    \"            if self.with_batchnorm:\\n\",\n    \"              x = BatchNormalization(\\n\",\n    \"                  beta_initializer=self.bias_initializer,\\n\",\n    \"                  name=\\\"bn_\\\" + kernel_suffix)(x)\\n\",\n    \"            x = Activation(\\\"relu\\\", name=\\\"act_\\\" + kernel_suffix)(x)\\n\",\n    \"      if self.with_maxpooling:\\n\",\n    \"        x = MaxPooling2D(2, 2, name=\\\"mp_\\\" + name_suffix)(x)\\n\",\n    \"        # this is a trick from xnornet to enable full binary or ternary\\n\",\n    \"        # networks to be after maxpooling.\\n\",\n    \"        if self.use_xnornet_trick:\\n\",\n    \"          x = BatchNormalization(\\n\",\n    \"              beta_initializer=self.bias_initializer,\\n\",\n    \"              name=\\\"mp_bn_\\\" + name_suffix)(x)\\n\",\n    \"          x = Activation(\\\"relu\\\", name=\\\"mp_act_\\\" + 
name_suffix)(x)\\n\",\n    \"      if self.dropout_rate > 0:\\n\",\n    \"        x = Dropout(self.dropout_rate, name=\\\"drop_\\\" + name_suffix)(x)\\n\",\n    \"\\n\",\n    \"    if not self.all_conv:\\n\",\n    \"      x = Flatten(name=\\\"flatten\\\")(x)\\n\",\n    \"      x = Dense(\\n\",\n    \"          self.nb_classes,\\n\",\n    \"          kernel_initializer=self.kernel_initializer,\\n\",\n    \"          bias_initializer=self.bias_initializer,\\n\",\n    \"          name=\\\"dense\\\")(x)\\n\",\n    \"      x = Activation(\\\"softmax\\\", name=\\\"softmax\\\")(x)\\n\",\n    \"    else:\\n\",\n    \"      x = Conv2D(\\n\",\n    \"          self.nb_classes, 1, strides=1, padding=\\\"same\\\",\\n\",\n    \"          kernel_initializer=self.kernel_initializer,\\n\",\n    \"          bias_initializer=self.bias_initializer,\\n\",\n    \"          name=\\\"dense\\\")(x)\\n\",\n    \"      x = Activation(\\\"softmax\\\", name=\\\"softmax\\\")(x)\\n\",\n    \"      x = Flatten(name=\\\"flatten\\\")(x)\\n\",\n    \"\\n\",\n    \"    model = Model(inputs=[x_in], outputs=[x])\\n\",\n    \"\\n\",\n    \"    return model\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"def get_model(dataset):\\n\",\n    \"  \\\"\\\"\\\"Returns a model for the demo of AutoQKeras.\\\"\\\"\\\"\\n\",\n    \"  if dataset == \\\"mnist\\\":\\n\",\n    \"    model = ConvBlockNetwork(\\n\",\n    \"        shape=(28, 28, 1),\\n\",\n    \"        nb_classes=10,\\n\",\n    \"        kernel_size=3,\\n\",\n    \"        filters=[16, 32, 48, 64, 128],\\n\",\n    \"        dropout_rate=0.2,\\n\",\n    \"        with_maxpooling=False,\\n\",\n    \"        with_batchnorm=True,\\n\",\n    \"        kernel_initializer=\\\"he_uniform\\\",\\n\",\n    \"        bias_initializer=\\\"zeros\\\",\\n\",\n    \"    ).build()\\n\",\n    \"\\n\",\n    \"  elif dataset == \\\"fashion_mnist\\\":\\n\",\n    \"    model = ConvBlockNetwork(\\n\",\n    \"        shape=(28, 28, 1),\\n\",\n    \"        nb_classes=10,\\n\",\n    
\"        kernel_size=3,\\n\",\n    \"        filters=[16, [32]*3, [64]*3],\\n\",\n    \"        dropout_rate=0.2,\\n\",\n    \"        with_maxpooling=True,\\n\",\n    \"        with_batchnorm=True,\\n\",\n    \"        use_separable=\\\"mobilenet\\\",\\n\",\n    \"        kernel_initializer=\\\"he_uniform\\\",\\n\",\n    \"        bias_initializer=\\\"zeros\\\",\\n\",\n    \"        use_xnornet_trick=True\\n\",\n    \"    ).build()\\n\",\n    \"\\n\",\n    \"  elif dataset == \\\"cifar10\\\":\\n\",\n    \"    model = ConvBlockNetwork(\\n\",\n    \"        shape=(32, 32, 3),\\n\",\n    \"        nb_classes=10,\\n\",\n    \"        kernel_size=3,\\n\",\n    \"        filters=[16, [32]*3, [64]*3, [128]*3],\\n\",\n    \"        dropout_rate=0.2,\\n\",\n    \"        with_maxpooling=True,\\n\",\n    \"        with_batchnorm=True,\\n\",\n    \"        use_separable=\\\"mobilenet\\\",\\n\",\n    \"        kernel_initializer=\\\"he_uniform\\\",\\n\",\n    \"        bias_initializer=\\\"zeros\\\",\\n\",\n    \"        use_xnornet_trick=True\\n\",\n    \"    ).build()\\n\",\n    \"\\n\",\n    \"  elif dataset == \\\"cifar100\\\":\\n\",\n    \"    model = ConvBlockNetwork(\\n\",\n    \"        shape=(32, 32, 3),\\n\",\n    \"        nb_classes=100,\\n\",\n    \"        kernel_size=3,\\n\",\n    \"        filters=[16, [32]*3, [64]*3, [128]*3, [256]*3],\\n\",\n    \"        dropout_rate=0.2,\\n\",\n    \"        with_maxpooling=True,\\n\",\n    \"        with_batchnorm=True,\\n\",\n    \"        use_separable=\\\"mobilenet\\\",\\n\",\n    \"        kernel_initializer=\\\"he_uniform\\\",\\n\",\n    \"        bias_initializer=\\\"zeros\\\",\\n\",\n    \"        use_xnornet_trick=True\\n\",\n    \"    ).build()\\n\",\n    \"\\n\",\n    \"  model.summary()\\n\",\n    \"\\n\",\n    \"  return model\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"uXsGtqRcN7fY\"\n   },\n   \"source\": [\n    \"`AutoQKeras` has 
some examples on how to run with `mnist`, `fashion_mnist`, `cifar10` and `cifar100`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 51\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 18554,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840377936,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"lB8CBTehz9FK\",\n    \"outputId\": \"09f791cf-8db5-40c5-b17d-89d433308716\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"DATASET = \\\"mnist\\\"\\n\",\n    \"(x_train, y_train), (x_test, y_test) = get_data(DATASET)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"bk4rOks2OIbW\"\n   },\n   \"source\": [\n    \"Before we create the model, let's see if we can perform distributed training.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 206\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 304,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840378251,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"EMbYcKb-wMOc\",\n    \"outputId\": \"22e85769-4659-4212-ccdb-4b00be2fcefe\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"physical_devices = tf.config.list_physical_devices()\\n\",\n    \"for d in physical_devices:\\n\",\n    \"  print(d)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 34\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 14553,\n     
\"status\": \"ok\",\n     \"timestamp\": 1591840392823,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"eMVill0TxUuG\",\n    \"outputId\": \"97c07213-fdce-4eed-9af7-cc51393cd996\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"has_tpus = np.any([d.device_type == \\\"TPU\\\" for d in physical_devices])\\n\",\n    \"\\n\",\n    \"if has_tpus:\\n\",\n    \"  TPU_WORKER = 'local'\\n\",\n    \"\\n\",\n    \"  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(\\n\",\n    \"      tpu=TPU_WORKER, job_name='tpu_worker')\\n\",\n    \"  if TPU_WORKER != 'local':\\n\",\n    \"    tf.config.experimental_connect_to_cluster(resolver, protocol='grpc+loas')\\n\",\n    \"  tf.tpu.experimental.initialize_tpu_system(resolver)\\n\",\n    \"  strategy = tf.distribute.experimental.TPUStrategy(resolver)\\n\",\n    \"  print('Number of devices: {}'.format(strategy.num_replicas_in_sync))\\n\",\n    \"\\n\",\n    \"  cur_strategy = strategy\\n\",\n    \"else:\\n\",\n    \"  cur_strategy = tf.distribute.get_strategy()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"6FIAmXgOOPWg\"\n   },\n   \"source\": [\n    \"Now we can create the model with the distributed strategy in place if TPUs are available. We have some test models that we can use, or you can build your own models. 
\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 977\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1149,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840393983,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"P0_-26kwxZiQ\",\n    \"outputId\": \"bf2828fe-2968-4d7d-82e7-0e2b87f063ae\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"with cur_strategy.scope():\\n\",\n    \"  model = get_model(DATASET)\\n\",\n    \"  custom_objects = {}\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"jok7tJq1OVuJ\"\n   },\n   \"source\": [\n    \"Let's see the accuracy on a unquantized model.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 360\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 10292,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840404285,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"nvFSJpeDxmWZ\",\n    \"outputId\": \"ceac171d-2357-4d2a-ecbe-6c2775bc2a94\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"with cur_strategy.scope():\\n\",\n    \"  optimizer = Adam(lr=0.02)\\n\",\n    \"  model.compile(optimizer=optimizer, loss=\\\"categorical_crossentropy\\\", metrics=[\\\"acc\\\"])\\n\",\n    \"  model.fit(x_train, y_train, epochs=10, batch_size=2048, steps_per_epoch=29, validation_data=(x_test, y_test))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"pKArZ2VwQlph\"\n   },\n   
\"source\": [\n    \"For `mnist`, we should get 99% validation accuracy, and for `fashion_mnist`, we should get around 86% of validation accuracy. Let's get a metric for high-level estimation of energy of this model. \\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 413,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840404708,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"AlIk3gtFS6iJ\",\n    \"outputId\": \"780a9c28-6234-49ff-9a85-e52bf00a5c59\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"  reference_internal = \\\"fp32\\\"\\n\",\n    \"  reference_accumulator = \\\"fp32\\\"\\n\",\n    \"\\n\",\n    \"  q = run_qtools.QTools(\\n\",\n    \"      model,\\n\",\n    \"      # energy calculation using a given process\\n\",\n    \"      # \\\"horowitz\\\" refers to 45nm process published at\\n\",\n    \"      # M. Horowitz, \\\"1.1 Computing's energy problem (and what we can do about\\n\",\n    \"      # it), \\\"2014 IEEE International Solid-State Circuits Conference Digest of\\n\",\n    \"      # Technical Papers (ISSCC), San Francisco, CA, 2014, pp. 
10-14, \\n\",\n    \"      # doi: 10.1109/ISSCC.2014.6757323.\\n\",\n    \"      process=\\\"horowitz\\\",\\n\",\n    \"      # quantizers for model input\\n\",\n    \"      source_quantizers=[quantized_bits(8, 0, 1)],\\n\",\n    \"      is_inference=False,\\n\",\n    \"      # absolute path (including filename) of the model weights\\n\",\n    \"      # in the future, we will attempt to optimize the power model\\n\",\n    \"      # by using weight information, although it can be used to further\\n\",\n    \"      # optimize QBatchNormalization.\\n\",\n    \"      weights_path=None,\\n\",\n    \"      # keras_quantizer to quantize weight/bias in un-quantized keras layers\\n\",\n    \"      keras_quantizer=reference_internal,\\n\",\n    \"      # keras_quantizer to quantize MAC in un-quantized keras layers\\n\",\n    \"      keras_accumulator=reference_accumulator,\\n\",\n    \"      # whether calculate baseline energy\\n\",\n    \"      for_reference=True)\\n\",\n    \"  \\n\",\n    \"# caculate energy of the derived data type map.\\n\",\n    \"energy_dict = q.pe(\\n\",\n    \"    # whether to store parameters in dram, sram, or fixed\\n\",\n    \"    weights_on_memory=\\\"sram\\\",\\n\",\n    \"    # store activations in dram or sram\\n\",\n    \"    activations_on_memory=\\\"sram\\\",\\n\",\n    \"    # minimum sram size in number of bits. Let's assume a 16MB SRAM.\\n\",\n    \"    min_sram_size=8*16*1024*1024,\\n\",\n    \"    # whether load data from dram to sram (consider sram as a cache\\n\",\n    \"    # for dram. 
If false, we will assume data will be already in SRAM\\n\",\n    \"    rd_wr_on_io=False)\\n\",\n    \"\\n\",\n    \"# get stats of energy distribution in each layer\\n\",\n    \"energy_profile = q.extract_energy_profile(\\n\",\n    \"    qtools_settings.cfg.include_energy, energy_dict)\\n\",\n    \"# extract sum of energy of each layer according to the rule specified in\\n\",\n    \"# qtools_settings.cfg.include_energy\\n\",\n    \"total_energy = q.extract_energy_sum(\\n\",\n    \"    qtools_settings.cfg.include_energy, energy_dict)\\n\",\n    \"\\n\",\n    \"pprint.pprint(energy_profile)\\n\",\n    \"print()\\n\",\n    \"print(\\\"Total energy: {:.2f} uJ\\\".format(total_energy / 1000000.0))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"-eDXxDSUVJ2m\"\n   },\n   \"source\": [\n    \"During the computation, we had a dictionary that outlines the energy per layer (`energy_profile`), and total energy (`total_energy`). The reader should remember that `energy_profile` may need additional filtering as implementations will fuse some\\n\",\n    \"layers. When we compute the `total_energy`, we consider an approximation that some layers will be fused to compute the final energy number. For example, a convolution layer followed by an activation layer will be fused into a single layer so that the output of the convolution layer is not used.\\n\",\n    \"\\n\",\n    \"You have to remember that our high-level model for energy has several assumptions:\\n\",\n    \"\\n\",\n    \"The energy of a layer is estimated as `energy(layer) = energy(input) + energy(parameters) + energy(MAC) + energy(output)`.\\n\",\n    \"\\n\",\n    \"1) Reading inputs, parameters and outputs consider only _compulsory_ accesses, i.e. first access to the data, which is independent of the hardware architecture. 
If you remember _The 3 C's of Caches_ (https://courses.cs.washington.edu/courses/cse410/99au/lectures/Lecture-10-18/tsld035.htm) other types of accesses will depend on the accelerator architecture.\\n\",\n    \"\\n\",\n    \"2) For the multiply-and-add (MAC) energy estimation, we only consider the energy to compute the MAC, but not any other type energy. For example, in a real accelerator, you have registers, glue logic, pipeline logic that will affect the overall energy profile of the device.\\n\",\n    \"\\n\",\n    \"Although this model is simple and provides an initial estimate on what to expect, it has high-variance with respect to actual energy numbers you will find in practice, especially with respect to different architectural implementations.\\n\",\n    \"\\n\",\n    \"We assume that the real energy `Energy(layer)` is a linear combination of the high-level energy model, i.e.`Energy(layer) = k1 * energy(layer) + k2`, where `k1` and `k2` are constants that depend on the architecture of the accelerator. One can think of `k1` as the factor that accounts for the additional storage to keep the model running, and `k2` as the additional always on logic that is required to perform the operations. 
If we compare the energy of two implementations with different quantizations of the same layer, let's say `layer1` and `layer2`, `Energy(layer1) > Energy(layer2)` holds true iff `energy(layer1) > energy(layer2)` for the same architecture, but for different architectures, this will not be true in general.\\n\",\n    \"\\n\",\n    \"Despite its limitations to predict a single energy number, this model is quite good to compare the energy of two different models, or different types of quantizations, when we restrict it to a single architecture, and that's how we use it here.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"Hr1FL8wVSy-q\"\n   },\n   \"source\": [\n    \"# Quantizing a Model With `AutoQKeras`\\n\",\n    \"\\n\",\n    \"To quantize this model with `AutoQKeras`, we need to define the quantization for kernels, biases and activations; forgiving factors and quantization strategy.\\n\",\n    \"\\n\",\n    \"Below we define which quantizers are allowed for kernel, bias, activations and linear. Linear is a proxy that we use to capture `Activation(\\\"linear\\\")` to apply quantization without applying a non-linear operation.  
In some networks, we found that this trick may be necessary to better represent the quantization space.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"vSsEwDr_yRG4\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"quantization_config = {\\n\",\n    \"        \\\"kernel\\\": {\\n\",\n    \"                \\\"binary\\\": 1,\\n\",\n    \"                \\\"stochastic_binary\\\": 1,\\n\",\n    \"                \\\"ternary\\\": 2,\\n\",\n    \"                \\\"stochastic_ternary\\\": 2,\\n\",\n    \"                \\\"quantized_bits(2,1,1,alpha=1.0)\\\": 2,\\n\",\n    \"                \\\"quantized_bits(4,0,1,alpha=1.0)\\\": 4,\\n\",\n    \"                \\\"quantized_bits(8,0,1,alpha=1.0)\\\": 8,\\n\",\n    \"                \\\"quantized_po2(4,1)\\\": 4\\n\",\n    \"        },\\n\",\n    \"        \\\"bias\\\": {\\n\",\n    \"                \\\"quantized_bits(4,0,1)\\\": 4,\\n\",\n    \"                \\\"quantized_bits(8,3,1)\\\": 8,\\n\",\n    \"                \\\"quantized_po2(4,8)\\\": 4\\n\",\n    \"        },\\n\",\n    \"        \\\"activation\\\": {\\n\",\n    \"                \\\"binary\\\": 1,\\n\",\n    \"                \\\"ternary\\\": 2,\\n\",\n    \"                \\\"quantized_relu_po2(4,4)\\\": 4,\\n\",\n    \"                \\\"quantized_relu(3,1)\\\": 3,\\n\",\n    \"                \\\"quantized_relu(4,2)\\\": 4,\\n\",\n    \"                \\\"quantized_relu(8,2)\\\": 8,\\n\",\n    \"                \\\"quantized_relu(8,4)\\\": 8,\\n\",\n    \"                \\\"quantized_relu(16,8)\\\": 16\\n\",\n    \"        },\\n\",\n    \"        \\\"linear\\\": {\\n\",\n    \"                \\\"binary\\\": 1,\\n\",\n    \"                \\\"ternary\\\": 2,\\n\",\n    \"                \\\"quantized_bits(4,1)\\\": 4,\\n\",\n    \"                \\\"quantized_bits(8,2)\\\": 8,\\n\",\n    \"                
\\\"quantized_bits(16,10)\\\": 16\\n\",\n    \"        }\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"GmW_xaAvZo4D\"\n   },\n   \"source\": [\n    \"Now let's define how to apply quantization. In the simplest form, we specify how many bits for kernels, biases and activations by layer types. Note that the entry `BatchNormalization` needs to be specified here, as we only quantize layer types specified by these patterns.  For example, a `Flatten` layer is not quantized as it does not change the data type of its inputs.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"emTRLIZmR-P7\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"limit = {\\n\",\n    \"    \\\"Dense\\\": [8, 8, 4],\\n\",\n    \"    \\\"Conv2D\\\": [4, 8, 4],\\n\",\n    \"    \\\"DepthwiseConv2D\\\": [4, 8, 4],\\n\",\n    \"    \\\"Activation\\\": [4],\\n\",\n    \"    \\\"BatchNormalization\\\": []\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"-iu5gFNhaLNE\"\n   },\n   \"source\": [\n    \"Here, we are specifying that we want to use at most 4 bits for weights and activations, and at most 8 bits for biases in convolutional and depthwise convolutions, but we allow up to 8 bits for kernels in dense layers.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"ZUMQGEIDblSa\"\n   },\n   \"source\": [\n    \"Let's define now the forgiving factor. We will consider energy minimization as a goal as follows.  
Here, we are saying that we allow 8% reduction in accuracy for a 2x reduction in energy, both reference and trials have parameters and activations on SRAM, both reference model and quantization trials do not read/write from DRAM on I/O operations, and we should consider both experiments to use SRAMs with minimum tensor sizes (commonly called distributed SRAM implementation).\\n\",\n    \"\\n\",\n    \"We also need to specify the quantizers for the inputs. In this case, we want to use `int8` as source quantizers. Other possible types are `int16`, `int32`, `fp16` or `fp32`, besides `QKeras` quantizer types.\\n\",\n    \"\\n\",\n    \"Finally, to be fair, we want to compare our quantization against fixed-point 8-bit inputs, outputs, activations, weights and biases, and 32-bit accumulators.\\n\",\n    \"\\n\",\n    \"Remember that a `forgiving factor` forgives a drop in a metric such as `accuracy` if the gains of the model are much bigger than the drop. For example, it corresponds to the sentence *we allow $\\\\tt{delta}\\\\%$ reduction in accuracy if the quantized model has $\\\\tt{rate} \\\\times$ smaller energy than the original model*, being a multiplicative factor to the metric. 
It is computed by $1 + \\\\tt{delta} \\\\times  \\\\log_{\\\\tt{rate}}(\\\\tt{stress} \\\\times \\\\tt{reference\\\\_cost} / \\\\tt{trial\\\\_cost})$.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"kS31TuZ-aKb1\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"goal = {\\n\",\n    \"    \\\"type\\\": \\\"energy\\\",\\n\",\n    \"    \\\"params\\\": {\\n\",\n    \"        \\\"delta_p\\\": 8.0,\\n\",\n    \"        \\\"delta_n\\\": 8.0,\\n\",\n    \"        \\\"rate\\\": 2.0,\\n\",\n    \"        \\\"stress\\\": 1.0,\\n\",\n    \"        \\\"process\\\": \\\"horowitz\\\",\\n\",\n    \"        \\\"parameters_on_memory\\\": [\\\"sram\\\", \\\"sram\\\"],\\n\",\n    \"        \\\"activations_on_memory\\\": [\\\"sram\\\", \\\"sram\\\"],\\n\",\n    \"        \\\"rd_wr_on_io\\\": [False, False],\\n\",\n    \"        \\\"min_sram_size\\\": [0, 0],\\n\",\n    \"        \\\"source_quantizers\\\": [\\\"int8\\\"],\\n\",\n    \"        \\\"reference_internal\\\": \\\"int8\\\",\\n\",\n    \"        \\\"reference_accumulator\\\": \\\"int32\\\"\\n\",\n    \"        }\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"-QzyWPA-dCxm\"\n   },\n   \"source\": [\n    \"There are a few more things we need to define. Let's bundle them on a dictionary and pass them to `AutoQKeras`.  We will try a maximum of 10 trials (`max_trials`) just to limit the time we will spend finding the best quantization here.  Please note that this parameter is not valid if you are running in `hyperband` mode.\\n\",\n    \"\\n\",\n    \"`output_dir` is the directory where we will store our results. 
Since we are running on a colab, we will let `tempfile` choose a directory for us.\\n\",\n    \"\\n\",\n    \"`learning_rate_optimizer` allows `AutoQKeras` to change the optimization function and the `learning_rate` to try to improve the quantization results. Since it is still experimental, it may be the case that in some cases it will get worse results. \\n\",\n    \"\\n\",\n    \"Because we are tuning filters as well, we should set `transfer_weights` to `False` as the trainable parameters will have different shapes.\\n\",\n    \"\\n\",\n    \"In `AutoQKeras` we have three modes of operation: `random`, `bayesian` and `hyperband`. I recommend the user to refer to `KerasTuner` (https://keras-team.github.io/keras-tuner/) for a complete description of them.\\n\",\n    \"\\n\",\n    \"`tune_filters` can be set to `layer`, `block` or `none`. If `tune_filters` is `block`, we change the filters by the same amount for all layers being quantized in the trial. If `tune_filters` is `layer`, we will possibly change the number of filters for each layer independently. Finally, if `tune_filters` is `none`, we will not perform filter tuning.\\n\",\n    \"\\n\",\n    \"Together with `tune_filters`, `tune_filter_exceptions` allows the user to specify by a regular expression which filters we should not perform filter tuning, which is especially good for the last layers of the network.\\n\",\n    \"\\n\",\n    \"Filter tuning is a very important feature of `AutoQKeras`. When we deep quantize a model, we may need less or more filters for each layer (and you can guess we do not know a priori how many filters we will need for each layer). Let me give you a rationale behind this.\\n\",\n    \"\\n\",\n    \"- **less filters**: let us assume we have two sets of filter coefficients we want to quantize: $[-0.3, 0.2, 0.5, 0.15]$ and $[-0.5, 0.4, 0.1, 0.65]$. 
If we apply a $\\\\tt{binary}$ quantizer with $\\\\tt{scale} = \\\\big\\\\lceil \\\\log_2(\\\\frac{\\\\sum |w|}{N}) \\\\big\\\\rceil$, where $w$ are the filter coefficients and $N$ is the number of coefficients, we will end up with the same filter $\\\\tt{binary}([-0.3, 0.2, 0.5, 0.15]) = \\\\tt{binary}([-0.5, 0.4, 0.1, 0.65]) = [-1,1,1,1] \\\\times 0.5$. In this case we are assuming the $\\\\tt{scale}$ is a power-of-2 number so that it can be efficiently implemented by a shift operation;\\n\",\n    \"\\n\",\n    \"- **more filters**: it is clear that quantization will drop information (just look at the example above) and deep quantization will drop more information, so to recover some of the boundary regions in layers that perform feature extraction, we may need to add more filters to the layer when we quantize it.\\n\",\n    \"\\n\",\n    \"We do not want to quantize the `softmax` layer, which is the last layer of the network. In `AutoQKeras`, you can specify the indexes that you want to perform quantization by specifying the corresponding index of the layer in `Keras`, i.e. 
if you can get the layer as `model.layers[i]` in `Keras`, `i` is the index of the layer.\\n\",\n    \"\\n\",\n    \"Finally, for data parallel distributed training, we should pass the strategy in `distribution_strategy` to `KerasTuner`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 54\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 297,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840405963,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"2-fyACb2dIAN\",\n    \"outputId\": \"a180fa3f-8cc3-4f70-ce70-c05c28f88d1e\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"run_config = {\\n\",\n    \"  \\\"output_dir\\\": tempfile.mkdtemp(),\\n\",\n    \"  \\\"goal\\\": goal,\\n\",\n    \"  \\\"quantization_config\\\": quantization_config,\\n\",\n    \"  \\\"learning_rate_optimizer\\\": False,\\n\",\n    \"  \\\"transfer_weights\\\": False,\\n\",\n    \"  \\\"mode\\\": \\\"random\\\",\\n\",\n    \"  \\\"seed\\\": 42,\\n\",\n    \"  \\\"limit\\\": limit,\\n\",\n    \"  \\\"tune_filters\\\": \\\"layer\\\",\\n\",\n    \"  \\\"tune_filters_exceptions\\\": \\\"^dense\\\",\\n\",\n    \"  \\\"distribution_strategy\\\": cur_strategy,\\n\",\n    \"  # first layer is input, layer two layers are softmax and flatten\\n\",\n    \"  \\\"layer_indexes\\\": range(1, len(model.layers) - 1),\\n\",\n    \"  \\\"max_trials\\\": 20\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"print(\\\"quantizing layers:\\\", [model.layers[i].name for i in run_config[\\\"layer_indexes\\\"]])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 471192,\n     \"status\": 
\"ok\",\n     \"timestamp\": 1591840877167,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"MxlZFpa3fBv2\",\n    \"outputId\": \"4d339846-1832-4a79-89b3-c9c4944dd47a\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"autoqk = AutoQKeras(model, metrics=[\\\"acc\\\"], custom_objects=custom_objects, **run_config)\\n\",\n    \"autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"LW_qN8-lOwL0\"\n   },\n   \"source\": [\n    \"Now, let's see which model is the best model we got.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 3961,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840881173,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"1L7KivAoffaL\",\n    \"outputId\": \"f44b07a3-027d-4d69-9864-b3670815c407\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel = autoqk.get_best_model()\\n\",\n    \"qmodel.save_weights(\\\"qmodel.h5\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"RB2xBRhJiwoh\"\n   },\n   \"source\": [\n    \"We got here >90% reduction in energy when compared to 8-bit tensors and 32-bit accumulators. Remember that our original number was 3.3 uJ for fp32.  The end model has 11 nJ for the quantized model as opposed to 204 nJ for the 8-bit original quantized model. 
As these energy numbers are from high-level energy models, you should remember to consider the relations between them, and not the actual numbers.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"Wy0zcqvQoBnb\"\n   },\n   \"source\": [\n    \"Let's train this model to see how much accuracy we can get out of it.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 71353,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840952535,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"_ipZSEfgoGdb\",\n    \"outputId\": \"b184269d-1161-417a-e1ae-e852dc451561\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel.load_weights(\\\"qmodel.h5\\\")\\n\",\n    \"with cur_strategy.scope():\\n\",\n    \"  optimizer = Adam(lr=0.02)\\n\",\n    \"  qmodel.compile(optimizer=optimizer, loss=\\\"categorical_crossentropy\\\", metrics=[\\\"acc\\\"])\\n\",\n    \"  qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"Fr95jcPROz7p\"\n   },\n   \"source\": [\n    \"One of the problems of trying to quantize the whole thing in one shot is that we may end up with too many choices to make, which will make the entire search space very large. 
In order to reduce the search space, `AutoQKeras` has two methods to enable users to cope with the explosion of choices.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"9zc7ZrnbPIJA\"\n   },\n   \"source\": [\n    \"## Grouping Layers to Use the Same Choice\\n\",\n    \"\\n\",\n    \"In this case, we can provide regular expressions to `limit` to specify layer names that should be grouped together. In our example, suppose we want to group  convolution layers (except the first one) and all activations except the last one to use the same quantization.\\n\",\n    \"\\n\",\n    \"For the first convolution layer, we want to limit the quantization types to fewer choices as the input is already an 8-bit number.  The last activation will be fed to a feature classifier layer, so we may leave it with more bits. Because our `dense` is actually a `Conv2D` operation, we will enable 8-bits for the weights by layer name. \\n\",\n    \"\\n\",\n    \"We first need to look at the names of the layers for this. \"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 428\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 301,\n     \"status\": \"ok\",\n     \"timestamp\": 1591840952867,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"w-d8nhG0pJF0\",\n    \"outputId\": \"6529b630-f382-4e2a-94ef-ba3d9e3f875c\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"pprint.pprint([layer.name for layer in model.layers])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"32Enp890pU_4\"\n   },\n   \"source\": [\n    \"Convolution layers for `mnist` have names specified as `conv2d_[01234]`. 
Activation layers have names specified as `act_[01234]`. So, we can create the following regular expressions to reduce the search space in our model.\\n\",\n    \"\\n\",\n    \"Please note that layer class names always select different quantizers, so the user needs to specify a pattern for layer names if he/she wants to use the same quantization for the group of layers.\\n\",\n    \"\\n\",\n    \"You can see here another feature of the limit. You can specify the maximum number of bits, or cherry pick which quantizers you want to try for a specific layer if instead of the maximum number of bits you specify a list of quantizers from `quantization_config`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"Y5XItp95PHW6\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"limit = {\\n\",\n    \"    \\\"Dense\\\": [8, 8, 4],\\n\",\n    \"    \\\"Conv2D\\\": [4, 8, 4],\\n\",\n    \"    \\\"DepthwiseConv2D\\\": [4, 8, 4],\\n\",\n    \"    \\\"Activation\\\": [4],\\n\",\n    \"    \\\"BatchNormalization\\\": [],\\n\",\n    \"\\n\",\n    \"    \\\"^conv2d_0$\\\": [\\n\",\n    \"                   [\\\"binary\\\", \\\"ternary\\\", \\\"quantized_bits(2,1,1,alpha=1.0)\\\"],\\n\",\n    \"                   8, 4\\n\",\n    \"    ],\\n\",\n    \"    \\\"^conv2d_[1234]$\\\": [4, 8, 4],\\n\",\n    \"    \\\"^act_[0123]$\\\": [4],\\n\",\n    \"    \\\"^act_4$\\\": [8],\\n\",\n    \"    \\\"^dense$\\\": [8, 8, 4]\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"EJs1L-jIie7w\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"run_config = {\\n\",\n    \"  \\\"output_dir\\\": tempfile.mkdtemp(),\\n\",\n    \"  \\\"goal\\\": goal,\\n\",\n    \"  \\\"quantization_config\\\": quantization_config,\\n\",\n    \"  \\\"learning_rate_optimizer\\\": 
False,\\n\",\n    \"  \\\"transfer_weights\\\": False,\\n\",\n    \"  \\\"mode\\\": \\\"random\\\",\\n\",\n    \"  \\\"seed\\\": 42,\\n\",\n    \"  \\\"limit\\\": limit,\\n\",\n    \"  \\\"tune_filters\\\": \\\"layer\\\",\\n\",\n    \"  \\\"tune_filters_exceptions\\\": \\\"^dense\\\",\\n\",\n    \"  \\\"distribution_strategy\\\": cur_strategy,\\n\",\n    \"  \\\"layer_indexes\\\": range(1, len(model.layers) - 1),\\n\",\n    \"  \\\"max_trials\\\": 40\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 993665,\n     \"status\": \"ok\",\n     \"timestamp\": 1591841947161,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"c7eSwXyijhzc\",\n    \"outputId\": \"6c76a21f-cbb3-4bc5-b899-b02c28821b78\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"autoqk = AutoQKeras(model, metrics=[\\\"acc\\\"], custom_objects=custom_objects, **run_config)\\n\",\n    \"autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"7sYp8Z2pnLi1\"\n   },\n   \"source\": [\n    \"Let's see the reduction now.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 7109,\n     \"status\": \"ok\",\n     \"timestamp\": 1591841954308,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"yj826gNhjsfK\",\n    \"outputId\": 
\"2e7f17d7-794e-44f6-d23a-452759727a53\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel = autoqk.get_best_model()\\n\",\n    \"qmodel.save_weights(\\\"qmodel.h5\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"eXMcqxLAnY8t\"\n   },\n   \"source\": [\n    \"Let's train this model for more time to see how much we can get in accuracy.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 68145,\n     \"status\": \"ok\",\n     \"timestamp\": 1591842022471,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"qpT8QgkJnQPa\",\n    \"outputId\": \"61e711db-6187-4047-dae8-9ce2d093f56c\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel.load_weights(\\\"qmodel.h5\\\")\\n\",\n    \"with cur_strategy.scope():\\n\",\n    \"  optimizer = Adam(lr=0.02)\\n\",\n    \"  qmodel.compile(optimizer=optimizer, loss=\\\"categorical_crossentropy\\\", metrics=[\\\"acc\\\"])\\n\",\n    \"  qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"gAV6Kw0QoODq\"\n   },\n   \"source\": [\n    \"## Quantization by Blocks\\n\",\n    \"\\n\",\n    \"In the previous section, we enforced that all decisions were the same in order to reduce the number of options to quantize a model. 
\\n\",\n    \"\\n\",\n    \"Another approach is still to allow models to have each block of layers to makde their own choice, but quantizing the blocks sequentially, either from inputs to outputs, or by quantizing higher energy blocks first.\\n\",\n    \"\\n\",\n    \"The rationale for this method is that if we quantize the blocks one by one, and assuming that each block has $N$ choices, and $B$ blocks, we end up trying $N B$ options, instead of $N^B$ choices.  The reader should note that this is an approximation as there is no guarantee that we will obtain the best quantization possible.\\n\",\n    \"\\n\",\n    \"Should you do sequential from inputs to outputs or starting from the block that has the highest impact?\\n\",\n    \"\\n\",\n    \"If you have a network like ResNet, and if you want to do filter tuning, you need to block the layers by the resnet definition of a block, i.e. including full identity or convolutional blocks, and quantize the model from inputs to outputs, so that you can preserve at each stage the number of channels for the residual block. \\n\",\n    \"\\n\",\n    \"In order to perform quantization by blocks, you need to specify two other parameters in our `run_config`. `blocks` is a list of regular expressions of the groups you want to quantize. If a layer does not match the block pattern, it will not be quantized.  `schedule_block` specifies the mode for block quantization scheduling. It can be `sequential` or `cost` if you want to schedule first the blocks by decreasing cost size (energy or bits).\\n\",\n    \"\\n\",\n    \"In this model, there are a few optimizations that we perform automatically. First, we dynamically reduce the learning rate of the blocks that we have already quantized as setting them to not-trainable does not seem to work, so we still allow them to train, but at a slower pace. 
In addition, we try to dynamically adjust the learning rate for the layer we are trying to quantize as opposed to the learning rate of the unquantized layers. Finally, we transfer the weights of the models we have already quantized whenever we can (if the shapes remain the same). \\n\",\n    \"\\n\",\n    \"Regardless of how we schedule the operations, we amortize the number of trials for the cost of the block (energy or bits with respect to the total energy or number of bits of the network).\\n\",\n    \"\\n\",\n    \"Instead of invoking `AutoQKeras` now, we will invoke `AutoQKeras` scheduler.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"NUz4A6SKnhUf\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"run_config = {\\n\",\n    \"  \\\"output_dir\\\": tempfile.mkdtemp(),\\n\",\n    \"  \\\"goal\\\": goal,\\n\",\n    \"  \\\"quantization_config\\\": quantization_config,\\n\",\n    \"  \\\"learning_rate_optimizer\\\": False,\\n\",\n    \"  \\\"transfer_weights\\\": False,\\n\",\n    \"  \\\"mode\\\": \\\"random\\\",\\n\",\n    \"  \\\"seed\\\": 42,\\n\",\n    \"  \\\"limit\\\": limit,\\n\",\n    \"  \\\"tune_filters\\\": \\\"layer\\\",\\n\",\n    \"  \\\"tune_filters_exceptions\\\": \\\"^dense\\\",\\n\",\n    \"  \\\"distribution_strategy\\\": cur_strategy,\\n\",\n    \"  \\\"layer_indexes\\\": range(1, len(model.layers) - 1),\\n\",\n    \"  \\\"max_trials\\\": 40,\\n\",\n    \"\\n\",\n    \"  \\\"blocks\\\": [\\n\",\n    \"    \\\"^.*_0$\\\",\\n\",\n    \"    \\\"^.*_1$\\\",\\n\",\n    \"    \\\"^.*_2$\\\",\\n\",\n    \"    \\\"^.*_3$\\\",\\n\",\n    \"    \\\"^.*_4$\\\",\\n\",\n    \"    \\\"^dense\\\"\\n\",\n    \"  ],\\n\",\n    \"  \\\"schedule_block\\\": \\\"cost\\\"\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"JWJiZZ9vsORJ\"\n   },\n   \"source\": 
[\n    \"Because specifying regular expressions is error prone, we recommend that you first try to run `AutoQKerasScheduler` in debug mode to print the blocks.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 737\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 395,\n     \"status\": \"ok\",\n     \"timestamp\": 1591842023212,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"uSOxKQGwsqf2\",\n    \"outputId\": \"18647e4f-ef7a-4c6a-aeb8-0c9c2039fdbb\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"pprint.pprint([layer.name for layer in model.layers])\\n\",\n    \"autoqk = AutoQKerasScheduler(model, metrics=[\\\"acc\\\"], custom_objects=custom_objects, debug=True, **run_config)\\n\",\n    \"autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"TQPUKPZhC_SI\"\n   },\n   \"source\": [\n    \"All blocks seem to be fine. 
Let's find the best quantization now.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1938883,\n     \"status\": \"ok\",\n     \"timestamp\": 1591843962106,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"sXt-cRKvDEaL\",\n    \"outputId\": \"36db3217-86ff-4425-ee12-f637a4fc1841\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"autoqk = AutoQKerasScheduler(model, metrics=[\\\"acc\\\"], custom_objects=custom_objects, **run_config)\\n\",\n    \"autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 291\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 396,\n     \"status\": \"ok\",\n     \"timestamp\": 1591843962540,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     \"user_tz\": 420\n    },\n    \"id\": \"ArdGbsXFDK-I\",\n    \"outputId\": \"43730cd5-93fc-4838-c49a-1f3f4151fa54\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel = autoqk.get_best_model()\\n\",\n    \"qmodel.save_weights(\\\"qmodel.h5\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"height\": 1000\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 69779,\n     \"status\": \"ok\",\n     \"timestamp\": 1591844032332,\n     \"user\": {\n      \"displayName\": \"Claudionor Coelho\",\n      \"photoUrl\": \"\",\n      \"userId\": \"01084525977535968041\"\n     },\n     
\"user_tz\": 420\n    },\n    \"id\": \"RHGb6YHFEgtV\",\n    \"outputId\": \"5578ce49-1ee9-4063-deab-1b3db9f4b66b\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel.load_weights(\\\"qmodel.h5\\\")\\n\",\n    \"with cur_strategy.scope():\\n\",\n    \"  optimizer = Adam(lr=0.02)\\n\",\n    \"  qmodel.compile(optimizer=optimizer, loss=\\\"categorical_crossentropy\\\", metrics=[\\\"acc\\\"])\\n\",\n    \"  qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab\": {},\n    \"colab_type\": \"code\",\n    \"id\": \"fJCkMdAcjnoh\"\n   },\n   \"source\": [\n    \"Perfect! You have learned how to perform automatic quantization using AutoQKeras with QKeras.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"colab\": {\n   \"collapsed_sections\": [],\n   \"last_runtime\": {\n    \"build_target\": \"//learning/deepmind/dm_python:dm_notebook3_tpu\",\n    \"kind\": \"private\"\n   },\n   \"name\": \"AutoQKeras.ipynb\",\n   \"provenance\": []\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.7.3\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 1\n}\n"
  },
  {
    "path": "notebook/CodebookQuantization.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"##### Copyright 2020 Google LLC\\n\",\n    \"#\\n\",\n    \"#\\n\",\n    \"# Licensed under the Apache License, Version 2.0 (the \\\"License\\\");\\n\",\n    \"# you may not use this file except in compliance with the License.\\n\",\n    \"# You may obtain a copy of the License at\\n\",\n    \"#\\n\",\n    \"# https://www.apache.org/licenses/LICENSE-2.0\\n\",\n    \"#\\n\",\n    \"# Unless required by applicable law or agreed to in writing, software\\n\",\n    \"# distributed under the License is distributed on an \\\"AS IS\\\" BASIS,\\n\",\n    \"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\\n\",\n    \"# See the License for the specific language governing permissions and\\n\",\n    \"# limitations under the License.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Codebook based quantization\\n\",\n    \"\\n\",\n    \"Codebook based quantization is a non-uniform quantization technique that maps each weight or activation value to the index of a value in the codebook. This allows us to compress weights/activations even further with negligible loss in performance. 
We will demonstrate this by training an object classification model and applying codebook quantization to the activation with the most values.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from tensorflow.keras.regularizers import *\\n\",\n    \"from tensorflow.keras.layers import *\\n\",\n    \"from tensorflow.keras.models import Model\\n\",\n    \"from tensorflow.keras.optimizers import *\\n\",\n    \"from tensorflow.keras.datasets import *\\n\",\n    \"from tensorflow.keras.utils import to_categorical\\n\",\n    \"\\n\",\n    \"from qkeras import *\\n\",\n    \"from qkeras.codebook import *\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"def get_data(name, sample_size=1.0):\\n\",\n    \"  (x_train, y_train), (x_test, y_test) = globals()[name].load_data()\\n\",\n    \"\\n\",\n    \"  if len(x_train.shape) == 3:\\n\",\n    \"    x_train = x_train.reshape(x_train.shape + (1,))\\n\",\n    \"    x_test = x_test.reshape(x_test.shape + (1,))\\n\",\n    \"\\n\",\n    \"  x_train = x_train.astype(\\\"float32\\\")\\n\",\n    \"  x_test = x_test.astype(\\\"float32\\\")\\n\",\n    \"\\n\",\n    \"  mean = np.mean(x_train,axis=(0,1,2,3))\\n\",\n    \"  std = np.std(x_train,axis=(0,1,2,3))\\n\",\n    \"  x_train = (x_train-mean)/(std+1e-7)\\n\",\n    \"  x_test = (x_test-mean)/(std+1e-7)\\n\",\n    \"\\n\",\n    \"  y_train_c = to_categorical(y_train, np.max(y_train) + 1)\\n\",\n    \"  y_test_c = to_categorical(y_test, np.max(y_test) + 1)\\n\",\n    \"\\n\",\n    \"  if sample_size != 1.0:\\n\",\n    \"    indexes = np.asarray(range(x_train.shape[0]))\\n\",\n    \"    np.random.shuffle(indexes)\\n\",\n    \"    indexes = indexes[:int(x_train.shape[0] * sample_size)]\\n\",\n    \"\\n\",\n    \"    x_train = x_train[indexes]\\n\",\n    \"    y_train_c = y_train_c[indexes]\\n\",\n    \"\\n\",\n    \"  return (x_train, y_train_c), (x_test, y_test_c)\\n\",\n    \"\\n\",\n    \"\\n\",\n   
 \"def get_model(\\n\",\n    \"  name, X_train, y_train, X_test, y_test,\\n\",\n    \"  blocks=[[32], [64], [128]],\\n\",\n    \"  quantizer_list=[\\n\",\n    \"      \\\"quantized_relu_po2(4,4)\\\",\\n\",\n    \"      \\\"quantized_relu_po2(4,4)\\\"\\n\",\n    \"  ],\\n\",\n    \"  use_stochastic_rounding=0,\\n\",\n    \"  l1v=None,\\n\",\n    \"  epochs=10,\\n\",\n    \"  load_weights=True):\\n\",\n    \"\\n\",\n    \"  if l1v is None:\\n\",\n    \"    l1v = [0.0] * len(blocks)\\n\",\n    \"\\n\",\n    \"  X_shape = X_train.shape[1:]\\n\",\n    \"  x_i = x = Input(X_shape)\\n\",\n    \"\\n\",\n    \"  for b, block in enumerate(blocks):\\n\",\n    \"    # we are assuming we want to quantize the block that has sparsity\\n\",\n    \"    # so let's add dropout to the next layer\\n\",\n    \"\\n\",\n    \"    if b >= 1 and l1v[b-1] != 0.0:\\n\",\n    \"      x = Dropout(0.3, name=f\\\"drop{b}\\\")(x)\\n\",\n    \"\\n\",\n    \"    for i in range(len(block)):\\n\",\n    \"      x = QConv2D(\\n\",\n    \"          block[i], kernel_size=(3,3), strides=(2,2), padding=\\\"same\\\",\\n\",\n    \"          kernel_quantizer=f\\\"quantized_bits(4, use_stochastic_rounding={use_stochastic_rounding})\\\",\\n\",\n    \"          bias_quantizer=f\\\"quantized_po2(4, use_stochastic_rounding={use_stochastic_rounding})\\\",\\n\",\n    \"          kernel_regularizer=l1(l1v[b]) if l1v[b] != 0.0 else None,\\n\",\n    \"          name=f\\\"d{b}_{i}\\\")(x)\\n\",\n    \"      if i != len(block) - 1:\\n\",\n    \"        if quantizer_list[b] in [\\\"linear\\\", \\\"relu\\\", \\\"softmax\\\", \\\"sigmoid\\\"]:\\n\",\n    \"          x = Activation(quantizer_list[b], name=f\\\"a{b}_{i}\\\")(x)\\n\",\n    \"        else:\\n\",\n    \"          x = QActivation(quantizer_list[b], name=f\\\"a{b}_{i}\\\")(x)\\n\",\n    \"      else:\\n\",\n    \"        x = QBatchNormalization(name=f\\\"bn{b}_{i}\\\")(x)\\n\",\n    \"    if b < len(blocks) - 1:\\n\",\n    \"      if quantizer_list[b] in 
[\\\"linear\\\", \\\"relu\\\", \\\"softmax\\\", \\\"sigmoid\\\"]:\\n\",\n    \"        x = Activation(quantizer_list[b], name=f\\\"a{b}_{len(block)-1}\\\")(x)\\n\",\n    \"      else:\\n\",\n    \"        x = QActivation(quantizer_list[b], name=f\\\"a{b}_{len(block)-1}\\\")(x)\\n\",\n    \"    else:\\n\",\n    \"      if len(block) > 0:\\n\",\n    \"        x = QActivation(f\\\"quantized_relu(6,2, use_stochastic_rounding={use_stochastic_rounding})\\\", \\n\",\n    \"                        name=f\\\"a{b}_{len(block)-1}\\\")(x)\\n\",\n    \"      x = Flatten(name=\\\"flatten\\\")(x)\\n\",\n    \"      x = QDense(\\n\",\n    \"          y_train.shape[1], name=f\\\"d{len(blocks)-1}_{len(block)}\\\")(x)\\n\",\n    \"      x = Activation(\\\"softmax\\\", name=f\\\"a{len(blocks)-1}_{len(block)}\\\")(x)\\n\",\n    \"\\n\",\n    \"  model = Model(inputs=x_i, outputs=x)\\n\",\n    \"  model.summary()\\n\",\n    \"\\n\",\n    \"  model.compile(loss=\\\"categorical_crossentropy\\\", optimizer=Adam(0.001), metrics=[\\\"acc\\\"])\\n\",\n    \"\\n\",\n    \"  try:\\n\",\n    \"    if load_weights and os.path.isfile(name + \\\".h5\\\"):\\n\",\n    \"      print('Found file...')\\n\",\n    \"      model.load_weights(name + \\\".h5\\\")\\n\",\n    \"    else:\\n\",\n    \"      model.fit(X_train, y_train, validation_data=(X_test, y_test),\\n\",\n    \"                batch_size=128, epochs=epochs, verbose=2)\\n\",\n    \"      model.save_weights(name + \\\".h5\\\")\\n\",\n    \"  except:\\n\",\n    \"    model.fit(X_train, y_train, validation_data=(X_test, y_test),\\n\",\n    \"              batch_size=128, epochs=epochs, verbose=2)\\n\",\n    \"    model.save_weights(name + \\\".h5\\\")\\n\",\n    \"\\n\",\n    \"  return model\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"name = \\\"cifar10\\\"\\n\",\n    \"(X_train, y_train), (X_test, y_test) = get_data(name, sample_size=1)\\n\",\n    \"model = get_model(\\n\",\n    \"  name, X_train, y_train, X_test, y_test,\\n\",\n    \"  
blocks=[[32, 32], [64, 64], [128]],\\n\",\n    \"  quantizer_list=[\\\"quantized_relu(6,2)\\\", \\\"quantized_relu(6,2)\\\"],\\n\",\n    \"  epochs=50,\\n\",\n    \"  load_weights=True\\n\",\n    \")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from qkeras.codebook import *\\n\",\n    \"\\n\",\n    \"cb_tables, models, km_models = activation_compression(\\n\",\n    \"  model, \\n\",\n    \"  {'loss' : \\\"categorical_crossentropy\\\", 'metrics' : [\\\"acc\\\"]},\\n\",\n    \"  [2], 3, \\n\",\n    \"  X_train, y_train, \\n\",\n    \"  X_test, y_test,\\n\",\n    \"  sample_size=0.3\\n\",\n    \")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"q = models[0].layers[-1].quantizer\\n\",\n    \"in_table, out_table = cb_tables[0]\\n\",\n    \"print(q)\\n\",\n    \"print('in_table:', in_table)\\n\",\n    \"print('out_table:', out_table)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"for i,x in enumerate(q.range()):\\n\",\n    \"  print(f'{x:8}, {in_table[out_table[i]]:6}')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Weight compression using codebook quantization\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"conv_weights = model.layers[1].weights[0].numpy()\\n\",\n    \"print(conv_weights.shape)\\n\",\n    \"quantizer = model.layers[1].kernel_quantizer_internal\\n\",\n    \"print(quantizer)\\n\",\n    \"axis = 3\\n\",\n    \"bits = 3\\n\",\n    \"index_table, codebook_table = weight_compression(\\n\",\n    \"  conv_weights, \\n\",\n    \"  bits, \\n\",\n    \"  axis, \\n\",\n    \"  
quantizer)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print(codebook_table.shape)\\n\",\n    \"codebook_table[0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print(index_table.shape)\\n\",\n    \"index_table[:,:,:,0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"new_conv_weights = np.zeros(conv_weights.shape)\\n\",\n    \"for i in range(conv_weights.shape[axis]):\\n\",\n    \"  new_conv_weights[:,:,:,i] = codebook_table[i][index_table[:,:,:,i]]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"new_conv_weights[:,:,:,0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"conv_weights[:,:,:,0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"bias = model.layers[1].weights[1].numpy()\\n\",\n    \"model.layers[1].set_weights([new_conv_weights, bias])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.evaluate(X_test, y_test)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Py3\",\n   \"language\": \"python\",\n   \"name\": \"py3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   
\"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.7.7\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n}\n"
  },
  {
    "path": "notebook/QKerasTutorial.ipynb",
    "content": "{\n \"cells\": [\n   {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"##### Copyright 2020 Google LLC\\n\",\n    \"#\\n\",\n    \"#\\n\",\n    \"# Licensed under the Apache License, Version 2.0 (the \\\"License\\\");\\n\",\n    \"# you may not use this file except in compliance with the License.\\n\",\n    \"# You may obtain a copy of the License at\\n\",\n    \"#\\n\",\n    \"# https://www.apache.org/licenses/LICENSE-2.0\\n\",\n    \"#\\n\",\n    \"# Unless required by applicable law or agreed to in writing, software\\n\",\n    \"# distributed under the License is distributed on an \\\"AS IS\\\" BASIS,\\n\",\n    \"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\\n\",\n    \"# See the License for the specific language governing permissions and\\n\",\n    \"# limitations under the License.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# QKeras Lab Book\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"__QKeras__ is a quantization extension to Keras that provides drop-in replacement for some of the Keras layers, especially the ones that creates parameters and activation layers, and perform arithmetic operations, so that we can quickly create a deep quantized version of Keras network.\\n\",\n    \"\\n\",\n    \"According to Tensorflow documentation, Keras is a high-level API to build and train deep learning models. It's used for fast prototyping, advanced research, and production, with three key advantages:\\n\",\n    \"\\n\",\n    \"- User friendly<br>\\n\",\n    \"Keras has a simple, consistent interface optimized for common use cases. 
It provides clear and actionable feedback for user errors.\\n\",\n    \"\\n\",\n    \"- Modular and composable<br>\\n\",\n    \"Keras models are made by connecting configurable building blocks together, with few restrictions.\\n\",\n    \"\\n\",\n    \"- Easy to extend<br>\\n\",\n    \"Write custom building blocks to express new ideas for research. Create new layers, loss functions, and develop state-of-the-art models.\\n\",\n    \"\\n\",\n    \"__QKeras__ is being designed to extend the functionality of Keras using Keras' design principle, i.e. being user friendly, modular and extensible, adding to it being \\\"minimally intrusive\\\" of Keras native functionality.\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Related Work\\n\",\n    \"\\n\",\n    \"__QKeras__ has been implemented based on the work of _\\\"B.Moons et al. - Minimum Energy Quantized Neural Networks\\\"_ , Asilomar Conference on Signals, Systems and Computers, 2017 and _“Zhou, S. et al. DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients,”_ but the framework should be easily extensible. The original code from QNN can be found below.\\n\",\n    \"\\n\",\n    \"https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\\n\",\n    \"\\n\",\n    \"__QKeras__ extends QNN by providing a richer set of layers (including SeparableConv2D, DepthwiseConv2D, ternary and stochastic ternary quantizations), besides some functions to aid the estimation for the accumulators and conversion between non-quantized to quantized networks. 
Finally, our main goal is ease of use, so we attempt to make QKeras layers a true drop-in replacement for Keras, so that users can easily exchange non-quantized layers by quantized ones.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Layers Implemented in QKeras\\n\",\n    \"\\n\",\n    \"The following layers have been implemented in __QKeras__.\\n\",\n    \"\\n\",\n    \"- __`QDense`__\\n\",\n    \"\\n\",\n    \"- __`QConv1D`__\\n\",\n    \"\\n\",\n    \"- __`QConv2D`__\\n\",\n    \"\\n\",\n    \"- __`QDepthwiseConv2D`__\\n\",\n    \"\\n\",\n    \"- __`QSeparableConv2D`__ (depthwise + pointwise expanded, extended from MobileNet SeparableConv2D implementation)\\n\",\n    \"\\n\",\n    \"- __`QActivation`__\\n\",\n    \"\\n\",\n    \"- __`QAveragePooling2D`__ (in fact, a AveragePooling2D stacked with a QActivation layer for quantization of the result, so this layer does not exist)\\n\",\n    \"\\n\",\n    \"- __`QBatchNormalization`__\\n\",\n    \"\\n\",\n    \"- __`QOctaveConv2D`__\\n\",\n    \"\\n\",\n    \"It is worth noting that not all functionality is safe at this time to be used with other high-level operations, such as with layer wrappers. For example, `Bidirectional` layer wrappers are used with RNNs.  If this is required, we encourage users to use quantization functions invoked as strings instead of the actual functions as a way through this, but we may change that implementation in the future.\\n\",\n    \"\\n\",\n    \"__`QSeparableConv2D`__ is implemented as a depthwise + pointwise quantized expansions, which is extended from the `SeparableConv2D` implementation of MobileNet. With the exception of __`QBatchNormalization`__, if quantizers are not specified, no quantization is applied to the layer and it ends up behaving like the original unquantized layers. 
On the other hand, __`QBatchNormalization`__ has been implemented differently as if the user does not specify any quantizers as parameters, it uses a set up that has worked best when attempting to implement quantization efficiently in hardware and software, i.e. `gamma` and `variance` with po2 quantizers (as they become shift registers in an implementation, and with further constraining variance po2 quantizer to use quadratic approximation as we take the square root of the variance to obtain the standard deviation), `beta` using po2 quantizer to maintain the dynamic range aspect of the center parameter, and `mean` remaining unquantized, as it inherits the properties of the previous layer.\\n\",\n    \"\\n\",\n    \"Activation has been migrated to __`QActivation`__ although __QKeras__ also recognizes activation parameter used in convolutional and dense layers.\\n\",\n    \"\\n\",\n    \"We have improved the setup of quantization as convolution, dense and batch normalization layers now notify the quantizers when the quantizers are used as internal parameters, so the user does not need to worry about setting up options that work best in `weights` and `bias` like `alpha` and `use_stochastic_rounding` (although users may override the automatic setup).\\n\",\n    \"\\n\",\n    \"Finally, in the current version, we have eliminated the need to set up the range of the quantizers like `kernel_range` in __`QDense`__. This is automatically computed internally at this point. Although we kept the parameters for backward compatibility, these parameters will be removed in the future.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Activation Layers and Quantizers Implemented in __QKeras__\\n\",\n    \"\\n\",\n    \"Quantizers and activation layers are treated interchangeably in __QKeras__.   
\\n\",\n    \"\\n\",\n    \"The list of quantizers and its parameters is listed below.\\n\",\n    \"\\n\",\n    \"- __`smooth_sigmoid(x)`__\\n\",\n    \"\\n\",\n    \"- __`hard_sigmoid(x)`__\\n\",\n    \"\\n\",\n    \"- __`binary_sigmoid(x)`__\\n\",\n    \"\\n\",\n    \"- __`smooth_tanh(x)`__\\n\",\n    \"\\n\",\n    \"- __`hard_tanh(x)`__\\n\",\n    \"\\n\",\n    \"- __`binary_tanh(x)`__\\n\",\n    \"\\n\",\n    \"- __`quantized_bits(bits=8, integer=0, symmetric=0, keep_negative=1, alpha=None, use_stochastic_rounding=False)(x)`__\\n\",\n    \"\\n\",\n    \"- __`bernoulli(alpha=None, temperature=6.0, use_real_sigmoid=True)(x)`__\\n\",\n    \"\\n\",\n    \"- __`stochastic_ternary(alpha=None, threshold=None, temperature=8.0, use_real_sigmoid=True)(x)`__\\n\",\n    \"\\n\",\n    \"- __`ternary(alpha=None, threshold=None, use_stochastic_rounding=False)(x)`__\\n\",\n    \"\\n\",\n    \"- __`stochastic_binary(alpha=None, temperature=6.0, use_real_sigmoid=True)(x)`__\\n\",\n    \"\\n\",\n    \"- __`binary(use_01=False, alpha=None, use_stochastic_rounding=False)(x)`__\\n\",\n    \"\\n\",\n    \"- __`quantized_relu(bits=8, integer=0, use_sigmoid=0, use_stochastic_rounding=False)(x)`__\\n\",\n    \"\\n\",\n    \"- __`quantized_ulaw(bits=8, integer=0, symmetric=0, u=255.0)(x)`__\\n\",\n    \"\\n\",\n    \"- __`quantized_tanh(bits=8, integer=0, symmetric=0, use_stochastic_rounding=False)(x)`__\\n\",\n    \"\\n\",\n    \"- __`quantized_po2(bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False)(x)`__\\n\",\n    \"\\n\",\n    \"- __`quantized_relu_po2(bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False)(x)`__\\n\",\n    \"\\n\",\n    \"The __`stochastic_*`__ functions and __`bernoulli`__ rely on stochastic versions of the activation functions, so they are best suited for weights and biases.  
They draw a random number with uniform distribution from `sigmoid` of the input x, and result is based on the expected value of the activation function. Please refer to the papers if you want to understand the underlying theory, or the documentation in qkeras/quantizers.py. The parameter `temperature` determines how steep the sigmoid function will behave, and the default values seem to work fine.\\n\",\n    \"\\n\",\n    \"As we lower the number of bits, rounding becomes problematic as it adds bias to the number system. Numpy attempts to reduce the effects of bias by rounding to even instead of rounding to infinity. Recent results (_\\\"Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, Pritish Narayanan; Deep Learning with Limited Numerical Precision_ [https://arxiv.org/abs/1502.02551]) suggested using stochastic rounding, which uses the fractional part of the number as a probability to round up or down. We can turn on stochastic rounding in some quantizers by setting `use_stochastic_rounding` to `True` in __`quantized_bits`__, __`binary`__, __`ternary`__, __`quantized_relu`__ and __`quantized_tanh`__, __`quantized_po2`__, and __`quantized_relu_po2`__. Please note that if one is considering an efficient hardware or software implementation, we should avoid setting this flag to `True` in activations as it may affect the efficiency of an implementation. In addition, as mentioned before, we already set this flag to `True` in some quantized layers when the quantizers are used as weights/biases.\\n\",\n    \"\\n\",\n    \"The parameters `bits` specify the number of bits for the quantization, and `integer` specifies how many bits of `bits` are to the left of the decimal point. 
Finally, our experience in training networks with __`QSeparableConv2D`__, it is advisable to allocate more bits between the depthwise and the pointwise quantization, and both __`quantized_bits`__ and __`quantized_tanh`__ should use symmetric versions for weights and bias in order to properly converge and eliminate the bias.\\n\",\n    \"\\n\",\n    \"We have substantially improved stochastic rounding implementation in __QKeras__ $>= 0.7$, and added a symbolic way to compute alpha in __`binary`__, __`stochastic_binary`__, __`ternary`__, __`stochastic_ternary`__, __`bernoulli`__ and __`quantized_bits`__. Right now, a scale and the threshold (for ternary and stochastic_ternary) can be computed independently of the distribution of the inputs, which is required when using these quantizers in weights.\\n\",\n    \"\\n\",\n    \"The main problem in using very small bit widths in large deep learning networks stem from the fact that weights are initialized with variance roughly $\\\\propto \\\\sqrt{1/\\\\tt{fanin}}$, but during the training the variance shifts outwards.  If the smallest quantization representation (threshold in ternary networks) is smaller than $\\\\sqrt{1/\\\\tt{fanin}}$, we run the risk of having the weights stuck at 0 during training. So, the weights need to dynamically adjust to the variance shift from initialization to the final training.  This can be done by scaling the quantization. \\n\",\n    \"\\n\",\n    \"Scale is computed using the formula $\\\\sum(\\\\tt{dot}(Q,x))/\\\\sum(\\\\tt{dot}(Q,Q))$ which is described in several papers, including _Mohammad Rastegari, Vicente Ordonez, Joseph Redmon, Ali Farhadi \\\"XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks\\\"_ [https://arxiv.org/abs/1603.05279]. Scale computation is computed for each output channel, making our implementation sometimes behaving like a mini-batch normalization adjustment.  
\\n\",\n    \"\\n\",\n    \"For __`ternary`__ and __`stochastic_ternary`__, we iterate between scale computation and threshold computation, as presented in _K. Hwang and W. Sung, \\\"Fixed-point feedforward deep neural network design using weights +1, 0, and −1,\\\" 2014 IEEE Workshop on Signal Processing Systems (SiPS), Belfast, 2014, pp. 1-6_ which makes the search for threshold and scale tolerant to different input distributions. This is especially important when we need to consider that the threshold shifts depending on the input distribution,  affecting the scale as well, as pointed out by _Fengfu Li, Bo Zhang, Bin Liu, \\\"Ternary Weight Networks\\\"_ [https://arxiv.org/abs/1605.04711]. \\n\",\n    \"\\n\",\n    \"When computing the scale in these quantizers, if `alpha=\\\"auto\\\"`, we compute the scale as a floating point number. If `alpha=\\\"auto_po2\\\"`, we enforce the scale to be a power of 2, meaning that an actual hardware or software implementation can be performed by just shifting the result of the convolution or dense layer to the right or left by checking the sign of the scale (positive shifts left, negative shifts right), and taking the log2 of the scale.  This behavior is compatible with shared exponent approaches, as it performs a shift adjustment to the channel.\\n\",\n    \"\\n\",\n    \"We have implemented a method for each quantizer called __`_set_trainable_parameter`__ that instructs __QKeras__ to set best options when this quantizer is used as a weight or for gamma, variance and beta in __`QBatchNormalization`__, so in principle, users should not worry about this.\\n\",\n    \"\\n\",\n    \"The following pictures show the behavior of __`binary`__ vs stochastic rounding in __`binary`__ vs __`stochastic_binary`__ (Figure 1) and __`ternary`__ vs stochastic rounding in __`ternary`__ and __`stochastic_ternary`__ (Figure 2). 
We generated a normally distributed input with mean 0.0 and standard deviation of 0.02, ordered the data, and ran the quantizer 1,000 times, averaging the result for each case. Note that because of scale, the output does not range from $[-1.0, +1.0]$, but from $[-\\\\tt{scale}, +\\\\tt{scale}]$.\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"<img src=\\\"images/figure1.png\\\" alt=\\\"Binary quantizers\\\" title=\\\"Figure 1: Behavior of binary quantizers\\\" style=\\\"width:60%;height:60%;\\\"/><center>Figure 1: Behavior of binary quantizers</center>\\n\",\n    \"\\n\",\n    \"<img src=\\\"images/figure2.png\\\" alt=\\\"Ternary quantizers\\\" title=\\\"Figure 2: Behavior of ternary quantizers\\\" style=\\\"width:60%;height:60%;\\\"/><center>Figure 2: Behavior of ternary quantizers</center>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Using QKeras\\n\",\n    \"\\n\",\n    \"__QKeras__ works by tagging all variables and weights/bias created by Keras as well as output of arithmetic layers by quantized functions. 
Quantized functions can be instantiated directly in __`QDense`__/__`QConv2D`__/__`QSeparableConv2D`__ functions, and they can be passed to __`QActivation`__, which act as a merged quantization and activation function.\\n\",\n    \"\\n\",\n    \"In order to successfully quantize a model, users need to replace layers that create variables (trainable or not) (`Dense`, `Conv2D`, etc) by their equivalent ones in __Qkeras__ (__`QDense`__, __`QConv2D`__, etc), and any layers that perform math operations need to be quantized afterwards.\\n\",\n    \"\\n\",\n    \"Quantized values are clipped between their maximum and minimum quantized representation (which may be different than $[-1.0, 1.0]$), although for `po2` type of quantizers, we still recommend the users to specify the parameter for `max_value`.\\n\",\n    \"\\n\",\n    \"An example of a very simple network is given below in Keras.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import six\\n\",\n    \"import numpy as np\\n\",\n    \"import tensorflow.compat.v2 as tf\\n\",\n    \"\\n\",\n    \"from tensorflow.keras.layers import *\\n\",\n    \"from tensorflow.keras.models import Model\\n\",\n    \"from tensorflow.keras.datasets import mnist\\n\",\n    \"from tensorflow.keras.utils import to_categorical\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def CreateModel(shape, nb_classes):\\n\",\n    \"    x = x_in = Input(shape)\\n\",\n    \"    x = Conv2D(18, (3, 3), name=\\\"conv2d_1\\\")(x)\\n\",\n    \"    x = Activation(\\\"relu\\\", name=\\\"act_1\\\")(x)\\n\",\n    \"    x = Conv2D(32, (3, 3), name=\\\"conv2d_2\\\")(x)\\n\",\n    \"    x = Activation(\\\"relu\\\", name=\\\"act_2\\\")(x)\\n\",\n    \"    x = Flatten(name=\\\"flatten\\\")(x)\\n\",\n    \"    x = Dense(nb_classes, name=\\\"dense\\\")(x)\\n\",\n    \"    x = 
Activation(\\\"softmax\\\", name=\\\"softmax\\\")(x)\\n\",\n    \"    \\n\",\n    \"    model = Model(inputs=x_in, outputs=x)\\n\",\n    \"\\n\",\n    \"    return model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_data():\\n\",\n    \"    (x_train, y_train), (x_test, y_test) = mnist.load_data()\\n\",\n    \"    x_train = x_train.reshape(x_train.shape + (1,)).astype(\\\"float32\\\")\\n\",\n    \"    x_test = x_test.reshape(x_test.shape + (1,)).astype(\\\"float32\\\")\\n\",\n    \"\\n\",\n    \"    x_train /= 256.0\\n\",\n    \"    x_test /= 256.0\\n\",\n    \"\\n\",\n    \"    x_mean = np.mean(x_train, axis=0)\\n\",\n    \"\\n\",\n    \"    x_train -= x_mean\\n\",\n    \"    x_test -= x_mean\\n\",\n    \"\\n\",\n    \"    nb_classes = np.max(y_train)+1\\n\",\n    \"    y_train = to_categorical(y_train, nb_classes)\\n\",\n    \"    y_test = to_categorical(y_test, nb_classes)\\n\",\n    \"\\n\",\n    \"    return (x_train, y_train), (x_test, y_test)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"(x_train, y_train), (x_test, y_test) = get_data()\\n\",\n    \"\\n\",\n    \"model = CreateModel(x_train.shape[1:], y_train.shape[-1])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.compile(loss=\\\"categorical_crossentropy\\\", optimizer=\\\"adam\\\", metrics=[\\\"accuracy\\\"])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.fit(x_train, y_train, epochs=3, batch_size=128, validation_data=(x_test, y_test), verbose=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Great! 
it is relatively easy to create a network that converges in MNIST with very high test accuracy. The reader should note that we named all the layers as it will make it easier to automatically convert the network by name.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.summary()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"The corresponding quantized network is presented below.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from qkeras import *\\n\",\n    \"\\n\",\n    \"def CreateQModel(shape, nb_classes):\\n\",\n    \"    x = x_in = Input(shape)\\n\",\n    \"    x = QConv2D(18, (3, 3),\\n\",\n    \"        kernel_quantizer=\\\"stochastic_ternary\\\", \\n\",\n    \"        bias_quantizer=\\\"quantized_po2(4)\\\",\\n\",\n    \"        name=\\\"conv2d_1\\\")(x)\\n\",\n    \"    x = QActivation(\\\"quantized_relu(2)\\\", name=\\\"act_1\\\")(x)\\n\",\n    \"    x = QConv2D(32, (3, 3), \\n\",\n    \"        kernel_quantizer=\\\"stochastic_ternary\\\", \\n\",\n    \"        bias_quantizer=\\\"quantized_po2(4)\\\",\\n\",\n    \"        name=\\\"conv2d_2\\\")(x)\\n\",\n    \"    x = QActivation(\\\"quantized_relu(2)\\\", name=\\\"act_2\\\")(x)\\n\",\n    \"    x = Flatten(name=\\\"flatten\\\")(x)\\n\",\n    \"    x = QDense(nb_classes,\\n\",\n    \"        kernel_quantizer=\\\"quantized_bits(3,0,1)\\\",\\n\",\n    \"        bias_quantizer=\\\"quantized_bits(3)\\\",\\n\",\n    \"        name=\\\"dense\\\")(x)\\n\",\n    \"    x = Activation(\\\"softmax\\\", name=\\\"softmax\\\")(x)\\n\",\n    \"    \\n\",\n    \"    model = Model(inputs=x_in, outputs=x)\\n\",\n    \"    \\n\",\n    \"    return model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"metadata\": {},\n   \"outputs\": [],\n   
\"source\": [\n    \"qmodel = CreateQModel(x_train.shape[1:], y_train.shape[-1])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 10,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from tensorflow.keras.optimizers import Adam\\n\",\n    \"\\n\",\n    \"qmodel.compile(\\n\",\n    \"    loss=\\\"categorical_crossentropy\\\",\\n\",\n    \"    optimizer=Adam(0.0005),\\n\",\n    \"    metrics=[\\\"accuracy\\\"])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 44,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test), verbose=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"You should note that we had to lower the learning rate and train the network for longer time. On the other hand, the network should not involve in any multiplications in the convolution layers, and very small multipliers in the dense layers.\\n\",\n    \"\\n\",\n    \"Please note that the last `Activation` was not changed to __`QActivation`__ as during inference we usually perform the operation `argmax` on the result instead of `softmax`.\\n\",\n    \"\\n\",\n    \"It seems it is a lot of code to write besides the main network, but in fact, this additional code is only specifying the sizes of the weights and the sizes of the outputs in the case of the activations.  
Right now, we do not have a way to extract this information from the network structure or problem we are trying to solve, and if we quantize a layer too much, we may end up not being able to recover from that later on.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Converting a Model Automatically\\n\",\n    \"\\n\",\n    \"In addition to the drop-in replacement of Keras functions, we have written the following function to assist anyone who wants to quantize a network.\\n\",\n    \"\\n\",\n    \"__`model_quantize(model, quantizer_config, activation_bits, custom_objects=None, transfer_weights=False)`__\\n\",\n    \"\\n\",\n    \"This function converts a non-quantized model (such as the one from `model` in the previous example) into a quantized version, by applying a configuration specified by the dictionary `quantizer_config`, and `activation_bits` specified for unnamed activation functions, with this parameter probably being removed in future versions.\\n\",\n    \"\\n\",\n    \"The parameter `custom_objects` specifies object dictionary unknown to Keras, required when you copy a model with lambda layers, or customized layer functions, for example, and if `transfer_weights` is `True`, the returned model will have as initial weights the weights from the original model, instead of using random initial weights.\\n\",\n    \"\\n\",\n    \"The dictionary specified in `quantizer_config` can be indexed by a layer name or layer class name. In the example below, conv2d_1 corresponds to the first convolutional layer of the example, while  QConv2D corresponds to the default behavior of two dimensional convolutional layers. The reader should note that right now we recommend using __`QActivation`__ with a dictionary to avoid the conversion of activations such as `softmax` and `linear`.  In addition, although we could use `activation` field in the layers, we do not recommend that. 
\\n\",\n    \"\\n\",\n    \"`{\\n\",\n    \"  \\\"conv2d_1\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"stochastic_ternary\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_po2(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QConv2D\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"stochastic_ternary\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_po2(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QDense\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"quantized_bits(3,0,1)\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_bits(3)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"act_1\\\": \\\"quantized_relu(2)\\\",\\n\",\n    \"  \\\"QActivation\\\": { \\\"relu\\\": \\\"quantized_relu(2)\\\" }\\n\",\n    \"}`\\n\",\n    \"\\n\",\n    \"In the following example, we will quantize the model using a different strategy.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 73,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"config = {\\n\",\n    \"  \\\"conv2d_1\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"stochastic_binary\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_po2(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QConv2D\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"stochastic_ternary\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_po2(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QDense\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"quantized_bits(4,0,1)\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_bits(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QActivation\\\": { \\\"relu\\\": \\\"binary\\\" },\\n\",\n    \"  \\\"act_2\\\": \\\"quantized_relu(3)\\\",\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 75,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from qkeras.utils import model_quantize\\n\",\n    \"\\n\",\n    \"qmodel = 
model_quantize(model, config, 4, transfer_weights=True)\\n\",\n    \"\\n\",\n    \"for layer in qmodel.layers:\\n\",\n    \"    if hasattr(layer, \\\"kernel_quantizer\\\"):\\n\",\n    \"        print(layer.name, \\\"kernel:\\\", str(layer.kernel_quantizer_internal), \\\"bias:\\\", str(layer.bias_quantizer_internal))\\n\",\n    \"    elif hasattr(layer, \\\"quantizer\\\"):\\n\",\n    \"        print(layer.name, \\\"quantizer:\\\", str(layer.quantizer))\\n\",\n    \"\\n\",\n    \"print()\\n\",\n    \"qmodel.summary()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 76,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel.compile(\\n\",\n    \"    loss=\\\"categorical_crossentropy\\\",\\n\",\n    \"    optimizer=Adam(0.001),\\n\",\n    \"    metrics=[\\\"accuracy\\\"])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 78,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test), verbose=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"in addition to __`model_quantize`__, __QKeras__ offers the additional utility functions.\\n\",\n    \"\\n\",\n    \"__`BinaryToThermometer(x, classes, value_range, with_residue=False, merge_with_channels, use_two_hot_encoding=False)`__\\n\",\n    \"\\n\",\n    \"This function converts a dense binary encoding of inputs to one-hot (with scales).\\n\",\n    \"\\n\",\n    \"Given input matrix `x` with values (for example) 0, 1, 2, 3, 4, 5, 6, 7, create a number of classes as follows:\\n\",\n    \"\\n\",\n    \"If classes=2, value_range=8, with_residue=0, a true one-hot representation is created, and the remaining bits are truncated, using one bit representation.\\n\",\n    \"\\n\",\n    \"`\\n\",\n    \"0 - [1,0] 1 - [1,0] 2 - [1,0] 3 - [1,0]\\n\",\n    \"4 - [0,1] 5 - [0,1] 6 - [0,1] 7 - [0,1]\\n\",\n    \"`\\n\",\n    
\"\\n\",\n    \"If classes=2, value_range=8, with_residue=1, the residue is added to the one-hot class, and the class will use 2 bits (for the remainder) + 1 bit (for the one hot)\\n\",\n    \"\\n\",\n    \"`\\n\",\n    \"0 - [1,0] 1 - [1.25,0] 2 - [1.5,0] 3 - [1.75,0]\\n\",\n    \"4 - [0,1] 5 - [0,1.25] 6 - [0,1.5] 7 - [0,1.75]\\n\",\n    \"`\\n\",\n    \"\\n\",\n    \"The arguments of this function are as follows:\\n\",\n    \"\\n\",\n    \"`\\n\",\n    \"x: the input vector we want to convert. typically its dimension will be\\n\",\n    \"      (B,H,W,C) for an image, or (B,T,C) or (B,C) for a 1D signal, where\\n\",\n    \"      B=batch, H=height, W=width, C=channels or features, T=time for time\\n\",\n    \"      series.\\n\",\n    \"classes: the number of classes to (or log2(classes) bits) to use of the\\n\",\n    \"      values.\\n\",\n    \"value_range: max(x) - min(x) over all possible x values (e.g. for 8 bits,\\n\",\n    \"      we would use 256 here).\\n\",\n    \"with_residue: if true, we split the value range into two sets and add\\n\",\n    \"      the decimal fraction of the set to the one-hot representation for partial\\n\",\n    \"      thermometer representation.\\n\",\n    \"merge_with_channels: if True, we will not create a separate dimension\\n\",\n    \"      for the resulting matrix, but we will merge this dimension with\\n\",\n    \"      the last dimension.\\n\",\n    \"use_two_hot_encoding: if true, we will distribute the weight between\\n\",\n    \"      the current value and the next one to make sure the numbers will always\\n\",\n    \"      be < 1.\\n\",\n    \"`\\n\",\n    \"\\n\",\n    \"__`model_save_quantized_weights(model, filename)`__\\n\",\n    \"\\n\",\n    \"This function saves the quantized weights in the model or writes the quantized weights in the file `filename` for production, as the weights during training are maintained non-quantized because of training. 
Typically, you should call this function before productizing the final model.  The saved model is compatible with Keras for inference, so for power-of-2 quantization, we will not return `(sign, round(log2(weights)))`, but rather `(-1)**sign*2**(round(log2(weights)))`. We also return a dictionary containing the name of the layer and the quantized weights, and for power-of-2 quantizations, we will return `sign` and `round(log2(weights))` so that other tools can properly process that.\\n\",\n    \"\\n\",\n    \"__`load_qmodel(filepath, custom_objects=None, compile=True)`__\\n\",\n    \"\\n\",\n    \"Load quantized model from Keras's model.save() h5 file, where filepath is the path to the filename, custom_objects is an optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization, and compile instructs __QKeras__ to compile the model after reading it.  If an optimizer was found as part of the saved model, the model is already compiled. Otherwise, the model is uncompiled and a warning will be displayed. When compile is set to `False`, the compilation is omitted without any warning.\\n\",\n    \"\\n\",\n    \"__`print_model_sparsity(model)`__\\n\",\n    \"\\n\",\n    \"Prints sparsity for the pruned layers in the model.\\n\",\n    \"\\n\",\n    \"__`quantized_model_debug(model, X_test, plot=False)`__\\n\",\n    \"\\n\",\n    \"Debugs and plots model weights and activations. It is usually useful to print weights, biases and activations for inputs and outputs when debugging a model.  model contains the mixed quantized/unquantized layers for a model. We only print/plot activations and weights/biases for quantized models with the exception of Activation. X_test is the set of inputs we will use to compute activations, and we recommend that the user uses a subsample from the entire set he/she wants to debug. 
if plot is True, we also plot weights and activations (inputs/outputs) for each layer.\\n\",\n    \"\\n\",\n    \"__`extract_model_operations(model)`__\\n\",\n    \"\\n\",\n    \"As each operation depends on the quantization method for the weights/bias and on the quantization of the inputs, we estimate which operations are required for each layer of the quantized model.  For example, inputs of a __`QDense`__ layer are quantized using __`quantized_relu_po2`__ and weights are quantized using __`quantized_bits`__, the matrix multiplication can be implemented as a barrel shifter + accumulator without multiplication operations. Right now, we return for each layer one of the following operations: `mult`, `barrel`, `mux`, `adder`, `xor`, and the sizes of the operator.\\n\",\n    \"\\n\",\n    \"We are currently refactoring this function and it may be substantially changed in the future.\\n\",\n    \"\\n\",\n    \"__`print_qstats(model)`__\\n\",\n    \"\\n\",\n    \"Prints statistics of number of operations per operation type and layer so that user can see how big the model is. 
This function utilizes __`extract_model_operations`__.\\n\",\n    \"\\n\",\n    \"An example of the output is presented below.\\n\",\n    \"\\n\",\n    \"`Number of operations in model:\\n\",\n    \"    conv2d_0_m                    : 25088 (smult_4_8)\\n\",\n    \"    conv2d_1_m                    : 663552 (smult_4_4)\\n\",\n    \"    conv2d_2_m                    : 147456 (smult_4_4)\\n\",\n    \"    dense                         : 5760  (smult_4_4)\\n\",\n    \"\\n\",\n    \"Number of operation types in model:\\n\",\n    \"    smult_4_4                     : 816768\\n\",\n    \"    smult_4_8                     : 25088`\\n\",\n    \"\\n\",\n    \"In this example, smult_4_4 stands for 4x4 bit signed multiplication and smult_4_8 stands for 8x4 signed multiplication.\\n\",\n    \"\\n\",\n    \"We are currently refactoring this function and it may be substantially changed in the future.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"In the quantized network `qmodel`, let's print the statistics of the model and weights.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 79,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print_qstats(qmodel)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 81,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from qkeras.utils import quantized_model_debug\\n\",\n    \"\\n\",\n    \"quantized_model_debug(qmodel, x_test, plot=False)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Where the values in `conv2d_1 -4.6218   4.0295 ( -1.0000   1.0000) ( -0.5000   0.5000) a(  0.125000   0.500000)` correspond to min and max values of the output of the convolution layer, weight ranges (min and max), bias (min and max) and alpha (min and max).\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 2\",\n   \"language\": 
\"python\",\n   \"name\": \"python2\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 2\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython2\",\n   \"version\": \"2.7.15\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n}\n"
  },
  {
    "path": "notebook/QRNNTutorial.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"##### Copyright 2020 Google LLC\\n\",\n    \"#\\n\",\n    \"#\\n\",\n    \"# Licensed under the Apache License, Version 2.0 (the \\\"License\\\");\\n\",\n    \"# you may not use this file except in compliance with the License.\\n\",\n    \"# You may obtain a copy of the License at\\n\",\n    \"#\\n\",\n    \"# https://www.apache.org/licenses/LICENSE-2.0\\n\",\n    \"#\\n\",\n    \"# Unless required by applicable law or agreed to in writing, software\\n\",\n    \"# distributed under the License is distributed on an \\\"AS IS\\\" BASIS,\\n\",\n    \"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\\n\",\n    \"# See the License for the specific language governing permissions and\\n\",\n    \"# limitations under the License.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import warnings\\n\",\n    \"warnings.filterwarnings(\\\"ignore\\\")\\n\",\n    \"import tempfile\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"import tensorflow.compat.v2 as tf\\n\",\n    \"tf.enable_v2_behavior()\\n\",\n    \"\\n\",\n    \"from tensorflow.keras.layers import Input, Dense, Embedding, SimpleRNN, GRU, LSTM, Bidirectional\\n\",\n    \"from tensorflow.keras.optimizers import *\\n\",\n    \"from tensorflow.keras.datasets import imdb\\n\",\n    \"from tensorflow.keras.preprocessing import sequence\\n\",\n    \"\\n\",\n    \"from qkeras.autoqkeras import *\\n\",\n    \"from qkeras import *\\n\",\n    \"\\n\",\n    \"print(\\\"using tensorflow\\\", tf.__version__)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"physical_devices = tf.config.list_physical_devices()\\n\",\n    \"for d in 
physical_devices:\\n\",\n    \"  print(d)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"try:\\n\",\n    \"  device_name = os.environ['COLAB_TPU_ADDR']\\n\",\n    \"  TPU_ADDRESS = 'grpc://' + device_name\\n\",\n    \"  print('Found TPU at: {}'.format(TPU_ADDRESS))\\n\",\n    \"  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)\\n\",\n    \"  tf.config.experimental_connect_to_cluster(resolver)\\n\",\n    \"  # This is the TPU initialization code that has to be at the beginning.\\n\",\n    \"  tf.tpu.experimental.initialize_tpu_system(resolver)\\n\",\n    \"  print(\\\"All devices: \\\", tf.config.list_logical_devices('TPU'))\\n\",\n    \"  strategy = tf.distribute.experimental.TPUStrategy(resolver)  \\n\",\n    \"except KeyError:\\n\",\n    \"  print('TPU not found')\\n\",\n    \"  strategy = tf.distribute.get_strategy()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"np.random.seed(12)\\n\",\n    \"tf.random.set_seed(12)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"max_features = 10000\\n\",\n    \"# cut texts after this number of words\\n\",\n    \"# (among top max_features most common words)\\n\",\n    \"maxlen = 100\\n\",\n    \"BATCH_SIZE = 1000\\n\",\n    \"SHUFFLE_BUFFER_SIZE = 25000\\n\",\n    \"\\n\",\n    \"print('Loading data...')\\n\",\n    \"(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)\\n\",\n    \"print(len(x_train), 'train sequences')\\n\",\n    \"print(len(x_test), 'test sequences')\\n\",\n    \"\\n\",\n    \"print('Pad sequences (samples x time)')\\n\",\n    \"x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\\n\",\n    \"x_test = 
sequence.pad_sequences(x_test, maxlen=maxlen)\\n\",\n    \"print('x_train shape:', x_train.shape)\\n\",\n    \"print('x_test shape:', x_test.shape)\\n\",\n    \"y_train = np.array(y_train)\\n\",\n    \"y_test = np.array(y_test)\\n\",\n    \"\\n\",\n    \"train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\\n\",\n    \"test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))\\n\",\n    \"\\n\",\n    \"train_dataset = train_dataset.batch(BATCH_SIZE).shuffle(SHUFFLE_BUFFER_SIZE)\\n\",\n    \"test_dataset = test_dataset.batch(BATCH_SIZE)\\n\",\n    \"\\n\",\n    \"train_dataset, test_dataset\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Using QKeras\\n\",\n    \"\\n\",\n    \"__QKeras__ works by tagging all variables and weights/bias created by Keras as well as output of arithmetic layers by quantized functions. Quantized functions can be instantiated directly in __`QSimpleRNN`__/__`QLSTM`__/__`QGRU`__/__`QBidirectional`__/__`QDense`__/__`QConv2D`__/__`QSeparableConv2D`__ functions, and they can be passed to __`QActivation`__, which act as a merged quantization and activation function.\\n\",\n    \"\\n\",\n    \"In order to successfully quantize a model, users need to replace layers that create variables (trainable or not) (`LSTM`, `Conv2D`, etc) by their equivalent ones in __QKeras__ (__`QLSTM`__/__`QDense`__, etc), and any layers that perform math operations need to be quantized afterwards.\\n\",\n    \"\\n\",\n    \"Quantized values are clipped between their maximum and minimum quantized representation (which may be different than $[-1.0, 1.0]$), although for `po2` type of quantizers, we still recommend the users to specify the parameter for `max_value`.\\n\",\n    \"\\n\",\n    \"An example of a very simple recurrent network is given below in Keras.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   
\"source\": [\n    \"units = 64\\n\",\n    \"embedding_dim = 64\\n\",\n    \"loss = 'binary_crossentropy'\\n\",\n    \"\\n\",\n    \"def create_model(batch_size=None):\\n\",\n    \"  x = x_in = Input(shape=(maxlen,), batch_size=batch_size, dtype=tf.int32)\\n\",\n    \"  x = Embedding(input_dim=max_features, output_dim=embedding_dim)(x)\\n\",\n    \"  x = Activation('linear', name='embedding_act')(x)\\n\",\n    \"  x = Bidirectional(LSTM(units))(x)\\n\",\n    \"  x = Dense(1)(x)\\n\",\n    \"  x = Activation('sigmoid')(x)\\n\",\n    \"  model = tf.keras.Model(inputs=[x_in], outputs=[x])\\n\",\n    \"  return model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"tf.keras.backend.clear_session()\\n\",\n    \"with strategy.scope():\\n\",\n    \"  model = create_model(BATCH_SIZE)\\n\",\n    \"  custom_objects = {}\\n\",\n    \"  model.compile(\\n\",\n    \"      optimizer=Adam(learning_rate=0.01),\\n\",\n    \"      loss=loss,\\n\",\n    \"      metrics=['acc'])\\n\",\n    \"\\n\",\n    \"model.summary()\\n\",\n    \"print('Train...')\\n\",\n    \"model.fit(\\n\",\n    \"    train_dataset,\\n\",\n    \"    epochs=10,\\n\",\n    \"    batch_size=BATCH_SIZE,\\n\",\n    \"    validation_data=test_dataset,\\n\",\n    \"    verbose=2)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Replacing with quantized layers\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"def create_qmodel(batch_size=None):\\n\",\n    \"  x = x_in = Input(shape=(maxlen,), batch_size=batch_size, dtype=tf.int32)\\n\",\n    \"  x = Embedding(input_dim=max_features, output_dim=embedding_dim)(x)\\n\",\n    \"  x = QActivation('binary', name='embedding_act')(x)\\n\",\n    \"  x = QLSTM(\\n\",\n    \"    
units,\\n\",\n    \"    activation='quantized_tanh(4)',\\n\",\n    \"    recurrent_activation='quantized_relu(4,0,1)',\\n\",\n    \"    kernel_quantizer='stochastic_ternary(\\\"auto\\\")',\\n\",\n    \"    recurrent_quantizer='quantized_bits(2,1,1,alpha=1.0)',\\n\",\n    \"    bias_quantizer='quantized_bits(4,0,1)')(x)\\n\",\n    \"  x = QDense(\\n\",\n    \"    1, \\n\",\n    \"    kernel_quantizer=\\\"quantized_bits(4,0,1)\\\",\\n\",\n    \"    bias_quantizer='quantized_bits(4,0,1)')(x)\\n\",\n    \"  x = QActivation('sigmoid')(x)\\n\",\n    \"  model = tf.keras.Model(inputs=[x_in], outputs=[x])\\n\",\n    \"  return model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tf.keras.backend.clear_session()\\n\",\n    \"with strategy.scope():\\n\",\n    \"  qmodel = create_qmodel(BATCH_SIZE)\\n\",\n    \"  custom_objects = {}\\n\",\n    \"  qmodel.compile(\\n\",\n    \"      optimizer=Adam(learning_rate=0.01),\\n\",\n    \"      loss=loss,\\n\",\n    \"      metrics=['acc'])\\n\",\n    \"\\n\",\n    \"qmodel.summary()\\n\",\n    \"print('Train...')\\n\",\n    \"qmodel.fit(train_dataset,\\n\",\n    \"          batch_size=BATCH_SIZE,\\n\",\n    \"          epochs=10,\\n\",\n    \"          verbose=2,\\n\",\n    \"          validation_data=test_dataset)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Converting a Model Automatically\\n\",\n    \"\\n\",\n    \"In addition to the drop-in replacement of Keras functions, we have written the following function to assist anyone who wants to quantize a network.\\n\",\n    \"\\n\",\n    \"__`model_quantize(model, quantizer_config, activation_bits, custom_objects=None, transfer_weights=False)`__\\n\",\n    \"\\n\",\n    \"This function converts an non-quantized model (such as the one from `model` in the previous example) into a quantized version, by applying a configuration 
specified by the dictionary `quantizer_config`, and `activation_bits` specified for unamed activation functions, with this parameter probably being removed in future versions.\\n\",\n    \"\\n\",\n    \"The parameter `custom_objects` specifies object dictionary unknown to Keras, required when you copy a model with lambda layers, or customized layer functions, for example, and if `transfer_weights` is `True`, the returned model will have as initial weights the weights from the original model, instead of using random initial weights.\\n\",\n    \"\\n\",\n    \"The dictionary specified in `quantizer_config` can be indexed by a layer name or layer class name. In the example below, conv2d_1 corresponds to the first convolutional layer of the example, while  QConv2D corresponds to the default behavior of two dimensional convolutional layers. The reader should note that right now we recommend using __`QActivation`__ with a dictionary to avoid the conversion of activations such as `softmax` and `linear`.  In addition, although we could use `activation` field in the layers, we do not recommend that. 
\\n\",\n    \"\\n\",\n    \"`{\\n\",\n    \"  \\\"conv2d_1\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"stochastic_ternary\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_po2(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QConv2D\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"stochastic_ternary\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_po2(4)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"QDense\\\": {\\n\",\n    \"      \\\"kernel_quantizer\\\": \\\"quantized_bits(3,0,1)\\\",\\n\",\n    \"      \\\"bias_quantizer\\\": \\\"quantized_bits(3)\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"act_1\\\": \\\"quantized_relu(2)\\\",\\n\",\n    \"  \\\"QActivation\\\": { \\\"relu\\\": \\\"quantized_relu(2)\\\" }\\n\",\n    \"}`\\n\",\n    \"\\n\",\n    \"In the following example, we will quantize the model using a different strategy.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"bits = 4\\n\",\n    \"quantizer_config = {\\n\",\n    \"  \\\"bidirectional\\\": {\\n\",\n    \"      'activation' : f\\\"quantized_tanh({bits})\\\",\\n\",\n    \"      'recurrent_activation' : f\\\"quantized_relu(4,0,1)\\\",\\n\",\n    \"      'kernel_quantizer' : f\\\"quantized_bits({bits}, alpha='auto')\\\",\\n\",\n    \"      'recurrent_quantizer' : f\\\"quantized_bits({bits}, alpha='auto')\\\",\\n\",\n    \"      'bias_quantizer' : f\\\"quantized_bits({bits}, alpha='auto')\\\",\\n\",\n    \"  },\\n\",\n    \"  \\\"dense\\\": {\\n\",\n    \"      'kernel_quantizer' : f\\\"quantized_bits({bits}, alpha='auto')\\\",\\n\",\n    \"      'bias_quantizer' : f\\\"quantized_bits({bits}, alpha='auto')\\\"\\n\",\n    \"  },\\n\",\n    \"  \\\"embedding_act\\\": f\\\"quantized_bits({bits}, alpha='auto')\\\",\\n\",\n    \"}\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   
\"outputs\": [],\n   \"source\": [\n    \"tf.keras.backend.clear_session()\\n\",\n    \"with strategy.scope():\\n\",\n    \"  model = create_model(BATCH_SIZE)\\n\",\n    \"  custom_objects = {}\\n\",\n    \"  \\n\",\n    \"  qmodel = model_quantize(model, quantizer_config, bits, custom_objects)\\n\",\n    \"  qmodel.compile(\\n\",\n    \"      optimizer=Adam(learning_rate=0.01),\\n\",\n    \"      loss=loss,\\n\",\n    \"      metrics=['acc'])\\n\",\n    \"  \\n\",\n    \"qmodel.summary()\\n\",\n    \"print('Train...')\\n\",\n    \"qmodel.fit(train_dataset,\\n\",\n    \"          batch_size=BATCH_SIZE,\\n\",\n    \"          epochs=10,\\n\",\n    \"          verbose=2,\\n\",\n    \"          validation_data=test_dataset)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Quantizing a Model With `AutoQKeras`\\n\",\n    \"\\n\",\n    \"To quantize this model with `AutoQKeras`, we need to define the quantization for kernels, biases and activations; forgiving factors and quantization strategy.\\n\",\n    \"\\n\",\n    \"Below we define which quantizers are allowed for kernel, bias, activations and linear. Linear is a proxy that we use to capture `Activation(\\\"linear\\\")` to apply quantization without applying a non-linear operation.  
In some networks, we found that this trick may be necessary to better represent the quantization space.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tf.keras.backend.clear_session()\\n\",\n    \"with strategy.scope():\\n\",\n    \"  model = create_model(BATCH_SIZE)\\n\",\n    \"  custom_objects = {}\\n\",\n    \"  model.compile(\\n\",\n    \"      optimizer=Adam(learning_rate=0.01),\\n\",\n    \"      loss=loss,\\n\",\n    \"      metrics=['acc'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"quantization_config = {\\n\",\n    \"        \\\"kernel\\\": {\\n\",\n    \"                \\\"stochastic_binary\\\": 1,\\n\",\n    \"                \\\"stochastic_ternary\\\": 2,\\n\",\n    \"                \\\"quantized_bits(4,0,1,alpha=1.0)\\\": 4,\\n\",\n    \"                \\\"quantized_po2(4,1)\\\": 4\\n\",\n    \"        },\\n\",\n    \"        \\\"recurrent_kernel\\\": {\\n\",\n    \"                \\\"stochastic_binary\\\": 1,\\n\",\n    \"                \\\"stochastic_ternary\\\": 2,\\n\",\n    \"                \\\"quantized_bits(4,0,1,alpha=1.0)\\\": 4,\\n\",\n    \"                \\\"quantized_po2(4,1)\\\": 4\\n\",\n    \"          \\n\",\n    \"        },\\n\",\n    \"        \\\"recurrent_activation\\\": {\\n\",\n    \"                \\\"quantized_relu(4,0,1)\\\": 4          \\n\",\n    \"        },\\n\",\n    \"        \\\"bias\\\": {\\n\",\n    \"                \\\"quantized_bits(4,0,1)\\\": 4,\\n\",\n    \"                \\\"quantized_po2(4,1)\\\": 4\\n\",\n    \"        },\\n\",\n    \"        \\\"activation\\\" : {\\n\",\n    \"            \\\"stochastic_ternary('auto')\\\": 2,\\n\",\n    \"            \\\"quantized_tanh(4)\\\" : 4, \\n\",\n    \"            \\\"quantized_relu_po2(4,1)\\\": 4,\\n\",\n    \"            
\\\"quantized_relu(4,2)\\\": 4,\\n\",\n    \"        },\\n\",\n    \"        \\\"linear\\\": { \\n\",\n    \"                \\\"stochastic_ternary('auto')\\\" : 2,\\n\",\n    \"                \\\"quantized_tanh(4)\\\" : 4, \\n\",\n    \"                \\\"quantized_relu_po2(4,1)\\\": 4,\\n\",\n    \"                \\\"quantized_relu(3,1)\\\": 3,\\n\",\n    \"                \\\"quantized_relu(4,2)\\\": 4,\\n\",\n    \"        }\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"limit = {\\n\",\n    \"    \\\"Dense\\\": [4],\\n\",\n    \"    \\\"Bidirectional\\\": [4],\\n\",\n    \"    \\\"Activation\\\": [4],\\n\",\n    \"    \\\"default\\\" : [4]*4\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"goal = {\\n\",\n    \"    \\\"type\\\": \\\"bits\\\",\\n\",\n    \"    \\\"params\\\": {\\n\",\n    \"        \\\"delta_p\\\": 8.0,\\n\",\n    \"        \\\"delta_n\\\": 8.0,\\n\",\n    \"        \\\"rate\\\": 2.0,\\n\",\n    \"        \\\"stress\\\": 1.0,\\n\",\n    \"        \\\"input_bits\\\": 4,\\n\",\n    \"        \\\"output_bits\\\": 4,\\n\",\n    \"        \\\"ref_bits\\\": 4,\\n\",\n    \"        \\\"config\\\": {\\n\",\n    \"            \\\"default\\\": [\\\"parameters\\\", \\\"activations\\\"]\\n\",\n    \"        }\\n\",\n    \"    }\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"run_config = {\\n\",\n    \"  \\\"output_dir\\\": tempfile.mkdtemp(),\\n\",\n    \"  \\\"goal\\\": goal,\\n\",\n    \"  \\\"quantization_config\\\": quantization_config,\\n\",\n    \"  \\\"learning_rate_optimizer\\\": False,\\n\",\n    \"  \\\"transfer_weights\\\": False,\\n\",\n    \"  \\\"mode\\\": \\\"random\\\",\\n\",\n    \"  \\\"seed\\\": 42,\\n\",\n    \"  \\\"limit\\\": limit,\\n\",\n    \"  \\\"tune_filters\\\": \\\"layer\\\",\\n\",\n    \"  \\\"tune_filters_exceptions\\\": \\\"^dense\\\",\\n\",\n    \"  \\\"distribution_strategy\\\": strategy,\\n\",\n    \"\\n\",\n    \"  \\\"layer_indexes\\\": range(2, len(model.layers) - 1),\\n\",\n    \"  \\\"max_trials\\\": 1000\\n\",\n    \"}\\n\",\n   
 \"\\n\",\n    \"print(\\\"quantizing layers:\\\", [model.layers[i].name for i in run_config[\\\"layer_indexes\\\"]])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"autoqk = AutoQKeras(model, metrics=[\\\"acc\\\"], custom_objects={}, **run_config)\\n\",\n    \"autoqk.fit(\\n\",\n    \"  train_dataset, \\n\",\n    \"  validation_data=test_dataset, \\n\",\n    \"  batch_size=BATCH_SIZE, \\n\",\n    \"  epochs=10,\\n\",\n    \"  verbose=2)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"qmodel = autoqk.get_best_model()\\n\",\n    \"qmodel.save_weights(\\\"qmodel.h5\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print_qmodel_summary(qmodel)\\n\",\n    \"print(get_quantization_dictionary(qmodel))\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Py3\",\n   \"language\": \"python\",\n   \"name\": \"py3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.7.7\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n}\n"
  },
  {
    "path": "qkeras/__init__.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Exports qkeras modules to quantizer package.\"\"\"\n\n# We use wildcard import for convenience at this moment, which will be later\n# refactored and removed.\nimport tensorflow as tf\n\nfrom .b2t import *  # pylint: disable=wildcard-import\nfrom .estimate import *  # pylint: disable=wildcard-import\nfrom .qconv2d_batchnorm import QConv2DBatchnorm\nfrom .qconvolutional import *  # pylint: disable=wildcard-import\nfrom .qdepthwise_conv2d_transpose import QDepthwiseConv2DTranspose\nfrom .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm\nfrom .qlayers import *  # pylint: disable=wildcard-import\nfrom .qmac import *  # pylint: disable=wildcard-import\nfrom .qnormalization import *  # pylint: disable=wildcard-import\nfrom .qoctave import *  # pylint: disable=wildcard-import\nfrom .qpooling import *  # pylint: disable=wildcard-import\nfrom .qrecurrent import *  # pylint: disable=wildcard-import\nfrom .qseparable_conv2d_transpose import QSeparableConv2DTranspose\n#from .qtools.run_qtools import QTools\n#from .qtools.settings import cfg\nfrom .quantizers import *  # pylint: disable=wildcard-import\nfrom .registry import *  # pylint: disable=wildcard-import\nfrom .safe_eval import *  # pylint: disable=wildcard-import\n\n\nassert tf.executing_eagerly(), \"QKeras requires TF with 
eager execution mode on\"\n\n__version__ = \"0.9.0\"\n"
  },
  {
    "path": "qkeras/autoqkeras/__init__.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Exports autoqkeras as a package.\"\"\"\n\n# We use wildcard import for convenience at this moment, which will be later\n# refactored and removed.\nfrom .autoqkeras_internal import *  # pylint: disable=wildcard-import\nfrom .quantization_config import default_quantization_config  # pylint: disable=line-too-long\nfrom .utils import *  # pylint: disable=wildcard-import\n"
  },
  {
    "path": "qkeras/autoqkeras/autoqkeras_internal.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements support for auto-quantization.\"\"\"\n\nimport collections\nimport json\nimport os\nimport re\nimport copy\nfrom absl import logging\nimport keras_tuner as kt\nfrom keras_tuner import HyperModel\nfrom keras_tuner import BayesianOptimization\nfrom keras_tuner import Hyperband\nfrom keras_tuner import RandomSearch\nimport numpy as np\nimport six\nimport tensorflow as tf\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.metrics import binary_accuracy\nfrom tensorflow.keras.metrics import categorical_accuracy\nfrom tensorflow.keras.metrics import sparse_categorical_accuracy\nfrom qkeras.autoqkeras.forgiving_metrics import forgiving_factor  # pylint: disable=line-too-long\nfrom qkeras.autoqkeras.forgiving_metrics import ForgivingFactor  # pylint: disable=line-too-long\nfrom qkeras.autoqkeras.quantization_config import default_quantization_config  # pylint: disable=line-too-long\nfrom qkeras.autoqkeras.utils import print_qmodel_summary\nfrom qkeras.utils import clone_model\nfrom qkeras.utils import model_quantize\n\n\n# AutoQKHyperModel is implemented on top of keras_tuner\n# It basically creates a quantized model based on some rules\n# and it computes a acc_delta that boosts the 
accuracy when\n# choosing smaller models.\n\n# Boosting function behaves like this.\n# We use the following formula to compute the decrease factor:\n#   reference_size: number of parameters + activations of the model,\n#     assuming an 8-bit implementation.\n#   trial_size: number of parameters + activations of trial.\n#\n#   1) First, we compute how many times we decresed/increased the model\n#     i = log(reference_size / trial_size) / log(rate)\n#\n#   2) Then, we use delta_p / delta_n if model is smaller/bigger\n#      than reference model.\n#\n#      delta = i * (\n#          (i < 0) * delta_n + (i >= 0) * delta_p\n#      )\n#\n#   3) the accuracy of the model (score) is adjusted by acc * delta\n#\n#   The delta \"boosts\" the accuracy to allow worse model to be\n#   chosen by hypermodel tuner.\n#\n\nREGISTERED_LAYERS = [\"Dense\", \"Conv1D\", \"Conv2D\", \"DepthwiseConv2D\",\n                     \"SimpleRNN\", \"LSTM\", \"GRU\", \"Bidirectional\",\n                     \"Conv2DTranspose\", \"SeparableConv1D\", \"SeparableConv2D\"]\n\nQ_LAYERS = list(map(lambda x : 'Q' + x, REGISTERED_LAYERS))\n\nSEQUENCE_LAYERS = [\"SimpleRNN\", \"LSTM\", \"GRU\", \"Bidirectional\"]\n\nclass AutoQKHyperModel(HyperModel):\n  \"\"\"Creates an hypermodel to attempt to quantize a reference model.\n\n     Arguments:\n       model: Model to be quantized.\n       metrics: List of metrics to be used.\n       custom_objects: Custom objects used by Keras during quantization.\n       target: Secondary metric to chase during search (\"bits\" or \"energy\").\n       transfer_weights: if true, transfer weights from unquantized model.\n       frozen_layers: if true, these layers will not be quantized but\n         weights transferred from original model.\n       activation_bits: parameter to be used by 'model_quantize'.\n       limit: limit the number of bits in quantizers, specified as dictionary.\n       tune_filters: one of \"block\", \"layer\", \"none\" for tuning entire\n         
network, each layer separately, or no tuning.\n       tune_filters_exceptions: name of layers that will not be tuned.\n       layer_indexes: we only quantize layers whose ids are in layer_indexes.\n       learning_rate_optimizer: if true, we optimize learning rate along with\n         other parameters.\n       head_name: specify which head to calcuate score/trial-size from in\n         autoqkeras\n       quantization_config: dictionary containing configuration of\n         quantizers for kernel, bias and activation.\n       extend_model_metrics: If to append the trial size and score metrics to\n         model metrics, which are used for AutoQKeras to determine the quality\n         of a model.\n\n     Returns:\n       quantized model in trial and boosted accuracy function compiled\n       into quantized model.\n  \"\"\"\n\n  def __init__(\n      self, model, metrics, custom_objects=None, target=None,\n      transfer_weights=False, frozen_layers=None, activation_bits=4, limit=None,\n      tune_filters=\"none\", tune_filters_exceptions=None,\n      layer_indexes=None, learning_rate_optimizer=False,\n      head_name=None, quantization_config=None, extend_model_metrics=True,\n  ):\n    self.model = model\n    self.metrics = metrics\n    self.custom_objects = custom_objects if custom_objects else {}\n\n    self.target = target\n\n    self.reference_size = self.target.get_reference(model)\n\n    self.transfer_weights = transfer_weights\n    self.frozen_layers = frozen_layers if frozen_layers else []\n    self.activation_bits = activation_bits\n    self.head_name = head_name\n    self.extend_model_metrics = extend_model_metrics\n    # make sure we have at least 3 elements in list\n    # first one for kernel, second one for bias and thid one for activations.\n    #\n    # limit is in the format, where default replaces missing values:\n    # '{\n    #      \"Conv2D\":[weight,bias,activation],\n    #      \"RNN\":[weight,bias,recurrent,activation],\n    #      
\"Dense\":[weight,bias,activation],\n    #      \"Activation\":[activation]\n    #      \"default\": value\n    #  }'\n\n    if limit is None:\n      self.limit = {}\n    else:\n      self.limit = limit\n\n    self.groups = {}\n\n    assert isinstance(self.limit, dict)\n\n    if self.limit.get(\"default\", None) is None:\n      default = 8\n    else:\n      default = self.limit[\"default\"]\n\n    # make sure we have entries for every type of layer we process\n    self._adjust_limit(default)\n\n    print(\"Limit configuration:\" + json.dumps(self.limit))\n\n    assert tune_filters in [\"block\", \"layer\", \"none\"]\n\n    self.tune_filters = tune_filters\n    self.tune_filters_exceptions = re.compile(tune_filters_exceptions)\n\n    self.layer_indexes = layer_indexes\n    self.learning_rate_optimizer = learning_rate_optimizer\n\n    # load quantizer types for each type of quantizer\n    if quantization_config is None:\n      self.quantization_config = default_quantization_config\n    else:\n      self.quantization_config = quantization_config\n\n  def _adjust_limit(self, default):\n    \"\"\"Makes sure limit has all the fields required.\"\"\"\n    if isinstance(default, list):\n      assert 3 <= len(default) <= 4 \n    else:\n      default = [default] * 3\n\n    # we consider that if name is not there, we will ignore the layer\n    for name in REGISTERED_LAYERS:\n      if name in self.limit:\n        length = len(self.limit[name])\n        if length < 4 and name in SEQUENCE_LAYERS:\n          assert len(default) == 4\n          self.limit[name] = self.limit[name] + default[length:]\n        elif length < 3:\n          # No recurrent limit needed for non recurrent layers\n          self.limit[name] = self.limit[name] + default[length:2] + default[-1:]\n\n  def _n(self, name, s_list):\n    \"\"\"Creates a unique name for the tuner.\"\"\"\n    return name + \"_\".join([str(v) for v in s_list])\n\n  def _get_quantizer(self, hp, head, layer_name, layer_class_name,\n     
                i_list=None, is_kernel=True, is_linear=False):\n    \"\"\"Gets a quantizer randomly for kernels/bias/activations.\"\"\"\n\n    # first pick up which group we belong to.\n\n    if not i_list:\n      i_list = []\n\n    if is_linear:\n      # linear quantizers\n      field_name = \"linear\"\n      kq = self.quantization_config[\"linear\"]\n      index = 0\n      q_list = list(kq.keys())\n      q_dict = kq\n    elif \"kernel\" in head:\n      # kernel quantizers\n      field_name = \"kernel\"\n      kq = self.quantization_config[\"kernel\"]\n      index = 0\n      q_list = list(kq.keys())\n      q_dict = kq\n    elif \"bias\" in head:\n      # bias quantizers\n      field_name = \"bias\"\n      bq = self.quantization_config[\"bias\"]\n      index = 1\n      q_list = list(bq.keys())\n      q_dict = bq\n    elif \"pointwise_kernel\" in head: # limit is same as kernel\n      # pointwise kernel quantizers\n      field_name = \"pointwise_kernel\"\n      kq = self.quantization_config[\"pointwise_kernel\"]\n      index = 2\n      q_list = list(kq.keys())\n      q_dict = kq\n    elif \"recurrent_kernel\" in head: # limit is same as kernel\n      # recurrent kernel quantizers\n      field_name = \"recurrent_kernel\"\n      kq = self.quantization_config[\"recurrent_kernel\"]\n      index = 2\n      q_list = list(kq.keys())\n      q_dict = kq\n    elif \"recurrent_activation\" in head: # limit is same as kernel\n      # recurrent activation quantizers\n      field_name = \"recurrent_activation\"\n      raq = self.quantization_config[\"recurrent_activation\"]\n      index = -1\n      q_list = list(raq.keys())\n      q_dict = raq\n    else:\n      # activation quantizers\n      field_name = \"activation\"\n      aq = self.quantization_config[\"activation\"]\n      index = -1\n      q_list = list(aq.keys())\n      q_dict = aq\n\n    # we first we search for layer name. 
If it is not there, we switch to\n    # layer class name.\n\n    found_pattern = False\n    name = layer_class_name\n    count = -1\n    for i, pattern in enumerate(self.limit):\n      if re.match(pattern, layer_name):\n        found_pattern = True\n        name = pattern\n        count = i\n        break\n\n    # for partially quantized networks we may not have\n    # the layer class name in the set.\n\n    if name == layer_class_name and name not in self.limit:\n      return None, -1\n\n    # groups is a dictionary that contains dictionary of the\n    # patterns so that we can group everything together\n\n    if found_pattern:\n      if name in self.groups and index in self.groups[name]:\n        return self.groups[name][index]\n\n      # not there, let's use a different name for\n      # the head and field\n      head = \"qk_group_\" + str(count) + \"_\" + field_name\n      head = name + \"_\" + field_name\n\n    # limit group can be a list of quantizers or a\n    # number that tells us maximum number of bits\n\n    if isinstance(self.limit[name][index], list):\n      # we assume this is a subset of the q_keys\n      # entry in quantization_config will be like:\n      #   \"Conv2D\": [ [\"q1\", \"q2\", \"q3\"], ... 
]\n      #\n      # we always assume this list is a subset of\n      # the original list or we will raise an\n      # error.\n\n      q_list = self.limit[name][index]\n      q_dict = {\n          key: q_dict[key] for key in q_list\n      }\n    else:\n      q_dict = {\n          key: value for (key, value) in q_dict.items()\n          if value <= self.limit[name][index]\n      }\n      q_list = list(q_dict.keys())\n\n    # didn't found a match in groups, create one.\n\n    if len(q_list) == 1:\n      q_name = hp.Fixed(self._n(head + \"_quantizer\", i_list), q_list[0])\n    else:\n      q_name = hp.Choice(self._n(head + \"_quantizer\", i_list), q_list)\n\n    if found_pattern:\n      if name not in self.groups:\n        self.groups[name] = {index: (q_name, q_dict[q_name])}\n      else:\n        self.groups[name][index] = (q_name, q_dict[q_name])\n\n    return (q_name, q_dict[q_name])\n\n  def quantize_model(self, hp):\n    \"\"\"Quantize model by hyperparameter search and extracting size schema.\"\"\"\n\n    # configuration for quantization.\n    q_dict = {}\n\n    model = clone_model(self.model, self.custom_objects)\n\n    fanin = []\n\n    filter_range = [0.5, 0.75, 1.0, 1.5, 2.0]\n\n    # network_filters=hp.Choice(...) should only be defined if we are sure\n    # current blocks has any layer that need filter sweep.\n    # Otherwise, when no layer needs filter sweep and a hp variable is defined,\n    # there will be uneffective trials that loop around the network\n    # filter range, even though none of the filter sweep was ever applied to\n    # any layers. 
Therfore, we use filter_sweep_enabled to mark if any layer\n    # in current block needs filter sweep.\n    kernel_quantizer_dict = {}\n    filter_sweep_enabled = False\n    for layer in model.layers:\n      if layer.__class__.__name__ in REGISTERED_LAYERS:\n        kernel_quantizer, bits = self._get_quantizer(\n            hp, layer.name + \"_kernel\", layer.name, layer.__class__.__name__,\n            is_kernel=True)\n\n        kernel_quantizer_dict[layer.name] = (kernel_quantizer, bits)\n\n        # kernel_quantizer is not None ->  layer in the current block need\n        # to be quantized\n        if kernel_quantizer:\n          if (\n              not filter_sweep_enabled and self.tune_filters in\n              [\"layer\", \"block\"]\n              and not self.tune_filters_exceptions.search(layer.name) and\n              layer.__class__.__name__ in\n              [\"Dense\", \"Conv1D\", \"Conv2D\", \"Conv2DTranspose\"]\n          ):\n            filter_sweep_enabled = True\n\n        if layer.__class__.__name__ in SEQUENCE_LAYERS:\n          recurrent_quantizer, _ = self._get_quantizer(\n            hp, layer.name + \"_recurrent_kernel\", layer.name, layer.__class__.__name__,\n            is_kernel=True)\n\n        if layer.__class__.__name__ in [\"SeparableConv1D\", \"SeparableConv2D\"]:\n          pointwise_quantizer, _ = self._get_quantizer(\n            hp, layer.name + \"_pointwise_kernel\", layer.name, layer.__class__.__name__,\n            is_kernel=True)\n\n    if self.tune_filters == \"block\" and filter_sweep_enabled:\n      network_filters = hp.Choice(\n          \"network_filters\",\n          values=filter_range,\n          default=1.0\n      )\n    else:\n      network_filters = 1.0\n\n    for layer_id, layer in enumerate(model.layers):\n\n      # we can use these indexes to disable some layers, like the last\n      # layer\n\n      if self.layer_indexes is not None and layer_id not in self.layer_indexes:\n        continue\n\n      layer_d = 
{}\n\n      if layer.__class__.__name__ in Q_LAYERS:\n        weights = layer.get_weights()[0]\n        if (\n            layer.get_quantizers()[0] and\n            hasattr(layer.get_quantizers()[0], \"bits\")\n        ):\n          bits = layer.get_quantizers()[0].bits\n        else:\n          bits = 8\n        fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.)\n        \n      if layer.__class__.__name__ in REGISTERED_LAYERS:\n        # difference between depthwise and the rest is just the name\n        # of the kernel.\n        if layer.__class__.__name__ in [\n            \"DepthwiseConv2D\", \"SeparableConv1D\", \"SeparableConv2D\"\n        ]:\n          kernel_name = \"depthwise_quantizer\"\n        else:\n          kernel_name = \"kernel_quantizer\"\n\n        # sample kernel quantizer.\n        (kernel_quantizer, bits) = kernel_quantizer_dict[layer.name]\n\n        if not kernel_quantizer:\n          continue\n\n        # process fanin here\n\n        if bits < 8:\n          weights = layer.get_weights()[0]\n          fanin.append(np.prod(weights.shape[:-1]) * (8. 
- bits) / 8.)\n\n        # we only want to do that if we are going to quantize layer\n        if (\n            self.tune_filters in [\"layer\", \"block\"] and\n            not self.tune_filters_exceptions.search(layer.name) and\n            layer.__class__.__name__ in [\n                \"Dense\", \"Conv1D\", \"Conv2D\", \"Conv2DTranspose\",\n                \"SeparableConv1D\", \"SeparableConv2D\"\n            ]\n        ):\n          if self.tune_filters == \"layer\":\n            layer_filters = hp.Choice(\n                \"network_filters_\" + layer.name,\n                values=filter_range,\n                default=1.0\n            )\n          else:\n            layer_filters = network_filters\n\n          if layer.__class__.__name__ == \"Dense\":\n            layer.units = max(int(layer.units * layer_filters), 1)\n          elif layer.__class__.__name__ in [\n              \"Conv1D\", \"Conv2D\", \"Conv2DTranspose\",\n              \"SeparableConv1D\", \"SeparableConv2D\"\n          ]:\n            layer.filters = max(int(layer.filters * layer_filters), 1)\n\n        layer_d[kernel_name] = kernel_quantizer\n\n        if layer.__class__.__name__ in SEQUENCE_LAYERS:\n          layer_d['recurrent_quantizer'] = recurrent_quantizer\n\n        if layer.__class__.__name__ in [\"SeparableConv1D\", \"SeparableConv2D\"]:\n          layer_d['pointwise_quantizer'] = pointwise_quantizer\n\n        if layer.__class__.__name__ in [\"LSTM\", \"GRU\", \"Bidirectional\"]:\n          layer_d['recurrent_activation'], _  = self._get_quantizer(\n              hp, layer.name + \"_recurrent_activation\", layer.name,\n              layer.__class__.__name__, is_kernel=False)\n\n        # if we use bias, sample quantizer.\n        if layer.__class__.__name__ == \"Bidirectional\":\n          layer_d[\"bias_quantizer\"], bits = self._get_quantizer(\n              hp, layer.name + \"_bias\", layer.name, layer.__class__.__name__,\n              is_kernel=False)\n          
layer_d[\"activation\"], bits = self._get_quantizer(\n              hp, layer.name + \"_activation\", layer.name,\n              layer.__class__.__name__, is_kernel=False)\n          q_dict[layer.name] = layer_d \n        else:\n          if layer.use_bias:\n            layer_d[\"bias_quantizer\"], bits = self._get_quantizer(\n                hp, layer.name + \"_bias\", layer.name, layer.__class__.__name__,\n                is_kernel=False)\n\n          # if activation is not linear/softmax we need to process it.\n          if layer.activation is None:\n            is_softmax = False\n            is_linear = False\n          else:\n            if isinstance(layer.activation, six.string_types):\n              is_softmax = layer.activation == \"softmax\"\n              is_linear = layer.activation == \"linear\"\n            else:\n              is_softmax = layer.activation.__name__ == \"softmax\"\n              is_linear = layer.activation.__name__ == \"linear\"\n\n          if not is_softmax and not is_linear:\n            layer_d[\"activation\"], bits = self._get_quantizer(\n                hp, layer.name + \"_activation\", layer.name,\n                layer.__class__.__name__, is_kernel=False)\n\n          q_dict[layer.name] = layer_d\n\n      elif layer.__class__.__name__ in [\"Reshape\"]:\n        # we cannot handle fine tuning filters per layer right now.\n        assert self.tune_filters in [\"none\", \"block\"]\n\n        # we need to make sure this pattern exists, this should only occur for\n        # \"scheduler\", so the name will be complete and not a pattern.\n\n        if (\n            self.tune_filters == \"none\" or\n            layer.name not in self.limit or\n            self.tune_filters_exceptions.search(layer.name)\n        ):\n          continue\n\n        if K.image_data_format() == \"channels_last\":\n          layer.target_shape = layer.target_shape[:-1] + (\n              min(int(layer.target_shape[-1] * network_filters), 1),)\n        
else:\n          layer.target_shape = (int(layer.target_shape[0] * network_filters),\n                                ) + layer.target_shape[1:]\n\n      elif layer.__class__.__name__ in [\"Activation\"]:\n        if isinstance(layer.activation, six.string_types):\n          is_linear = layer.activation == \"linear\"\n          is_softmax = layer.activation == \"softmax\"\n        else:\n          is_linear = layer.activation.__name__ == \"linear\"\n          is_softmax = layer.activation.__name__ == \"softmax\"\n\n        # if it is a linear activation, we will notify the\n        # quantizer we are searching for linear type of\n        # quantizers\n\n        if not is_softmax:\n          activation, bits = self._get_quantizer(\n              hp, layer.name + \"_activation\", layer.name,\n              layer.__class__.__name__, is_kernel=False,\n              is_linear=is_linear)\n\n          if not activation:\n            continue\n\n          # look at documentation on model_quantize\n          q_dict[layer.name] = activation\n      elif layer.__class__.__name__ in self.limit:\n        # mark it for conversion\n        q_dict[layer.name] = {}\n      else:\n        for pattern in self.limit:\n          if re.match(pattern, layer.name):\n            q_dict[layer.name] = {}\n            break\n\n    q_model = model_quantize(\n        model, q_dict, self.activation_bits,\n        custom_objects=self.custom_objects,\n        transfer_weights=self.transfer_weights)\n\n    return q_model, fanin\n\n  def build(self, hp):\n    \"\"\"Builds hyperparameterized quantized model.\"\"\"\n\n    self.groups = {}\n\n    # we are not using the fanin right now.\n\n    q_model, _ = self.quantize_model(hp)\n\n    # transfer weights from previous run as we know we will not\n    if self.learning_rate_optimizer:\n      # if learning_rate_optimizer, we try to transfer weights from previous run\n      print(\"... 
freezing layers {}.\".format(\", \".join(self.frozen_layers)))\n      for layer_name in self.frozen_layers:\n        o_weights = self.model.get_layer(layer_name).get_weights()\n        layer = q_model.get_layer(layer_name)\n        # don't know if setting trainable to False is good or not yet\n        # try to do \"soft-freeze\" by transferring weights. More experiments\n        # needed before we decide what to do.\n        # layer.trainable = False\n        weights = layer.get_weights()\n        # because we can be changing number of layers, we do not know\n        # if we can really use some of the weights or not.\n        equal_layer = True\n        for w in range(len(o_weights)):\n          if o_weights[w].shape != weights[w].shape:\n            equal_layer = False\n            break\n        if equal_layer:\n          layer.set_weights(o_weights)\n\n    self.trial_size = self.target.get_trial(q_model)\n\n    # we will use a boosted accuracy computation\n\n    delta = self.target.delta()\n\n    # by default, we use the first metric specified by the\n    # user to be the target metric.\n    if not self.metrics:\n      score_metric = None\n    elif isinstance(self.metrics, dict):\n      if not self.head_name:\n      # if head_name not provided, find the first metric from the dict\n        score_key = list(self.metrics.keys())[0]\n      else:\n        # find the metric assoicated with the head_name\n        score_key = self.head_name\n      score_metric = self.metrics[score_key]\n      if isinstance(score_metric, list):\n        score_metric = score_metric[0]\n    elif isinstance(self.metrics, list):\n      score_metric = self.metrics[0]\n\n    self.score = AutoQKHyperModel.adjusted_score(\n        self, delta, score_metric)\n\n    # some papers suggest that we use learning_rate * sqrt(fanin) / layer\n    # we cannot do that right now, but we can definitely do that\n    # if we are quantizing one layer at a time\n    #\n    # 
https://arxiv.org/pdf/1511.00363.pdf\n\n    # we use the magic number to smooth out the average\n    total_factor = self.target.get_total_factor()\n    delta_lr = 1.0 + (total_factor < 0) * total_factor\n\n    # we assume model has been compiled at least.\n\n    lr = float(self.model.optimizer.lr.numpy())\n\n    # we assume that delta_lr can lower lr to accommodate\n    # for more quantization\n    #\n    # if learning rate scheduler is used, we assume the callback to manage\n    # learning rate. Just set it to constant.\n\n    if self.learning_rate_optimizer:\n      lr_range = list(lr * np.linspace(delta_lr, 1.1, 5))\n      lr_choice = hp.Choice(\"learning_rate\", lr_range)\n      self.model.optimizer.learning_rate = lr_choice\n    else:\n      lr_choice = lr\n      print(\"learning_rate: {}\".format(lr))\n\n    optimizer = self.model.optimizer\n\n    q_model.summary()\n\n    metrics = self.metrics\n\n    # extend metrics by including score and trial_size metrics\n    if self.extend_model_metrics:\n      ext_metrics = copy.deepcopy(metrics)\n      if isinstance(ext_metrics, dict):\n        # for dict, add trial_size_metric and score metric to target output\n        if not self.head_name:\n          # if head_name not provided, find the first metric from the dict\n          score_key = list(ext_metrics.keys())[0]\n        else:\n          # find the metric assoicated with the head_name\n          score_key = self.head_name\n        score_metric = ext_metrics[score_key]\n        if isinstance(score_metric, list):\n          score_metric += [self.trial_size_metric(self.trial_size), self.score]\n        else:\n          score_metric = [score_metric]\n          score_metric += [self.trial_size_metric(self.trial_size), self.score]\n        ext_metrics[score_key] = score_metric\n      else:\n        ext_metrics += [\n            self.trial_size_metric(self.trial_size),\n            self.score]\n      metrics = ext_metrics\n\n    q_model.compile(\n        
optimizer=optimizer,\n        loss=self.model.loss,\n        metrics=metrics\n    )\n    self.q_model = q_model\n\n    # this just prints a summary of the quantization for debugging\n    # purposes\n\n    self.target.print_stats()\n    print_qmodel_summary(q_model)\n\n    return q_model\n\n  @staticmethod\n  def adjusted_score(hyper_model, delta, metric_function=None):\n    def score(y_true, y_pred):\n      y_t_rank = len(y_true.shape.as_list())\n      y_p_rank = len(y_pred.shape.as_list())\n      y_t_last_dim = y_true.shape.as_list()[-1]\n      y_p_last_dim = y_pred.shape.as_list()[-1]\n\n      is_binary = y_p_last_dim == 1\n      is_sparse_categorical = (\n          y_t_rank < y_p_rank or y_t_last_dim == 1 and y_p_last_dim > 1)\n\n      if isinstance(metric_function, six.string_types):\n        if metric_function in [\"accuracy\", \"acc\"]:\n          if is_binary:\n            metric = binary_accuracy(y_true, y_pred)\n          elif is_sparse_categorical:\n            metric = sparse_categorical_accuracy(y_true, y_pred)\n          else:\n            metric = categorical_accuracy(y_true, y_pred)\n        else:\n          metric = categorical_accuracy(y_true, y_pred)\n      else:\n        metric = metric_function(y_true, y_pred)\n\n      return K.cast(metric * (1.0 + delta), K.floatx())\n\n    if not metric_function:\n      metric_function = \"accuracy\"\n\n    return score\n\n  @staticmethod\n  def trial_size_metric(trial_size):\n    def trial(y_true, y_pred):  # pylint: disable=unused-argument\n      return K.cast(trial_size, K.floatx())\n    return trial\n\n\nclass AutoQKeras:\n  \"\"\"Performs autoquantization in Keras model.\n\n     Arguments:\n       model: Model to be quantized.\n       metrics: List of metrics to be used.\n       custom_objects: Custom objects used by Keras during quantization.\n       goal: Metric to compute secondary goal of search (bits or energy)\n       output_dir: name of output directory to store results.\n       mode: random, 
hyperband or bayesian used by keras_tuner.\n       custom_tuner: The Keras Tuner class to use to search hyperparams\n       transfer_weights: if true, transfer weights from unquantized model.\n       frozen_layers: if true, these layers will not be quantized but\n         weights transferred from original model.\n       activation_bits: parameter to be used by 'model_quantize'.\n       limit: limit the number of bits in quantizers specified as a dictionary.\n       tune_filters: one of \"block\", \"layer\", \"none\" for tuning entire\n         network, each layer separately, or no tuning.\n       tune_filters_exceptions: name of layers that will not be tuned.\n       layer_indexes: indexes of layers we will quantize.\n       learning_rate_optimizer: if true, user will provide lr scheduler\n         callback.\n       quantization_config: file name of dictionary containing configuration of\n         quantizers for kernel, bias and activation.\n       head_name: specify which head to calcuate score/trial-size from in\n         autoqkeras\n       score_metric: Str. Optional metric name to use to evaluate the trials.\n         Defaults to val_score\n       tuner_kwargs: parameters for keras_tuner depending on whether\n         mode is random, hyperband or baeysian. 
Please refer to the\n         documentation of kerstuner Tuners.\n  \"\"\"\n\n  def __init__(\n      self, model, metrics=None, custom_objects=None, goal=None,\n      output_dir=\"result\", mode=\"random\", custom_tuner=None,\n      transfer_weights=False, frozen_layers=None, activation_bits=4,\n      limit=None, tune_filters=\"none\",\n      tune_filters_exceptions=None, learning_rate_optimizer=False,\n      layer_indexes=None, quantization_config=None, overwrite=True,\n      head_name=None, score_metric=None, **tuner_kwargs):\n\n    # Collect input arguments to AutoQKeras for usage by custom tuner\n    autoqkeras_input_args = locals()\n\n    if not metrics:\n      metrics = []\n\n    if not custom_objects:\n      custom_objects = {}\n\n    # goal: { \"type\": [\"bits\", \"energy\"], \"params\": {...} } or ForgivingFactor\n    #   type\n    # For type == \"bits\":\n    #   delta_p: increment (in %) of the accuracy if trial is smaller.\n    #   delta_n: decrement (in %) of the accuracy if trial is bigger.\n    #   rate: rate of decrease/increase in model size in terms of bits.\n    #   input_bits; size of input tensors.\n    #   output_bits; size of output tensors.\n    #   stress: parameter to reduce reference size to force tuner to\n    #     choose smaller models.\n    #   config: configuration on what to compute for each layer\n    #     minimum configuration is { \"default\": [\"parameters\", \"activations\"] }\n\n    # use simplest one - number of bits\n    if not goal:\n      goal = {\n          \"type\": \"bits\",\n          \"params\": {\n              \"delta_p\": 8.0,\n              \"delta_n\": 8.0,\n              \"rate\": 2.0,\n              \"stress\": 1.0,\n              \"input_bits\": 8,\n              \"output_bits\": 8,\n              \"ref_bits\": 8,\n              \"config\": {\n                  \"default\": [\"parameters\", \"activations\"]\n              }\n          }\n      }\n\n    self.overwrite = overwrite\n\n    # for multi-head 
model, we need to specify which head(/output) that\n    # score and trial metric needs to calculate from\n    self.head_name = head_name\n\n    # if we have not created it already, create new one.\n    if not isinstance(goal, ForgivingFactor):\n      target = forgiving_factor[goal[\"type\"]](**goal[\"params\"])\n    else:\n      target = goal\n\n    # if no metrics were specified, we want to make sure we monitor at least\n    # accuracy.\n    if not metrics:\n      metrics = [\"acc\"]\n\n    self.hypermodel = AutoQKHyperModel(\n        model, metrics, custom_objects, target,\n        transfer_weights=transfer_weights,\n        frozen_layers=frozen_layers,\n        activation_bits=activation_bits,\n        limit=limit,\n        tune_filters=tune_filters,\n        tune_filters_exceptions=tune_filters_exceptions,\n        layer_indexes=layer_indexes,\n        learning_rate_optimizer=learning_rate_optimizer,\n        head_name=head_name,\n        quantization_config=quantization_config\n    )\n\n    # right now we create unique results directory\n    idx = 0\n    name = output_dir\n    if self.overwrite:\n      while os.path.exists(name):\n        idx += 1\n        name = output_dir + \"_\" + str(idx)\n    output_dir = name\n    self.output_dir = output_dir\n\n    if score_metric is None:\n      if self.head_name:\n        score_metric = \"val_\" + self.head_name + \"_score\"\n      else:\n        score_metric = \"val_score\"\n    assert mode in [\"random\", \"bayesian\", \"hyperband\"]\n    if custom_tuner is not None:\n      self.tuner = custom_tuner(\n          self.hypermodel,\n          autoqkeras_config=autoqkeras_input_args,\n          objective=kt.Objective(score_metric, \"max\"),\n          project_name=output_dir,\n          **tuner_kwargs)\n    elif mode == \"random\":\n      self.tuner = RandomSearch(\n          self.hypermodel,\n          objective=kt.Objective(score_metric, \"max\"),\n          project_name=output_dir,\n          **tuner_kwargs)\n    elif 
mode == \"bayesian\":\n      self.tuner = BayesianOptimization(\n          self.hypermodel,\n          objective=kt.Objective(score_metric, \"max\"),\n          project_name=output_dir,\n          **tuner_kwargs)\n    elif mode == \"hyperband\":\n      self.tuner = Hyperband(\n          self.hypermodel,\n          objective=kt.Objective(score_metric, \"max\"),\n          project_name=output_dir,\n          **tuner_kwargs)\n    else:\n      pass\n\n    self.tuner.search_space_summary()\n\n  def _has_earlystopping(self, callbacks):\n    \"\"\"Check if EarlyStopping has been defined or not.\"\"\"\n    if callbacks is None:\n      return False\n\n    for callback in callbacks:\n      if isinstance(callback, tf.keras.callbacks.EarlyStopping):\n        return True\n    return False\n\n  def history(self, number_of_trials=-1):\n    \"\"\"Returns the history of the model search.\"\"\"\n    trials = self.tuner.oracle.get_best_trials(number_of_trials)\n    state = [trial.get_state() for trial in trials]\n\n    result = {}\n    result[\"score\"] = [\n        state[i][\"score\"] for i in range(len(state))\n        if trials[i].score is not None\n    ]\n    for i in range(len(state)):\n      if trials[i].score is not None:\n        keys = state[i][\"metrics\"][\"metrics\"].keys()\n\n        for key in keys:\n          if key != \"score\" and not key.startswith(\n              \"val_\") and key != \"loss\" and key != \"trial\":\n\n            cur_accuracy = state[i][\"metrics\"][\"metrics\"][key][\n                \"observations\"][0][\"value\"][0]\n            if \"val_\" + key in state[i][\"metrics\"][\"metrics\"].keys():\n              cur_val_accuracy = state[i][\"metrics\"][\"metrics\"][\"val_\" + key][\n                  \"observations\"][0][\"value\"][0]\n            else:\n              cur_val_accuracy = None\n\n            # only update result if both key and val_key exist\n            if cur_val_accuracy:\n              if key not in result.keys():\n                
result[key] = [cur_accuracy]\n                result[\"val_\" + key] = [cur_val_accuracy]\n              else:\n                result[key].append(cur_accuracy)\n                result[\"val_\" + key].append(cur_val_accuracy)\n\n    if self.head_name:\n      trial_from_output = self.head_name + \"_trial\"\n    else:\n      trial_from_output = \"trial\"\n    result[\"trial_size\"] = [\n        state[i][\"metrics\"][\"metrics\"][trial_from_output][\"observations\"][0]\n        [\"value\"][0] for i in range(len(state)) if trials[i].score is not None\n    ]\n\n    return result\n\n  def fit(self, *fit_args, **fit_kwargs):\n    \"\"\"Invokes tuner fit algorithm.\"\"\"\n\n    callbacks = fit_kwargs.get(\"callbacks\", None)\n\n    if callbacks is None:\n      callbacks = []\n\n    epochs = fit_kwargs.get(\"epochs\", None)\n\n    if epochs is None:\n      epochs = 10\n\n    if not self._has_earlystopping(callbacks):\n      callbacks = callbacks + [\n          tf.keras.callbacks.EarlyStopping(\n              \"val_loss\", patience=min(20, epochs//5))\n      ]\n      fit_kwargs[\"callbacks\"] = callbacks\n\n    self.tuner.search(*fit_args, **fit_kwargs)\n\n  @staticmethod\n  def get_best_lr(qmodel):\n    \"\"\"Extracts best lr of model.\"\"\"\n    return qmodel.optimizer.lr.numpy()\n\n  def get_best_model(self):\n    params = self.tuner.get_best_hyperparameters()[0]\n\n    q_model = self.tuner.hypermodel.build(params)\n\n    self.learning_rate = q_model.optimizer.lr.numpy()\n\n    return q_model\n\n  def get_learning_rate(self):\n    return self.learning_rate\n\n\nclass AutoQKerasScheduler:\n  \"\"\"Performs autoquantization one layer/group at a time.\n\n     Arguments:\n       model: Model to be quantized.\n       metrics: List of metrics to be monitored.\n       custom_objects: Custom objects used by Keras during quantization.\n       goal: Metric to compute secondary goal of search (bits or energy)\n       output_dir: name of output directory to store results.\n       
mode: random, hyperband or bayesian used by keras_tuner.\n       transfer_weights: if true, transfer weights from unquantized model.\n       activation_bits: parameter to be used by 'model_quantize'.\n       limit: limit the number of bits in quantizers specified as a dictionary.\n       tune_filters: one of \"block\", \"layer\", \"none\" for tuning entire\n         network, each layer separately, or no tuning.\n       tune_filters_exceptions: name of layers that will not be tuned.\n       layer_indexes: indexes of layer to be quantized.\n       learning_rate_optimizer: if true, user will provide lr scheduler\n         callback.\n       blocks: list of re patterns specifygin group configuration for layers.\n       schedule_block: \"sequential\" or \"cost\". Schedule blocks using the\n         order of the groups or decreasing cost (energy or bits).\n       quantization_config: file name of dictionary containing configuration of\n         quantizers for kernel, bias and activation.\n       debug: if True, fit will just print the groups for debugging purposes.\n       head_name: specify which head to calcuate score/trial-size from in\n         autoqkeras\n       tuner_kwargs: parameters for keras_tuner depending on whether\n         mode is random, hyperband or baeysian. 
Please refer to the\n         documentation of kerstuner Tuners.\n  \"\"\"\n\n  def __init__(\n      self, model, metrics=None, custom_objects=None, goal=None,\n      output_dir=\"result\", mode=\"random\", transfer_weights=False,\n      activation_bits=4, limit=None, tune_filters=\"none\",\n      tune_filters_exceptions=None, layer_indexes=None,\n      learning_rate_optimizer=False, blocks=None, schedule_block=\"sequential\",\n      quantization_config=None, overwrite=True, debug=False, head_name=None,\n      **tuner_kwargs):\n\n    if not metrics:\n      metrics = []\n\n    if not custom_objects:\n      custom_objects = {}\n\n    # goal: { \"type\": [\"bits\", \"energy\"], \"params\": {...} }\n    # For type == \"bits\":\n    #   delta_p: increment (in %) of the accuracy if trial is smaller.\n    #   delta_n: decrement (in %) of the accuracy if trial is bigger.\n    #   rate: rate of decrease/increase in model size in terms of bits.\n    #   input_bits; size of input tensors.\n    #   output_bits; size of output tensors.\n    #   stress: parameter to reduce reference size to force tuner to\n    #     choose smaller models.\n    #   config: configuration on what to compute for each layer\n    #     minimum configuration is { \"default\": [\"parameters\", \"activations\"] }\n\n    # use simplest one - number of bits\n    if not goal:\n      goal = {\n          \"type\": \"bits\",\n          \"params\": {\n              \"delta_p\": 8.0,\n              \"delta_n\": 8.0,\n              \"rate\": 2.0,\n              \"stress\": 1.0,\n              \"input_bits\": 8,\n              \"output_bits\": 8,\n              \"ref_bits\": 8,\n              \"config\": {\n                  \"default\": [\"parameters\", \"activations\"]\n              }\n          }\n      }\n\n    self.target = forgiving_factor[goal[\"type\"]](**goal[\"params\"])\n\n    self.model = model\n    self.metrics = metrics\n    self.custom_objects = custom_objects\n    self.mode = mode\n    
self.transfer_weights = transfer_weights\n    self.activation_bits = activation_bits\n    self.limit = limit\n    self.tune_filters = tune_filters\n    self.tune_filters_exceptions = tune_filters_exceptions\n    self.layer_indexes = layer_indexes\n    self.learning_rate_optimizer = learning_rate_optimizer\n    self.blocks = blocks\n    self.schedule_block = schedule_block\n    self.quantization_config = quantization_config\n    self.tuner_kwargs = tuner_kwargs\n    self.debug = debug\n    self.head_name = head_name\n\n    self.autoqk = None\n    self.learning_rate = model.optimizer.lr.numpy()\n    self.overwrite = overwrite\n\n    assert self.schedule_block in [\"sequential\", \"cost\"]\n\n    # right now we create unique results directory\n    idx = 0\n    name = output_dir\n    if self.overwrite:\n      while os.path.exists(name):\n        idx += 1\n        name = output_dir + \"_\" + str(idx)\n    output_dir = name\n    self.output_dir = output_dir\n    self.next_block = self.get_next_block(overwrite)\n    if self.next_block > 0:\n      strategy = self.tuner_kwargs.get(\"distribution_strategy\", None)\n      if strategy:\n        with strategy.scope():\n          self.model = tf.keras.models.load_model(\n              os.path.join(\n                  self.output_dir, \"model_block_\" + str(self.next_block - 1)),\n              custom_objects=self.custom_objects)\n      else:\n        self.model = tf.keras.models.load_model(\n            os.path.join(\n                self.output_dir, \"model_block_\" + str(self.next_block - 1)),\n            custom_objects=self.custom_objects)\n      print(\"Load model completed\")\n\n  def get_next_block(self, overwrite):\n    \"\"\"Get the next block id to be worked on.\"\"\"\n    if overwrite:\n      return 0\n    else:\n      try:\n        with tf.io.gfile.GFile(os.path.join(self.output_dir, \"scheduler.json\"),\n                               \"r\") as f:\n          scheduler_json = f.read()\n        scheduler = 
json.loads(scheduler_json)\n        return scheduler[\"next_block\"]\n      except:  # pylint: disable=bare-except\n        return 0\n\n  def get_limit(self, model, pattern):\n    \"\"\"Apply patterned group to limit to obtain new limit set.\"\"\"\n    limit = self.limit\n    new_limit = {}\n    new_pattern = collections.defaultdict(list)\n\n    for layer_name in self.grouped_patterns[pattern]:\n      layer = model.get_layer(layer_name)\n      layer_class_name = layer.__class__.__name__\n\n      target_quantizers = limit.get(layer_class_name, -1)\n      for limit_pattern in limit:\n        if re.match(limit_pattern, layer_name):\n          target_quantizers = limit[limit_pattern]\n          new_pattern[limit_pattern].append(layer_name)\n          layer_name = limit_pattern\n          break\n      if target_quantizers != -1:\n        new_limit[layer_name] = target_quantizers\n\n    for key in new_pattern:\n      # grouped pattern in regex need to be ^(word1|word2|...)$ instead of\n      # ^word1|word2|...$; otherwise it cause non-exact match,\n      # e.g., fc.*_0 and fc.*_0_relu were miss-matched\n      new_key = \"^\" + \"(\" + \"|\".join(new_pattern[key]) + \")\" + \"$\"\n      new_limit[new_key] = new_limit[key]\n      if new_key != key:\n        del new_limit[key]\n\n    return new_limit\n\n  def fit(self, *fit_args, **fit_kwargs):\n    \"\"\"Invokes tuner fit algorithm.\"\"\"\n\n    self.history = []\n    self.compute_block_costs(self.blocks, self.model)\n\n    if self.tuner_kwargs.get(\"max_trials\", None):\n      max_trials = float(self.tuner_kwargs[\"max_trials\"])\n\n    lr = self.model.optimizer.lr.numpy()\n\n    model = self.model\n\n    frozen_layers = []\n\n    for i, (pattern, cost) in enumerate(self.retrieve_max_block()):\n\n      # now create new limit pattern\n      if not self.overwrite:\n        if i < self.next_block:\n          print(\"Resume tuning. Skipping block \", i)\n          continue\n\n      print(\"... 
block cost: {:.0f} / {:.0f}\".format(cost, self.reference_size))\n\n      if self.tuner_kwargs.get(\"max_trials\", None):\n        self.tuner_kwargs[\"max_trials\"] = int(\n            max(10, max_trials * cost / self.reference_size))\n        print(\"... adjusting max_trials for this block to {}\".format(\n            self.tuner_kwargs[\"max_trials\"]))\n\n      limit = self.get_limit(model, pattern)\n      new_frozen_layers = self.grouped_patterns[pattern]\n\n      # if dictionary is empty we did not match anything.\n      # we have a bug in the patterns specified by the\n      # user.\n\n      assert limit\n\n      print(\"Pattern {} is : {}\".format(i, limit))\n\n      if self.debug:\n        frozen_layers = frozen_layers + new_frozen_layers\n        continue\n\n      self.autoqk = AutoQKeras(\n          model, self.metrics,\n          custom_objects=self.custom_objects,\n          goal=self.target,\n          output_dir=self.output_dir + \"/\" + str(i),\n          mode=self.mode,\n          transfer_weights=self.transfer_weights,\n          frozen_layers=frozen_layers,\n          activation_bits=self.activation_bits,\n          limit=limit,\n          tune_filters=self.tune_filters,\n          tune_filters_exceptions=self.tune_filters_exceptions,\n          layer_indexes=self.layer_indexes,\n          learning_rate_optimizer=self.learning_rate_optimizer,\n          quantization_config=self.quantization_config,\n          overwrite=self.overwrite,\n          head_name=self.head_name,\n          **self.tuner_kwargs)\n\n      self.autoqk.fit(*fit_args, **fit_kwargs)\n\n      self.autoqk.tuner.results_summary()\n\n      self.history.append(self.autoqk.history())\n\n      model = self.autoqk.get_best_model()\n      self.learning_rate = model.optimizer.lr.numpy()\n\n      # restore learning rate\n      # this is just a placeholder for the optimizer.\n\n      model.compile(\n          model.optimizer,\n          loss=self.model.loss,\n          
metrics=self.model.metrics)\n\n      frozen_layers = frozen_layers + new_frozen_layers\n\n      filename = self.output_dir + \"/model_block_\" + str(i)\n      model.save(filename)\n      self.next_block = i + 1\n\n      # update scheduler json\n      with tf.io.gfile.GFile(os.path.join(self.output_dir, \"scheduler.json\"),\n                             \"w\") as f:\n        f.write(json.dumps({\"next_block\": self.next_block}))\n\n    if self.debug:\n      return\n\n    self.best_model = model\n\n    # make all layers trainable again\n    for layer_name in frozen_layers:\n      layer = model.get_layer(layer_name)\n      layer.trainable = True\n\n  def compute_block_costs(self, patterns, model):\n    \"\"\"Computes costs for each block.\"\"\"\n\n    # get block cost for original model\n    self.reference_size = self.target.get_reference(model)\n    self.model_size = self.target.get_reference_stats()\n\n    # first group layers into the patterns\n\n    groups = {pattern: [] for pattern in patterns}\n\n    for layer_id, layer in enumerate(model.layers):\n      if (\n          self.layer_indexes is not None and\n          layer_id not in self.layer_indexes\n      ):\n        continue\n\n      for pattern in groups:\n        if re.match(pattern, layer.name):\n          groups[pattern].append(layer.name)\n\n    self.grouped_patterns = groups\n\n    # now compute cost for each group\n\n    self.costs = []\n    for pattern in patterns:  # self.grouped_patterns:\n      total = 0\n      for layer in self.grouped_patterns[pattern]:\n        if layer in self.model_size:\n          total += self.model_size[layer][\"total\"]\n      self.costs.append((pattern, total))\n\n    # the costs will be sorted by the total cost of the group\n    if self.schedule_block == \"cost\":\n      self.costs = sorted(self.costs, key=lambda cost_tuple: -cost_tuple[1])\n\n  def retrieve_max_block(self):\n    for cost in self.costs:\n      yield cost\n\n  def get_history(self):\n    \"\"\"Returns the 
history of the model search.\"\"\"\n    return self.history\n\n  def get_best_model(self):\n    \"\"\"Returns the best model.\"\"\"\n\n    # check if we have run fit first.\n    if not self.autoqk:\n      return None\n\n    self.autoqk.hypermodel.target.print_stats()\n    print_qmodel_summary(self.best_model)\n\n    return self.best_model\n\n  def get_learning_rate(self):\n    return self.learning_rate\n"
  },
  {
    "path": "qkeras/autoqkeras/examples/run/get_data.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Extracts sample dataset from tfds.\"\"\"\n\nimport numpy as np\nfrom tensorflow.keras.utils import to_categorical\nimport tensorflow_datasets as tfds\n\n\ndef get_data(dataset_name, fast=False):\n  \"\"\"Returns dataset from tfds.\"\"\"\n  ds_train = tfds.load(name=dataset_name, split=\"train\", batch_size=-1)\n  ds_test = tfds.load(name=dataset_name, split=\"test\", batch_size=-1)\n\n  dataset = tfds.as_numpy(ds_train)\n  x_train, y_train = dataset[\"image\"].astype(np.float32), dataset[\"label\"]\n\n  dataset = tfds.as_numpy(ds_test)\n  x_test, y_test = dataset[\"image\"].astype(np.float32), dataset[\"label\"]\n\n  if len(x_train.shape) == 3:\n    x_train = x_train.reshape(x_train.shape + (1,))\n    x_test = x_test.reshape(x_test.shape + (1,))\n\n  x_train /= 256.0\n  x_test /= 256.0\n\n  x_mean = np.mean(x_train, axis=0)\n\n  x_train -= x_mean\n  x_test -= x_mean\n\n  nb_classes = np.max(y_train) + 1\n  y_train = to_categorical(y_train, nb_classes)\n  y_test = to_categorical(y_test, nb_classes)\n\n  print(x_train.shape[0], \"train samples\")\n  print(x_test.shape[0], \"test samples\")\n\n  if fast:\n    i_train = np.arange(x_train.shape[0])\n    np.random.shuffle(i_train)\n    i_test = 
np.arange(x_test.shape[0])\n    np.random.shuffle(i_test)\n\n    s_x_train = x_train[i_train[0:fast]]\n    s_y_train = y_train[i_train[0:fast]]\n    s_x_test = x_test[i_test[0:fast]]\n    s_y_test = y_test[i_test[0:fast]]\n    return ((s_x_train, s_y_train), (x_train, y_train), (s_x_test, s_y_test),\n            (x_test, y_test))\n  else:\n    return (x_train, y_train), (x_test, y_test)\n\nif __name__ == \"__main__\":\n  get_data(\"mnist\")\n  get_data(\"fashion_mnist\")\n  get_data(\"cifar10\", fast=1000)\n  get_data(\"cifar100\")\n"
  },
  {
    "path": "qkeras/autoqkeras/examples/run/get_model.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\nfrom qkeras.autoqkeras.examples.run.networks import ConvBlockNetwork  # pylint: disable=line-too-long\n\ndef get_model(dataset):\n  \"\"\"Returns a model for the demo of AutoQKeras.\"\"\"\n  if dataset == \"mnist\":\n    model = ConvBlockNetwork(\n        shape=(28, 28, 1),\n        nb_classes=10,\n        kernel_size=3,\n        filters=[16, 32, 48, 64, 128],\n        dropout_rate=0.2,\n        with_maxpooling=False,\n        with_batchnorm=True,\n        kernel_initializer=\"he_uniform\",\n        bias_initializer=\"zeros\",\n    ).build()\n\n  elif dataset == \"fashion_mnist\":\n    model = ConvBlockNetwork(\n        shape=(28, 28, 1),\n        nb_classes=10,\n        kernel_size=3,\n        filters=[16, [32]*3, [64]*3],\n        dropout_rate=0.2,\n        with_maxpooling=True,\n        with_batchnorm=True,\n        use_separable=\"mobilenet\",\n        kernel_initializer=\"he_uniform\",\n        bias_initializer=\"zeros\",\n        use_xnornet_trick=True\n    ).build()\n\n  elif dataset == \"cifar10\":\n    model = ConvBlockNetwork(\n        shape=(32, 32, 3),\n        nb_classes=10,\n        kernel_size=3,\n        filters=[16, [32]*3, [64]*3, [128]*3],\n        dropout_rate=0.2,\n        
with_maxpooling=True,\n        with_batchnorm=True,\n        use_separable=\"mobilenet\",\n        kernel_initializer=\"he_uniform\",\n        bias_initializer=\"zeros\",\n        use_xnornet_trick=True\n    ).build()\n\n  elif dataset == \"cifar100\":\n    model = ConvBlockNetwork(\n        shape=(32, 32, 3),\n        nb_classes=100,\n        kernel_size=3,\n        filters=[16, [32]*3, [64]*3, [128]*3, [256]*3],\n        dropout_rate=0.2,\n        with_maxpooling=True,\n        with_batchnorm=True,\n        use_separable=\"mobilenet\",\n        kernel_initializer=\"he_uniform\",\n        bias_initializer=\"zeros\",\n        use_xnornet_trick=True\n    ).build()\n\n  model.summary()\n\n  return model\n"
  },
  {
    "path": "qkeras/autoqkeras/examples/run/networks/__init__.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\nfrom .conv_block import ConvBlockNetwork\n"
  },
  {
    "path": "qkeras/autoqkeras/examples/run/networks/conv_block.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\nimport os\nfrom tensorflow.initializers import *  # pylint: disable=wildcard-import\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import BatchNormalization\nfrom tensorflow.keras.layers import Conv2D\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.layers import DepthwiseConv2D\nfrom tensorflow.keras.layers import Dropout\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.layers import MaxPooling2D\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import *  # pylint: disable=wildcard-import\nfrom qkeras import *  # pylint: disable=wildcard-import\n\n\nclass ConvBlockNetwork(object):\n  \"\"\"Creates Convolutional block type of network.\"\"\"\n\n  def __init__(\n      self,\n      shape,\n      nb_classes,\n      kernel_size,\n      filters,\n      dropout_rate=0.0,\n      with_maxpooling=True,\n      with_batchnorm=True,\n      kernel_initializer=\"he_normal\",\n      bias_initializer=\"zeros\",\n      use_separable=False,\n      use_xnornet_trick=False\n  ):\n    \"\"\"Creates class.\n\n    Args:\n      shape: shape of inputs.\n      nb_classes: number of output 
classes.\n      kernel_size: kernel_size of network.\n      filters: sizes of filters (if entry is a list, we create a block).\n      dropout_rate: dropout rate if > 0.\n      with_maxpooling: if true, use maxpooling.\n      with_batchnorm: with BatchNormalization.\n      kernel_initializer: kernel_initializer.\n      bias_initializer: bias and beta initializer.\n      use_separable: if \"dsp\", do conv's 1x3 + 3x1. If \"mobilenet\",\n        use MobileNet separable convolution. If False or \"none\", perform single\n        conv layer.\n      use_xnornet_trick: use bn+act after max pool to enable binary\n        to avoid saturation to largest value.\n    \"\"\"\n\n    self.shape = shape\n    self.nb_classes = nb_classes\n    self.kernel_size = kernel_size\n    self.filters = filters\n    self.dropout_rate = dropout_rate\n    self.with_maxpooling = with_maxpooling\n    self.with_batchnorm = with_batchnorm\n    self.kernel_initializer = kernel_initializer\n    self.bias_initializer = bias_initializer\n    self.use_separable = use_separable\n    self.use_xnornet_trick = use_xnornet_trick\n\n  def build(self):\n    \"\"\"Builds model.\"\"\"\n    x = x_in = Input(self.shape, name=\"input\")\n    for i in range(len(self.filters)):\n      if len(self.filters) > 1:\n        name_suffix_list = [str(i)]\n      else:\n        name_suffix_list = []\n      if not isinstance(self.filters[i], list):\n        filters = [self.filters[i]]\n      else:\n        filters = self.filters[i]\n      for j in range(len(filters)):\n        if len(filters) > 1:\n          name_suffix = \"_\".join(name_suffix_list + [str(j)])\n        else:\n          name_suffix = \"_\".join(name_suffix_list)\n        if self.use_separable == \"dsp\":\n          kernels = [(1, self.kernel_size), (self.kernel_size, 1)]\n        else:\n          kernels = [(self.kernel_size, self.kernel_size)]\n        for k, kernel in enumerate(kernels):\n          strides = 1\n          if (\n              not 
self.with_maxpooling and j == len(filters)-1 and\n              k == len(kernels)-1\n          ):\n            strides = 2\n          if self.use_separable == \"dsp\":\n            kernel_suffix = (\n                \"\".join([str(k) for k in kernel]) + \"_\" + name_suffix)\n          elif self.use_separable == \"mobilenet\":\n            depth_suffix = (\n                \"\".join([str(k) for k in kernel]) + \"_\" + name_suffix)\n            kernel_suffix = \"11_\" + name_suffix\n          else:\n            kernel_suffix = name_suffix\n          if self.use_separable == \"mobilenet\":\n            x = DepthwiseConv2D(\n                kernel,\n                padding=\"same\", strides=strides,\n                use_bias=False,\n                name=\"conv2d_dw_\" + depth_suffix)(x)\n            if self.with_batchnorm:\n              x = BatchNormalization(name=\"conv2d_dw_bn_\" + depth_suffix)(x)\n            x = Activation(\"relu\", name=\"conv2d_dw_act_\" + depth_suffix)(x)\n            kernel = (1, 1)\n            strides = 1\n          x = Conv2D(\n              filters[j], kernel,\n              strides=strides, use_bias=not self.with_batchnorm,\n              padding=\"same\",\n              kernel_initializer=self.kernel_initializer,\n              bias_initializer=self.bias_initializer,\n              name=\"conv2d_\" + kernel_suffix)(x)\n          if not (\n              self.with_maxpooling and self.use_xnornet_trick and\n              j == len(filters)-1 and k == len(kernels)-1\n          ):\n            if self.with_batchnorm:\n              x = BatchNormalization(\n                  beta_initializer=self.bias_initializer,\n                  name=\"bn_\" + kernel_suffix)(x)\n            x = Activation(\"relu\", name=\"act_\" + kernel_suffix)(x)\n      if self.with_maxpooling:\n        x = MaxPooling2D(2, 2, name=\"mp_\" + name_suffix)(x)\n        # this is a trick from xnornet to enable full binary or ternary\n        # networks to be after 
maxpooling.\n        if self.use_xnornet_trick:\n          x = BatchNormalization(\n              beta_initializer=self.bias_initializer,\n              name=\"mp_bn_\" + name_suffix)(x)\n          x = Activation(\"relu\", name=\"mp_act_\" + name_suffix)(x)\n      if self.dropout_rate > 0:\n        x = Dropout(self.dropout_rate, name=\"drop_\" + name_suffix)(x)\n\n    if x.shape.as_list()[1] > 1:\n      x = Flatten(name=\"flatten\")(x)\n      x = Dense(\n          self.nb_classes,\n          kernel_initializer=self.kernel_initializer,\n          bias_initializer=self.bias_initializer,\n          name=\"dense\")(x)\n      x = Activation(\"softmax\", name=\"softmax\")(x)\n    else:\n      x = Conv2D(\n          self.nb_classes, 1, strides=1, padding=\"same\",\n          kernel_initializer=self.kernel_initializer,\n          bias_initializer=self.bias_initializer,\n          name=\"dense\")(x)\n      x = Activation(\"softmax\", name=\"softmax\")(x)\n      x = Flatten(name=\"flatten\")(x)\n\n    model = Model(inputs=[x_in], outputs=[x])\n\n    return model\n\nif __name__ == \"__main__\":\n  import os\n\n  os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n  os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n\n  model = ConvBlockNetwork(\n      shape=(64, 64, 1),\n      nb_classes=10,\n      kernel_size=3,\n      filters=[16, [32]*3, 48, 64, 128],\n      dropout_rate=0.0,\n      with_maxpooling=False,\n      with_batchnorm=True,\n      use_separable=\"mobilenet\",\n      use_xnornet_trick=True\n  ).build()\n\n  model.summary()\n"
  },
  {
    "path": "qkeras/autoqkeras/examples/run/plot_history.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Plots history of runs when running in scheduler mode.\"\"\"\n\nimport glob\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfilenames = glob.glob(\"log_*.csv\")\nfilenames.sort()\n\nblock_sizes = int(np.ceil(np.sqrt(len(filenames))))\n\nfor i in range(len(filenames)):\n  history = pd.read_csv(filenames[i])\n  title = \"block_\" + str(i)\n  fig = plt.subplot(block_sizes, block_sizes, i + 1, title=title)\n  ax1 = fig\n  ax1.set_xlabel(\"trial\")\n  ax1.set_ylabel(\"score / accuracy\")\n  plt1 = ax1.plot(history[\"score\"], \"ro-\", label=\"score\")\n  plt2 = ax1.plot(history[\"accuracy\"], \"go-\", label=\"accuracy\")\n  plt3 = ax1.plot(history[\"val_accuracy\"], \"bo-\", label=\"val_accuracy\")\n\n  ax2 = ax1.twinx()\n  ax2.set_ylabel(\"energy\", color=\"m\")\n  plt4 = ax2.plot(history[\"trial_size\"], \"mo-\", label=\"trial_size\")\n\n  plts = plt1+plt2+plt3+plt4\n  labs = [l.get_label() for l in plts]\n\n  ax1.legend(plts, labs, loc=0)\nplt.show()\n"
  },
  {
    "path": "qkeras/autoqkeras/forgiving_metrics/__init__.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\nfrom .forgiving_bits import ForgivingFactorBits\nfrom .forgiving_energy import ForgivingFactorPower\nfrom .forgiving_factor import ForgivingFactor\n\nforgiving_factor = {\n    \"bits\": ForgivingFactorBits,\n    \"energy\": ForgivingFactorPower\n}\n"
  },
  {
    "path": "qkeras/autoqkeras/forgiving_metrics/forgiving_bits.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements forgiving factor metrics bit model size in bits.\"\"\"\n\nimport numpy as np\nimport six\nfrom qkeras.autoqkeras.forgiving_metrics.forgiving_factor import ForgivingFactor   # pylint: disable=line-too-long\nfrom qkeras import get_quantizer\n\n\nclass ForgivingFactorBits(ForgivingFactor):\n  \"\"\"Implements forgiving factor with target as number of bits.\"\"\"\n\n  def __init__(\n      self, delta_p, delta_n, rate, stress=1.0,\n      input_bits=8, output_bits=8, ref_bits=8, config=None):\n    self.stress = stress\n    self.input_bits = input_bits\n    self.output_bits = output_bits\n    self.ref_bits = ref_bits\n    self.ref_size = {}\n    self.config = config if config else {}\n\n    super().__init__(delta_p, delta_n, rate)\n\n  def _param_size(self, layer):\n    \"\"\"Computes size of parameters of a layer in bits.\"\"\"\n    t_size = self.ref_bits\n    parameter_size = 0\n    # we only compute parameter sizes for these layers, and BatchNormalization\n    # is a special case because it exports mean and beta that is absorbed by\n    # previous or next layer. 
As mean and beta will be compressed into a single\n    # value, we actually only need to take care of the shape.\n    if layer.__class__.__name__ in [\n        \"Dense\", \"Conv2D\", \"Conv1D\", \"DepthwiseConv2D\"]:\n      for w in layer.get_weights():\n        parameter_size += t_size * np.prod(w.shape)\n    elif layer.__class__.__name__ in [\n        \"QDense\", \"QConv2D\", \"QConv1D\", \"QDepthwiseConv2D\"]:\n      for i, w in enumerate(layer.get_weights()):\n        if layer.get_quantizers()[i]:\n          bits = layer.get_quantizers()[i].bits\n        else:\n          bits = t_size\n        parameter_size += bits * np.prod(w.shape)\n    elif layer.__class__.__name__ in [\"BatchNormalization\"]:\n      # scale\n      index = -1\n      parameter_size += t_size * np.prod(layer.get_weights()[index].shape)\n      # center (bias)\n      if layer.center:\n        index = int(bool(layer.scale))\n        parameter_size += t_size * np.prod(layer.get_weights()[index].shape)\n    elif layer.__class__.__name__ in [\"QBatchNormalization\"]:\n      # scale\n      index = -1\n      bits = 6\n      parameter_size += bits * np.prod(layer.get_weights()[index].shape)\n      # center (bias)\n      if layer.center:\n        bits = 5\n        index = int(bool(layer.scale))\n        parameter_size += bits * np.prod(layer.get_weights()[index].shape)\n    return parameter_size\n\n  def _act_size(self, layer):\n    \"\"\"Computes size of activations of a layer in bits.\"\"\"\n    i_size = self.input_bits\n    o_size = self.output_bits\n    t_size = self.ref_bits\n    output_size = np.prod(layer.output.shape[1:])\n    # we compute activation sizes for inputs and outputs\n    if layer.__class__.__name__ in [\"InputLayer\"]:\n      return i_size * output_size\n    elif layer.__class__.__name__ in [\n        \"Dense\", \"Conv2D\", \"Conv1D\", \"DepthwiseConv2D\"]:\n      if layer.activation is not None and layer.activation.__name__ != \"linear\":\n        return t_size * output_size\n     
 else:\n        return 0\n    elif layer.__class__.__name__ in [\n        \"QDense\", \"QConv2D\", \"QConv1D\", \"QDepthwiseConv2D\"]:\n      if layer.activation is None:\n        is_softmax = False\n        is_linear = False\n      else:\n        if isinstance(layer.activation, six.string_types):\n          is_softmax = layer.activation == \"softmax\"\n          is_linear = layer.activation == \"linear\"\n        elif hasattr(layer.activation, \"__name__\"):\n          is_softmax = layer.activation.__name__ == \"softmax\"\n          is_linear = layer.activation.__name__ == \"linear\"\n        else:\n          is_softmax = False\n          is_linear = False\n\n        if is_softmax:\n          bits = o_size\n        elif is_linear:\n          bits = 0\n        else:\n          assert not isinstance(layer.activation, six.string_types)\n          if hasattr(layer.activation, \"bits\"):\n            bits = layer.activation.bits\n          else:\n            bits = t_size\n\n        return bits * np.prod(layer.output.shape.as_list()[1:])\n    elif layer.__class__.__name__ in [\"QActivation\", \"Activation\"]:\n      if isinstance(layer.activation, six.string_types):\n        is_linear = layer.activation == \"linear\"\n        is_softmax = layer.activation == \"softmax\"\n        is_sigmoid = layer.activation == \"sigmoid\"\n      else:\n        is_linear = layer.activation.__name__ == \"linear\"\n        is_softmax = layer.activation.__name__ == \"softmax\"\n        is_sigmoid = layer.activation.__name__ == \"sigmoid\"\n\n      if is_linear:\n        bits = 0\n      elif is_softmax or is_sigmoid:\n        bits = o_size\n      else:\n        if isinstance(layer.activation, six.string_types):\n          activation = get_quantizer(layer.activation)\n        else:\n          activation = layer.activation\n        if hasattr(activation, \"bits\"):\n          bits = activation.bits\n        else:\n          bits = t_size\n      return bits * output_size\n    return 0\n\n  
def compute_model_size(self, model):\n    \"\"\"Computes size of model.\"\"\"\n\n    a_size = 0\n    p_size = 0\n    total_size = 0\n    model_size_dict = {}\n    for layer in model.layers:\n      layer_name = layer.__class__.__name__\n      layer_config = self.config.get(\n          layer_name, self.config.get(\"default\", None))\n      if layer_config:\n        parameters = self._param_size(layer)\n        activations = self._act_size(layer)\n        p_weight = (\"parameters\" in layer_config)\n        a_weight = (\"activations\" in layer_config)\n        total = p_weight * parameters + a_weight * activations\n        model_size_dict[layer.name] = {\n            \"parameters\": parameters,\n            \"activations\": activations,\n            \"total\": total\n        }\n        a_size += a_weight * activations\n        p_size += p_weight * parameters\n        total_size += total\n\n    return (total_size, p_size, a_size, model_size_dict)\n\n  def get_reference(self, model):\n    if not hasattr(self, \"reference_size\"):\n      cached_result = self.compute_model_size(model)\n      self.reference_size = cached_result[0] * self.stress\n      self.ref_p = cached_result[1]\n      self.ref_a = cached_result[2]\n      self.reference_size_dict = cached_result[3]\n\n    return self.reference_size\n\n  def get_reference_stats(self):\n    return self.reference_size_dict\n\n  def get_trial(self, model):\n    \"\"\"Computes size of quantization trial.\"\"\"\n\n    result = self.compute_model_size(model)\n    self.trial_size = result[0]\n    self.total_p_bits = result[1]\n    self.total_a_bits = result[2]\n    self.trial_size_dict = result[3]\n\n    return self.trial_size\n\n  def get_total_factor(self):\n    \"\"\"we adjust the learning rate by size reduction.\"\"\"\n    ref_total = self.ref_a + self.ref_p\n    trial_total = self.total_a_bits + self.total_p_bits\n    return (trial_total - ref_total) / ref_total\n\n  def print_stats(self):\n    \"\"\"Prints statistics of 
current model.\"\"\"\n    str_format = (\n        \"stats: delta_p={} delta_n={} rate={} trial_size={} reference_size={}\\n\"\n        \"       delta={:.2f}%\"\n    )\n\n    print(\n        str_format.format(\n            self.delta_p, self.delta_n, self.rate, self.trial_size,\n            int(self.reference_size), 100*self.delta())\n    )\n\n    a_percentage = np.round(\n        100.0 * (self.total_a_bits - self.ref_a) / self.ref_a, 2)\n    p_percentage = np.round(\n        100.0 * (self.total_p_bits - self.ref_p) / self.ref_p, 2)\n    ref_total = self.ref_a + self.ref_p\n    trial_total = self.total_a_bits + self.total_p_bits\n    total_percentage = np.round(\n        100.0 * (trial_total - ref_total) / ref_total, 2)\n\n    print(\n        (\n            \"       a_bits={}/{} ({:.2f}%) p_bits={}/{} ({:.2f}%)\\n\"\n            \"       total={}/{} ({:.2f}%)\"\n        ).format(\n            int(self.total_a_bits), int(self.ref_a), a_percentage,\n            int(self.total_p_bits), int(self.ref_p), p_percentage,\n            int(trial_total), int(ref_total), total_percentage\n        ))\n"
  },
  {
    "path": "qkeras/autoqkeras/forgiving_metrics/forgiving_energy.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements forgiving factor metrics for energy consumption.\"\"\"\n\nimport json\nimport numpy as np\nfrom qkeras.autoqkeras.forgiving_metrics.forgiving_factor import ForgivingFactor   # pylint: disable=line-too-long\nfrom qkeras.qtools import run_qtools\nfrom qkeras.qtools import settings as qtools_settings\n\n\nclass ForgivingFactorPower(ForgivingFactor):\n  \"\"\"Get Power cost of a given model.\"\"\"\n\n  def __init__(self, delta_p, delta_n, rate, stress=1.0, **kwargs):\n\n    # input parameters:\n    # delta_p, delta_n, rate: same as parent class\n    # stress: stress level to shift reference curve\n    # process: technology process to use in configuration (horowitz, ...)\n    # parameters_on_memory: whether to store parameters in dram, sram, or fixed\n    # activations_on_memory: store activations in dram, sram\n    # min_sram_size: minimum sram size in number of bits\n    # rd_wr_on_io: whether load data from dram to sram (consider sram as a cache\n    #   for dram. 
If false, we will assume data will be already in SRAM\n    # config_json: if None, use qtools/config_json by default\n    #   define default source quantizers;\n    #   default quantizers for intermediate variables if no quantizer provided\n    #   parameters for energy calculation\n    # source_quantizers: quantizer for model input\n    # trained_model: whether model has been trained already, which is\n    #   needed to compute tighter bounds for qBatchNorm Power estimation.\n    # reference_internal: size to use for weight/bias/activation in\n    #   get_reference energy calculation (int8, fp16, fp32)\n    # reference_accumulator: accumulator and multiplier type in get_reference\n    #   energy calculation\n    # keras_layer_quantizer: quantizer for keras layers in hybrid models\n\n    super().__init__(delta_p, delta_n, rate)\n\n    self.stress = stress\n    # process: horowitz... - must be present in config_json\n    self.process = kwargs.get(\"process\", \"horowitz\")\n    # parameters_on_memory: fixed, sram, dram\n    self.parameters_on_memory = kwargs.get(\n        \"parameters_on_memory\", [\"fixed\"] * 2)\n    # activations_on_memory: sram, dram\n    self.activations_on_memory = kwargs.get(\n        \"activations_on_memory\", [\"dram\"] * 2\n    )\n    self.min_sram_size = kwargs.get(\"min_sram_size\", [0] * 2)\n    # rd_wr_on_io: true/false\n    self.rd_wr_on_io = kwargs.get(\"rd_wr_on_io\", [True] * 2)\n    self.config_json = kwargs.get(\"config_json\", None)\n    self.source_quantizers = kwargs.get(\"source_quantizers\", None)\n    # trained_model: true/false\n    self.trained_model = kwargs.get(\"trained_model\", False)\n    # reference_internal: int8, fp16, fp32\n    self.reference_internal = kwargs.get(\"reference_internal\", \"fp32\")\n    # reference_internal: int8, int16, int32, fp16, fp32\n    self.reference_accumulator = kwargs.get(\"reference_accumulator\", \"fp32\")\n\n    self.reference_size = None\n\n    # energy_dict is a dictionary that 
lists energy consumption for each layer\n    # format:\n    #  {\n    #     \"layer0_name\":\n    #     {\n    #        \"mem_cost\": 148171,\n    #        \"op_cost\": 0\n    #     },\n    #     \"layer1_name\":\n    #     {\n    #         \"mem_cost\": 179923,\n    #         \"op_cost\": 34\n    #     },\n    #     ...\n    #\n    #     \"total_cost\": 328129\n    #  }\n\n    self.ref_energy_dict = None\n    self.trial_energy_dict = None\n\n    assert self.parameters_on_memory[0] in [\"dram\", \"sram\", \"fixed\"]\n    assert self.parameters_on_memory[1] in [\"dram\", \"sram\", \"fixed\"]\n    assert self.activations_on_memory[0] in [\"dram\", \"sram\", \"fixed\"]\n    assert self.activations_on_memory[1] in [\"dram\", \"sram\", \"fixed\"]\n    assert self.reference_internal in [\"fp16\", \"fp32\", \"int8\"]\n    assert self.reference_accumulator in [\"int16\", \"int32\", \"fp16\", \"fp32\"]\n\n  def get_reference(self, model):\n    # we only want to compute reference once\n    if self.reference_size is not None:\n      return self.reference_size * self.stress\n\n    q = run_qtools.QTools(\n        model, process=self.process,\n        source_quantizers=self.reference_internal,\n        is_inference=self.trained_model,\n        weights_path=None,\n        keras_quantizer=self.reference_internal,\n        keras_accumulator=self.reference_accumulator,\n        for_reference=True)\n\n    energy_dict = q.pe(\n        weights_on_memory=self.parameters_on_memory[0],\n        activations_on_memory=self.activations_on_memory[0],\n        min_sram_size=self.min_sram_size[0],\n        rd_wr_on_io=self.rd_wr_on_io[0])\n\n    self.ref_energy_dict = energy_dict\n    self.reference_size = q.extract_energy_sum(\n        qtools_settings.cfg.include_energy, energy_dict)\n\n    self.reference_energy_profile = q.extract_energy_profile(\n        qtools_settings.cfg.include_energy, energy_dict)\n\n    return self.reference_size * self.stress\n\n  def get_trial(self, model):\n    
\"\"\"Computes size of quantization trial.\"\"\"\n\n    q = run_qtools.QTools(\n        model, process=self.process,\n        source_quantizers=self.source_quantizers,\n        is_inference=self.trained_model,\n        weights_path=None,\n        keras_quantizer=self.reference_internal,\n        keras_accumulator=self.reference_accumulator,\n        for_reference=False)\n\n    energy_dict = q.pe(\n        weights_on_memory=self.parameters_on_memory[1],\n        activations_on_memory=self.activations_on_memory[1],\n        min_sram_size=self.min_sram_size[1],\n        rd_wr_on_io=self.rd_wr_on_io[1])\n\n    self.trial_energy_dict = energy_dict\n    # self.trial_size = energy_dict[\"total_cost\"]\n    self.trial_size = q.extract_energy_sum(\n        qtools_settings.cfg.include_energy, energy_dict)\n\n    self.trial_energy_profile = q.extract_energy_profile(\n        qtools_settings.cfg.include_energy, energy_dict)\n\n    return self.trial_size\n\n  def get_total_factor(self):\n    \"\"\"we adjust the learning rate by size reduction.\"\"\"\n    return (self.trial_size - self.reference_size) / self.reference_size\n\n  def get_reference_stats(self):\n    return self.reference_energy_profile\n\n  def get_trial_stats(self):\n    return self.trial_energy_profile\n\n  def print_stats(self, verbosity=0):\n    \"\"\"Prints statistics of current model.\"\"\"\n\n    delta = self.delta()\n\n    if (self.ref_energy_dict and self.trial_energy_dict):\n      str_format = (\n          \"stats: delta_p={} delta_n={} rate={} trial_size={} \"\n          \"reference_size={}\\n\"\n          \"       delta={:.2f}%\"\n      )\n\n      print(\n          str_format.format(\n              self.delta_p, self.delta_n, self.rate, self.trial_size,\n              int(self.reference_size), 100 * delta)\n      )\n\n    if verbosity > 0 and self.ref_energy_dict:\n      print(\"Reference Cost Distribution:\")\n      dict_to_json = json.dumps(self.ref_energy_dict, indent=4)\n      
print(dict_to_json)\n\n    if verbosity > 0 and self.trial_energy_dict:\n      print(\"Trial Cost Distribution:\")\n      dict_to_json = json.dumps(self.trial_energy_dict, indent=4)\n      print(dict_to_json)\n\n    if (self.ref_energy_dict and self.trial_energy_dict):\n      print(\"Total Cost Reduction:\")\n      reduction_percentage = np.round(\n          100.0 * (self.trial_size - self.reference_size) /\n          self.reference_size, 2)\n\n      print(\n          (\"       {} vs {} ({:.2f}%)\").format(\n              int(self.trial_size), int(self.reference_size),\n              reduction_percentage\n          ))\n"
  },
  {
    "path": "qkeras/autoqkeras/forgiving_metrics/forgiving_factor.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements forgiving factor metrics.\"\"\"\n\nimport numpy as np\n\n\nclass ForgivingFactor:\n  \"\"\"Base class. Should never be invoked.\"\"\"\n\n  def __init__(self, delta_p, delta_n, rate):\n    self.delta_p = np.float32(delta_p) / 100.0\n    self.delta_n = np.float32(delta_n) / 100.0\n    self.rate = np.float32(rate)\n\n  def get_reference(self, model):\n    \"\"\"Computes reference size of model.\"\"\"\n\n    raise Exception(\"class not implemented.\")\n\n  def get_trial(self, model, schema):\n    \"\"\"Computes size of quantization trial.\"\"\"\n\n    raise Exception(\"class not implemented.\")\n\n  def delta(self):\n    return np.where(\n        self.trial_size < self.reference_size,\n        self.delta_p * (np.log(self.reference_size/self.trial_size) /\n                        np.log(self.rate)),\n        self.delta_n * (np.log(self.reference_size/self.trial_size) /\n                        np.log(self.rate)))\n\n"
  },
  {
    "path": "qkeras/autoqkeras/quantization_config.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Definition of default quantization configuration.\"\"\"\n\ndefault_quantization_config = {\n    \"kernel\": {\n        \"binary\": 1,\n        \"stochastic_binary\": 1,\n        \"ternary\": 2,\n        \"stochastic_ternary\": 2,\n        \"quantized_bits(2,1,1,alpha=1.0)\": 2,\n        \"quantized_bits(4,0,1)\": 4,\n        \"quantized_bits(8,0,1)\": 8,\n        \"quantized_po2(4,1)\": 4\n    },\n    \"bias\": {\n        \"quantized_bits(4,0,1)\": 4,\n        \"quantized_bits(8,3,1)\": 8,\n        \"quantized_po2(4,8)\": 4\n    },\n    \"activation\": {\n        \"binary\": 1,\n        \"binary(alpha='auto_po2')\": 1,\n        \"ternary\": 2,\n        \"quantized_relu(3,1)\": 3,\n        \"quantized_relu(4,2)\": 4,\n        \"quantized_relu(8,2)\": 8,\n        \"quantized_relu(8,4)\": 8,\n        \"quantized_relu(16,8)\": 16,\n        \"quantized_relu_po2(4,4)\": 4\n    },\n    \"linear\": {\n        \"binary\": 1,\n        \"ternary\": 2,\n        \"quantized_bits(4,1)\": 4,\n        \"quantized_bits(8,2)\": 8,\n        \"quantized_bits(16,10)\": 16,\n        \"quantized_po2(6,4)\": 6\n    }\n}\n"
  },
  {
    "path": "qkeras/autoqkeras/tests/test_forgiving_factor.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\nimport pytest\nfrom tensorflow.keras.layers import *   # pylint: disable=wildcard-import\nfrom tensorflow.keras.models import Model\nfrom qkeras import *   # pylint: disable=wildcard-import\nfrom qkeras.autoqkeras.forgiving_metrics import ForgivingFactorBits   # pylint: disable=line-too-long\nfrom qkeras.utils import model_quantize\n\n\ndef get_model():\n  \"\"\"Returns sample model.\"\"\"\n  xi = Input((28, 28, 1), name=\"input\")   # pylint: disable=undefined-variable\n  x = Conv2D(32, 3, strides=1, padding=\"same\", name=\"c1\")(xi)   # pylint: disable=undefined-variable\n  x = BatchNormalization(name=\"b1\")(x)   # pylint: disable=undefined-variable\n  x = Activation(\"relu\", name=\"a1\")(x)   # pylint: disable=undefined-variable\n  x = MaxPooling2D(2, 2, name=\"mp1\")(x)   # pylint: disable=undefined-variable\n  x = QConv2D(32, 3, kernel_quantizer=\"binary\", bias_quantizer=\"binary\",   # pylint: disable=undefined-variable\n              strides=1, padding=\"same\", name=\"c2\")(x)\n  x = QBatchNormalization(name=\"b2\")(x)   # pylint: disable=undefined-variable\n  x = QActivation(\"binary\", name=\"a2\")(x)   # pylint: disable=undefined-variable\n  x = MaxPooling2D(2, 2, name=\"mp2\")(x)   # 
pylint: disable=undefined-variable\n  x = QConv2D(32, 3, kernel_quantizer=\"ternary\", bias_quantizer=\"ternary\",   # pylint: disable=undefined-variable\n              strides=1, padding=\"same\", activation=\"binary\", name=\"c3\")(x)\n  x = Flatten(name=\"flatten\")(x)   # pylint: disable=undefined-variable\n  x = Dense(1, name=\"dense\", activation=\"softmax\")(x)   # pylint: disable=undefined-variable\n\n  model = Model(inputs=xi, outputs=x)\n\n  return model\n\n\ndef test_forgiving_factor_bits():\n  \"\"\"Tests forgiving factor bits.\"\"\"\n  delta_p = 8.0\n  delta_n = 8.0\n  rate = 2.0\n  stress = 1.0\n  input_bits = 8\n  output_bits = 8\n  ref_bits = 8\n\n  config = {\n      \"QDense\": [\"parameters\", \"activations\"],\n      \"Dense\": [\"parameters\", \"activations\"],\n      \"QConv2D\": [\"parameters\", \"activations\"],\n      \"Conv2D\": [\"parameters\", \"activations\"],\n      \"DepthwiseConv2D\": [\"parameters\", \"activations\"],\n      \"QDepthwiseConv2D\": [\"parameters\", \"activations\"],\n      \"Activation\": [\"activations\"],\n      \"QActivation\": [\"activations\"],\n      \"QBatchNormalization\": [\"parameters\"],\n      \"BatchNormalization\": [\"parameters\"],\n      \"default\": [\"activations\"],\n  }\n\n  model = get_model()\n\n  ffb = ForgivingFactorBits(\n      delta_p, delta_n, rate, stress,\n      input_bits, output_bits, ref_bits,\n      config\n  )\n\n  cached_result = ffb.compute_model_size(model)\n  ref_size = cached_result[0]\n  ref_p = cached_result[1]\n  ref_a = cached_result[2]\n\n  assert ref_size == 258544\n  assert ref_p == 43720\n  assert ref_a == 214824\n\n\ndef test_new_forgiving_factor():\n  \"\"\"Tests forgiving factor.\"\"\"\n  delta_p = 8.0\n  delta_n = 8.0\n  rate = 2.0\n  stress = 1.0\n  input_bits = 8\n  output_bits = 8\n  ref_bits = 8\n\n  config = {\n      \"QDense\": [\"parameters\", \"activations\"],\n      \"Dense\": [\"parameters\", \"activations\"],\n      \"QConv2D\": [\"parameters\", 
\"activations\"],\n      \"Conv2D\": [\"parameters\", \"activations\"],\n      \"DepthwiseConv2D\": [\"parameters\", \"activations\"],\n      \"QDepthwiseConv2D\": [\"parameters\", \"activations\"],\n      \"Activation\": [\"activations\"],\n      \"QActivation\": [\"activations\"],\n      \"QBatchNormalization\": [\"parameters\"],\n      \"BatchNormalization\": [\"parameters\"],\n      \"default\": [\"activations\"]\n  }\n\n  model = get_model()\n\n  model.use_legacy_config = True\n\n  ffb = ForgivingFactorBits(\n      delta_p, delta_n, rate, stress,\n      input_bits, output_bits, ref_bits,\n      config\n  )\n\n  cached_result = ffb.compute_model_size(model)\n  ref_size = cached_result[0]\n  ref_p = cached_result[1]\n  ref_a = cached_result[2]\n  ref_size_dict = cached_result[3]\n\n  assert ref_size == 258544\n  assert ref_p == 43720\n  assert ref_a == 214824\n\n  q_dict = {\n      \"c1\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"quantized_bits(4)\"\n      }\n  }\n\n  q_model = model_quantize(model, q_dict, 4)\n\n  cached_result = ffb.compute_model_size(q_model)\n  trial_size_dict = cached_result[3]\n\n  for name in trial_size_dict:\n    if name != \"c1\":\n      assert trial_size_dict[name] == ref_size_dict[name]\n  assert trial_size_dict[\"c1\"][\"parameters\"] == 416\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n\n\n\n"
  },
  {
    "path": "qkeras/autoqkeras/utils.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements utility functions for support of auto-quantization.\"\"\"\n\nimport json\nimport tensorflow as tf\n\n\nQ_SEQUENCE_LAYERS = [\"QSimpleRNN\", \"QLSTM\", \"QGRU\", \"QBidirectional\"]\n\ndef print_qmodel_summary(q_model):\n  \"\"\"Prints quantized model summary.\"\"\"\n\n  for layer in q_model.layers:\n    if (layer.__class__.__name__ == \"QActivation\" or\n        layer.__class__.__name__ == \"QAdaptiveActivation\"):\n      print(\"{:20} {}\".format(layer.name, str(layer.activation)))\n    elif (\n        hasattr(layer, \"get_quantizers\") and\n        layer.__class__.__name__ != \"QBatchNormalization\"\n    ):\n      print(\"{:20} \".format(layer.name), end=\"\")\n      if \"Dense\" in layer.__class__.__name__:\n        print(\"u={} \".format(layer.units), end=\"\")\n      elif layer.__class__.__name__ in [\n          \"Conv2D\", \"QConv2D\", \"Conv1D\", \"QConv1D\",\n          \"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"]:\n        print(\"f={} \".format(layer.filters), end=\"\")\n      quantizers = layer.get_quantizers()\n      for q in range(len(quantizers)):\n        if quantizers[q] is not None:\n          print(\"{} \".format(str(quantizers[q])), end=\"\")\n      if 
hasattr(layer, \"recurrent_activation\"):\n        print(\"recurrent act={}\".format(layer.recurrent_activation), end=\"\")\n      if (\n          layer.activation is not None and\n          not (\n              hasattr(layer.activation, \"__name__\") and\n              layer.activation.__name__ == \"linear\"\n          )\n      ):\n        print(\"act={}\".format(layer.activation), end=\"\")\n      print()\n    elif layer.__class__.__name__ == \"QBatchNormalization\":\n      print(\"{:20} QBN, mean={}\".format(layer.name,\n          str(tf.keras.backend.eval(layer.moving_mean))), end=\"\")\n      print()\n    elif layer.__class__.__name__ == \"BatchNormalization\":\n      print(\"{:20} is normal keras bn layer\".format(layer.name), end=\"\")\n      print()\n\n  print()\n\n\ndef get_quantization_dictionary(q_model):\n  \"\"\"Returns quantization dictionary.\"\"\"\n\n  q_dict = {}\n  for layer in q_model.layers:\n    if hasattr(layer, \"get_quantization_config\"):\n      q_dict[layer.name] = layer.get_quantization_config()\n\n  return q_dict\n\n\ndef save_quantization_dict(fn, q_model):\n  \"\"\"Saves quantization dictionary as json object in disk.\"\"\"\n  q_dict = get_quantization_dictionary(q_model)\n  json_dict = json.dumps(q_dict)\n\n  f = open(fn, \"w\")\n  f.write(json_dict + \"\\n\")\n  f.close()\n\n"
  },
  {
    "path": "qkeras/b2t.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements total/partial Binary to Thermometer decoder.\"\"\"\n\nfrom tensorflow.keras.utils import to_categorical\nimport numpy as np\n\n\ndef BinaryToThermometer(\n    x, classes, value_range, with_residue=False, merge_with_channels=False,\n    use_two_hot_encoding=False):\n\n  \"\"\"Converts binary to one-hot (with scales).\n\n  Given input matrix x with values (for example) 0, 1, 2, 3, 4, 5, 6, 7, create\n  a number of classes as follows:\n\n  classes=2, value_range=8, with_residue=0\n\n  A true one-hot representation, and the remaining bits are truncated, using\n  one bit representation.\n\n  0 - [1,0] 1 - [1,0] 2 - [1,0] 3 - [1,0]\n  4 - [0,1] 5 - [0,1] 6 - [0,1] 7 - [0,1]\n\n  classes=2, value_range=8, with_residue=1\n\n  In this case, the residue is added to the one-hot class, and the class will\n  use 2 bits (for the remainder) + 1 bit (for the one hot)\n\n  0 - [1,0] 1 - [1.25,0] 2 - [1.5,0] 3 - [1.75,0]\n  4 - [0,1] 5 - [0,1.25] 6 - [0,1.5] 7 - [0,1.75]\n\n  Arguments:\n    x: the input vector we want to convert. 
typically its dimension will be\n      (B,H,W,C) for an image, or (B,T,C) or (B,C) for a 1D signal, where\n      B=batch, H=height, W=width, C=channels or features, T=time for time\n      series.\n    classes: the number of classes to (or log2(classes) bits) to use of the\n      values.\n    value_range: max(x) - min(x) over all possible x values (e.g. for 8 bits,\n      we would use 256 here).\n    with_residue: if true, we split the value range into two sets and add\n      the decimal fraction of the set to the one-hot representation for partial\n      thermometer representation.\n    merge_with_channels: if True, we will not create a separate dimension\n      for the resulting matrix, but we will merge this dimension with\n      the last dimension.\n    use_two_hot_encoding: if true, we will distribute the weight between\n      the current value and the next one to make sure the numbers will always\n      be < 1.\n\n  Returns:\n    Converted x with classes with the last shape being C*classes.\n\n  \"\"\"\n\n  # just make sure we are processing floats so that we can compute fractional\n  # values\n\n  x = x.astype(np.float32)\n\n  # the number of ranges are equal to the span of the original values\n  # divided by the number of target classes.\n  #\n  # for example, if value_range is 256 and number of classes is 16, we have\n  # 16 values (remaining 4 bits to redistribute).\n\n  ranges = value_range/classes\n  x_floor = np.floor(x / ranges)\n\n  if use_two_hot_encoding:\n    x_ceil = np.ceil(x / ranges)\n\n  if with_residue:\n    x_mod_f = (x - x_floor * ranges) / ranges\n\n  # convert values to categorical. if use_two_hot_encoding, we may\n  # end up with one more class because we need to distribute the\n  # remaining bits to the saturation class. 
For example, if we have\n  # value_range = 4 (0,1,2,3) and classes = 2, if we use_two_hot_encoding\n  # we will have the classes 0, 1, 2, where for the number 3, we will\n  # allocate 0.5 to bin 1 and 0.5 to bin 2 (namely 3 = 0.5 * (2**2 + 2**1)).\n\n  xc_f = to_categorical(x_floor, classes + use_two_hot_encoding)\n\n  if with_residue:\n    xc_f_m = xc_f == 1\n\n    if use_two_hot_encoding:\n      xc_c = to_categorical(x_ceil, classes + use_two_hot_encoding)\n      xc_c_m = xc_c == 1\n      if np.any(xc_c_m):\n        xc_c[xc_c_m] = x_mod_f.reshape(xc_c[xc_c_m].shape)\n      if np.any(xc_f_m):\n        xc_f[xc_f_m] = (1.0 - x_mod_f.reshape(xc_f[xc_f_m].shape))\n      xc_f += xc_c\n    else:\n      if np.any(xc_f_m):\n        xc_f[xc_f_m] += x_mod_f.reshape(xc_f[xc_f_m].shape)\n\n  if merge_with_channels and len(xc_f.shape) != len(x.shape):\n    sz = xc_f.shape\n    sz = sz[:-2] + (sz[-2] * sz[-1],)\n    xc_f = xc_f.reshape(sz)\n\n  return xc_f\n\n"
  },
  {
    "path": "qkeras/base_quantizer.py",
    "content": "# Copyright 2025 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nimport tensorflow.compat.v2 as tf\nimport tensorflow.keras.backend as K\n\n\ndef _create_variable_name(attr_name, var_name=None):\n  \"\"\"Creates variable name.\n\n  Arguments:\n    attr_name: string. attribute name\n    var_name: string. variable name\n\n  Returns:\n    string. variable name\n  \"\"\"\n\n  if var_name:\n    return var_name + \"/\" + attr_name\n\n  # This naming scheme is to solve a problem of a layer having more than\n  # one quantizer can have multiple qnoise_factor variables with the same\n  # name of \"qnoise_factor\".\n  return attr_name + \"_\" + str(K.get_uid(attr_name))\n\n\nclass BaseQuantizer(tf.Module):\n  \"\"\"Base quantizer.\n\n  Defines behavior all quantizers should follow.\n  \"\"\"\n\n  def __init__(self):\n    self.built = False\n\n  def build(self, var_name=None, use_variables=False):\n    if use_variables:\n      if hasattr(self, \"qnoise_factor\"):\n        self.qnoise_factor = tf.Variable(\n            lambda: tf.constant(self.qnoise_factor, dtype=tf.float32),\n            name=_create_variable_name(\"qnoise_factor\", var_name=var_name),\n            dtype=tf.float32,\n            trainable=False,\n        )\n    self.built = True\n\n  def _set_trainable_parameter(self):\n    pass\n\n  def update_qnoise_factor(self, qnoise_factor):\n    
\"\"\"Update qnoise_factor.\"\"\"\n    if isinstance(self.qnoise_factor, tf.Variable):\n      # self.qnoise_factor is a tf.Variable.\n      # This is to update self.qnoise_factor during training.\n      self.qnoise_factor.assign(qnoise_factor)\n    else:\n      if isinstance(qnoise_factor, tf.Variable):\n        # self.qnoise_factor is a numpy variable, and qnoise_factor is a\n        # tf.Variable.\n        self.qnoise_factor = qnoise_factor.eval()\n      else:\n        # self.qnoise_factor and qnoise_factor are numpy variables.\n        # This is to set self.qnoise_factor before building\n        # (creating tf.Variable) it.\n        self.qnoise_factor = qnoise_factor\n\n  # Override not to expose the quantizer variables.\n  @property\n  def variables(self):\n    return ()\n\n  # Override not to expose the quantizer variables.\n  @property\n  def trainable_variables(self):\n    return ()\n\n  # Override not to expose the quantizer variables.\n  @property\n  def non_trainable_variables(self):\n    return ()\n"
  },
  {
    "path": "qkeras/bn_folding_utils.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Utility functions for folding batchnorm with qconv/qdense layers.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport networkx as nx\nimport tensorflow as tf\nfrom six.moves import range\nfrom tensorflow.keras.models import clone_model\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras import Input\n\nfrom .qconvolutional import QConv2D\nfrom .qconvolutional import QDepthwiseConv2D\nfrom .qtools import generate_layer_data_type_map as gen_map\nfrom .qtools import qgraph\n\n\ndef convert_folded_layer_to_unfolded(layer):\n  \"\"\"Replace a source batchnorm folded layer with a non-folded layer.\n\n  Args:\n    layer: keras/qkeras layer type. 
Source layer to be replaced.\n\n  Returns:\n    new layer instance\n  \"\"\"\n\n  # get layer config from the composite layer\n  config = layer.get_config()\n  # set layer config for QConv2D layer by first creating a tmp\n  # QConv2D object and generate template for its config\n  if layer.__class__.__name__ == \"QConv2DBatchnorm\":\n    new_layer = QConv2D(filters=1, kernel_size=(2, 2), use_bias=True)\n  elif layer.__class__.__name__ == \"QDepthwiseConv2DBatchnorm\":\n    new_layer = QDepthwiseConv2D(kernel_size=(2, 2), use_bias=True)\n  else:\n    # TODO(lishanok): will extend to QDense in the future\n    assert ValueError, \"%s is not supported!\" % layer.__class__.__name__\n\n  new_layer_cfg = new_layer.get_config()\n\n  # set qconv2d config according to the values in the composite layer\n  for (key, _) in new_layer_cfg.items():\n    if key in config.keys():\n      new_layer_cfg[key] = config[key]\n\n  # in case use_bias is False in the composite layer,\n  #  we need to set it True because we have folded bias\n  new_layer_cfg[\"use_bias\"] = True\n\n  # create a non-folded, e.g., qconv2d layer from config and replace\n  # old layer with it\n  if layer.__class__.__name__ == \"QConv2DBatchnorm\":\n    new_layer = QConv2D.from_config(new_layer_cfg)\n  elif layer.__class__.__name__ == \"QDepthwiseConv2DBatchnorm\":\n    new_layer = QDepthwiseConv2D.from_config(new_layer_cfg)\n  else:\n    raise ValueError(\"Unsupported layer conversion {}\".format(layer.name))\n\n  return new_layer\n\n\ndef unfold_model(model):\n  \"\"\"Convert a model with batchnorm folded layer to a normal model.\n\n  \"Normal\" here refers to a model without composite folded layer such as\n  QConv2DBatchnorm layer.\n  This function replaces the folded layers with a normal QConv/QDense\n  layer. It also sets the weights in the normal layer with the folded weights\n  in the folded layer. 
Model architecture could be either sequential or\n  non-sequential.\n\n  Arguments:\n    model: keras object, model with folded layers.\n\n  Returns:\n    A model that replaces folded layers (e.g., QConv2DBatchnorm) with normal\n      qkeras layers (e.g., QConv2D). This model can be passed on to hardware\n      generator so that hardware doesn't see batch normalization\n      parameters.\n  \"\"\"\n\n  def _convert_folded_layer(layer):\n    if layer.__class__.__name__ in [\n        \"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"]:\n      new_layer = convert_folded_layer_to_unfolded(layer)\n    else:\n      new_layer = layer.__class__.from_config(layer.get_config())\n\n    new_layer.build(layer.input_shape)\n    return new_layer\n\n  def _clone_weights(src_layer, new_layer):\n    if (src_layer.__class__.__name__ == \"QConv2DBatchnorm\") and (\n        new_layer.__class__.__name__ == \"QConv2D\"):\n      src_weights = src_layer.get_folded_weights()\n      # transfer weights from folded layer to the target layer\n      folded_kernel_quantized = (\n          src_weights[0].numpy())\n      folded_bias_quantized = (\n          src_weights[1].numpy())\n      new_layer.set_weights([folded_kernel_quantized, folded_bias_quantized])\n\n    elif (src_layer.__class__.__name__ == \"QDepthwiseConv2DBatchnorm\") and (\n        new_layer.__class__.__name__ == \"QDepthwiseConv2D\"):\n      # transfer weights from folded layer to the target layer\n      src_weights = src_layer.get_folded_weights()\n      folded_depthwise_kernel_quantized = src_weights[0].numpy()\n      folded_bias_quantized = src_weights[1].numpy()\n      new_layer.set_weights(\n          [folded_depthwise_kernel_quantized, folded_bias_quantized])\n    else:\n      new_layer.set_weights(src_layer.get_weights())\n\n  inp = Input(shape=model.input_shape[1:])\n  cloned_model = clone_model(\n      model, input_tensors=inp, clone_function=_convert_folded_layer)\n\n  # replace weights\n  for (src_layer, new_layer) in 
zip(model.layers, cloned_model.layers):\n    _clone_weights(src_layer, new_layer)\n\n  return cloned_model\n\n\ndef populate_bias_quantizer_from_accumulator(model, source_quantizers):\n  \"\"\"Populate the bias quantizer from accumulator type.\n\n  When user set bias_quantizer=None for layers(e.g.,\n  QConv2DBatchnorm), this function generates the accumulator type of\n  the layer MAC op and set it as the bias quantizer.\n  Such step is skipped if user provided a specific bias quantizer type.\n\n  Args:\n    model: keras/qkeras model object. If the model doesn't contain any batchnorm\n      folded layer or if the bias quanizer type in the folded layer is already\n      given, no operation needed. Else we generate the bias quantizer type and\n      set it in model.\n\n    source_quantizers: list of qkeras quantizers. A list of quantizer types\n      for model inputs.\n\n  Returns:\n    keras model object\n  \"\"\"\n  default_quantizer = \"quantized_bits(8, 0, 1)\"\n\n  # if source_quantizers is None, CreateGraph will use default_quantizer\n  (graph, source_quantizer_list) = qgraph.CreateGraph(\n      model, source_quantizers, default_quantizer)\n  qgraph.GraphPropagateActivationsToEdges(graph)\n\n  # generate the quantizer types of each layer. 
For folded layers, if bias\n  # quantizer is not given by user, this function will generate the accumulator\n  # type and set it as the bias quantizer type.\n  is_inference = False\n  keras_quantizer = \"quantized_bits(8, 0, 1)\"\n  keras_accumulator = \"quantized_bits(8, 0, 1)\"\n  for_reference = False\n  layer_map = gen_map.generate_layer_data_type_map(\n      graph, source_quantizer_list, is_inference,\n      keras_quantizer, keras_accumulator, for_reference)\n\n  for layer in model.layers:\n    # TODO(lishanok): extend to other layer types if necessary\n    if layer.__class__.__name__ in [\n        \"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"]:\n      if not layer.bias_quantizer:\n        # if user didn't specify the bias quantizer, we set it as the\n        # MAC accumulator type of the current layer's MAC operation\n        qtools_bias_quantizer = layer_map[\"layer_data_type_map\"][\n            layer].bias_quantizer\n\n        if tf.is_tensor(qtools_bias_quantizer.int_bits):\n          qtools_bias_quantizer.int_bits = (\n              qtools_bias_quantizer.int_bits.numpy())\n\n        layer.bias_quantizer = (\n            qtools_bias_quantizer.convert_to_qkeras_quantizer())\n\n        layer.bias_quantizer_internal = layer.bias_quantizer\n        if layer.__class__.__name__ == \"QConv2DBatchnorm\":\n          layer.quantizers = [layer.kernel_quantizer_internal,\n                              layer.bias_quantizer_internal]\n        elif layer.__class__.__name__ == \"QDepthwiseConv2DBatchnorm\":\n          layer.quantizers = [layer.depthwise_quantizer_internal,\n                              layer.bias_quantizer_internal]\n  return model\n"
  },
  {
    "path": "qkeras/callbacks.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport os\nimport tensorflow as tf\n\n\nclass QNoiseScheduler(tf.keras.callbacks.Callback):\n  \"\"\"Schedules the gradual quantization noise training for each step (or epoch).\n\n     It updates the qnoise_factor in the quantizers to gradually introduce the\n     quantization noise during training.\n\n     The idea was adopted from \"https://arxiv.org/pdf/1903.01061.pdf\"\n  \"\"\"\n\n  def __init__(self,\n               start,\n               finish,\n               freq_type=\"epoch\",\n               update_freq=1,\n               initial_step_or_epoch=0,\n               exponent=3.0,\n               use_ste=True,\n               log_dir=None):\n    \"\"\"Initializes this QNoiseScheduler.\n\n    Args:\n      start: Int. The step (epoch) to start the gradual training.\n      finish: Int. The step (epoch) to finish the gradual training. When the\n        start and the finish are equal, the qnoise_factor will be 1.0 in the\n        beginning of the training.\n      freq_type: Str. \"step\" or \"epoch\". It sets the qnoise_factor update\n        frequency type.\n      update_freq: Int. 
Updating frequency of the qnoise_factor.\n      initial_step_or_epoch:  Int. Step or epoch at which to start training.\n      exponent: Float. It is the exponent in the qnoise_factor calculation. It\n        controls the rate of the gradual qnoise_factor change.\n      use_ste: Bool. Whether to use \"straight-through estimator\" (STE) method or\n        not.\n      log_dir: Str. log directory to save qnoise_factor every epoch end.\n    \"\"\"\n    super().__init__()\n\n    self.start = start\n    self.finish = finish\n    if start > finish:\n      raise ValueError(\n          (\"start {} must not be greater than finish {}\").format(start, finish))\n    supported_freq_type = [\"step\", \"epoch\"]\n    if freq_type not in supported_freq_type:\n      raise ValueError((\"Invalid frequency type {}. only {} are \"\n                        \"supported.\").format(freq_type, supported_freq_type))\n    self.freq_type = freq_type\n    self.update_freq = update_freq\n    self.initial_step_or_epoch = initial_step_or_epoch\n    self.exponent = exponent\n    self.qnoise_factor = None\n    self.use_ste = use_ste\n    self.quantizers = None\n    self.summary_writer = None\n    if log_dir:\n      self.summary_writer = tf.summary.create_file_writer(log_dir)\n    self.num_iters = np.array(0, dtype=\"int64\")\n\n  def calculate_qnoise_factor(self, freq):\n    \"\"\"Returns calculated qnoise_factor based on the current step (epoch) and\n    the schedule parameters.\n\n    Args:\n      freq: The current step (or epoch) to calculate the qnoise_factor.\n\n    Returns:\n      qnoise_factor : calculated qnoise_factor.\n    \"\"\"\n    if freq < self.start:\n      qnoise_factor = 0.0\n    elif freq <= self.finish and self.start != self.finish:\n      val = float(self.finish - freq) / float(self.finish - self.start)\n      qnoise_factor = 1.0 - np.power(val, self.exponent)\n    else:\n      qnoise_factor = 1.0\n\n    return qnoise_factor\n\n  def set_qnoise_factor(self, 
qnoise_factor):\n    \"\"\"Set self.qnoise_factor and update the qnoise_factor of the quantizer.\"\"\"\n\n    # Updating the qnoise_factor of the quantizer.\n    quantizer.update_qnoise_factor(qnoise_factor)\n    # Updating the qnoise_factor of the callback.\n    self.qnoise_factor = qnoise_factor\n\n  def set_quantizers(self):\n    \"\"\"Set quantizers to update the qnoise_factor.\n\n    This must be called before building the quantizers.\n    \"\"\"\n    for quantizer in self.quantizers:\n      if hasattr(quantizer, \"use_ste\"):\n        quantizer.use_ste = self.use_ste\n      if hasattr(quantizer, \"use_variables\"):\n        quantizer.use_variables = True\n      if hasattr(quantizer, \"built\"):\n        # If the quantizer has been built but not using tf.Variable then it\n        # builds again to create tf.Variables.\n        if quantizer.built and not isinstance(quantizer.qnoise_factor,\n                                                 tf.Variable):\n          quantizer.build(use_variables=True)\n\n      # Set the qnoise_factor to 0.0 to pretrain without quantization.\n      self.set_qnoise_factor(quantizer, qnoise_factor=0.0)\n\n  def get_quantizers(self, model):\n    \"\"\"Returns a list of quantizers with qnoise_factor in the model.\n\n    Args:\n      model: model to get a list of quantizers with qnoise_factor.\n\n    Returns:\n      A list of quantizers with the qnoise_factor variable.\n    \"\"\"\n    all_quantizers = []\n    for layer in model.layers:\n      # A list of attributes holding the quantizer(s).\n      for attr in [\"quantizers\", \"quantizer\"]:\n        if hasattr(layer, attr):\n          quantizers = getattr(layer, attr)\n          quantizers = quantizers if attr == \"quantizers\" else [quantizers]\n          for quantizer in quantizers:\n            if hasattr(quantizer, \"qnoise_factor\"):\n              all_quantizers.append(quantizer)\n\n    return all_quantizers\n\n  def update_qnoise_factor(self, freq):\n    \"\"\"Update the 
qnoise_factor of the model.\n\n    Args:\n      freq: The current step (epoch) to calculate the qnoise_factor.\n    \"\"\"\n    # Update the qnoise_factor at the frequency of self.update_freq.\n    if freq % self.update_freq != 0:\n      self.num_iters += 1\n      return\n\n    new_qnoise_factor = self.calculate_qnoise_factor(freq)\n    for quantizer in self.quantizers:\n      # Updates the qnoise factors of the quantizers in the model.\n      self.set_qnoise_factor(quantizer, new_qnoise_factor)\n    self.num_iters += 1\n\n  def on_train_begin(self, logs=None):\n    if not self.quantizers:\n      # Build a list of quantizers which is used for updating qnoise_factor.\n      self.quantizers = self.get_quantizers(self.model)\n      self.set_quantizers()\n\n  def on_epoch_begin(self, epoch, logs=None):\n    if self.freq_type == \"epoch\":\n      self.update_qnoise_factor(self.initial_step_or_epoch + self.num_iters)\n\n  def on_epoch_end(self, epoch, logs=None):\n    if self.summary_writer:\n      with self.summary_writer.as_default():\n        tf.summary.scalar(\"qnoise_factor\", data=self.qnoise_factor, step=epoch)\n\n  def on_train_batch_begin(self, batch, logs=None):\n    if self.freq_type == \"step\":\n      self.update_qnoise_factor(self.initial_step_or_epoch + self.num_iters)\n"
  },
  {
    "path": "qkeras/codebook.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\" Clustering based quantizers \"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom sklearn.cluster import KMeans\nfrom tqdm import tqdm\n\n\ndef create_in_out_table(km, quantizer):\n  \"\"\"Create [in, out] table needed to map compressed activations to codebook\n  values. 
Given v: in_table[out_table[v]] => codebook value of v\n\n  Arguments:\n    km: KMeans model\n    quantizer: quantizer function to apply to out_table\n\n  Returns\n    in_table: conversion of compressed table indexes to n-bit numbers\n    out_table: conversion of n-bit output activations to compressed table\n      indexes\n  \"\"\"\n  in_table = km.cluster_centers_.flatten()\n  qrange = quantizer.range().reshape(-1, 1).astype(np.float32)\n  out_table = km.predict(qrange).ravel()\n  return in_table, out_table\n\n\ndef activation_compression(model, compile_config, activation_indexes, bits,\n                           X_train, y_train, X_test, y_test, sample_size=1.0):\n  \"\"\"This function applies clustering based non-uniform quantization inspired by\n  https://arxiv.org/pdf/1911.02079.pdf\n\n  model: Keras model\n  compile_config: Dictionary of arguments to be passed to model.compile()\n    for all submodels\n  activation_indexes: Index list of layers to be quantized. This will be\n    used to split the model and create submodels\n  bits: Number of bits to compress activations to. This will\n    result in 2**bits codebook values\n  X_train, y_train: training data used to fit clustering algorithm\n  X_test, y_test: validation data\n  sample_size:\n    fraction of training data activations to be used when computing\n    codebook values\n\n  Returns:\n    cb_tables: [in, out] tables. See create_in_out_table docs\n    models: list of keras submodels\n    km_models: list of KMeans fitted models\n  \"\"\"\n  assert len(activation_indexes) > 0\n  assert 0.0 < sample_size <= 1.0\n  # n_init=10 maintains the same behavior as legacy versions of sklearn. 
This\n  # was changed to \"auto\" in sklearn 1.4.\n  km_models = [KMeans(2**bits, n_init=10)] * len(activation_indexes)\n  cb_tables = [[]] * len(activation_indexes)\n  models = []\n  x = x_in = model.layers[0].output\n  for i in range(1, len(model.layers)):\n    layer = model.layers[i]\n    x = layer(x)\n    if i in activation_indexes or i == len(model.layers) - 1:\n      print(\"\\nCreating submodel...\")\n      models.append(Model([x_in], [x]))\n      x = x_in = Input(layer.output[0].shape,\n                       batch_size=layer.output.shape[0],\n                       dtype=layer.output.dtype)\n      models[-1].compile(**compile_config)\n      print(models[-1].summary())\n  print('\\nsample_size: ', sample_size)\n  x = X_train\n  for i, model in enumerate(models[:-1]):\n    print(f'fitting km[{i}]...')\n    x = model.predict(x)\n    km = km_models[i]\n    temp = x.flatten().reshape(-1, 1)\n    if sample_size < 1.0:\n      idxs = np.random.choice(x.shape[0], size=int(sample_size * x.shape[0]))\n      temp = temp[idxs]\n    km.fit(temp)\n    quantizer = getattr(model.layers[-1], 'quantizer',\n                        getattr(model.layers[-1], 'activation'))\n    km.cluster_centers_ = quantizer(km.cluster_centers_).numpy()\n    km.cluster_centers_.sort(axis=0)\n    cb_tables[i] = create_in_out_table(km, quantizer)\n  x = X_test\n  for i, model in enumerate(models[:-1]):\n    x = model.predict(x)\n    km = km_models[i]\n    preds = km.predict(x.flatten().reshape(-1, 1))\n    x = km.cluster_centers_[preds].reshape(x.shape)\n    n_unique = np.unique(x.flatten()).shape[0]\n    print(f\"Number of unique activations: {n_unique}\")\n    assert n_unique <= 2**bits\n\n  print('\\nEvaluating...')\n  models[-1].evaluate(x, y_test, verbose=2)\n  return cb_tables, models, km_models\n\n\ndef weight_compression(weights, bits, axis=0, quantizer=None):\n  \"\"\"Creates an in, out table that maps weight values to their codebook values.\n  Based on the idea presented by 
https://arxiv.org/pdf/1911.02079.pdf\n\n  Arguments:\n    weights: Numpy array\n    bits: Number of bits to compress weights to. This will\n      results in 2**bits codebook values\n    axis: axis to apply quantization by\n    quantizer: quantizer function that will be applied to codebook values\n\n  Returns:\n    index_table: array of indices that maps to codebook values for all weights\n    codebook_table: array of codebook values\n  \"\"\"\n  assert bits <= 8\n  n = 2**bits\n  index_table = []\n  codebook_table = np.zeros((weights.shape[axis], n))\n  km_models = [None] * weights.shape[axis]\n\n  for i, w in tqdm(enumerate(np.split(weights, weights.shape[axis], axis))):\n    original_shape = w.shape\n    w = w.ravel()\n    km = KMeans(n, n_init=10)\n    km.fit(w.reshape(-1, 1))\n    if quantizer:\n      km.cluster_centers_ = quantizer(km.cluster_centers_).numpy()\n    km.cluster_centers_.sort(axis=0)\n\n    km_models[i] = km\n    codebook_table[i, :] = km.cluster_centers_.flatten()\n    preds = km.predict(w.reshape(-1, 1))\n    index_table.append(preds.reshape(original_shape))\n\n  index_table = np.concatenate(index_table, axis)\n  return index_table, codebook_table\n\n\ndef two_tier_embedding_compression(embeddings, bits, quantizer=None):\n  \"\"\" Creates tables that maps embedding values to their codebook values.\n  Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf\n\n  Arguments:\n    weights: Numpy array\n    bits: Number of bits to compress weights to. 
This will\n      results in 2**bits codebook values\n    quantizer: quantizer function that will be applied to codebook values\n\n  Returns:\n    index_table: array of indices that maps to codebook values\n    cluster_index_table: array that maps each row to the codebook table\n      index\n    codebook_table: array of codebook values\n    quantized_embeddings: Numpy array MxN of quantized weights\n  \"\"\"\n  assert bits <= 8\n  n = 2**bits\n  quantized_embeddings = embeddings.copy()\n  index_table = np.zeros(embeddings.shape, dtype=np.uint8)\n  cluster_index_table = np.zeros(index_table.shape[0], dtype=np.uint8)\n  codebook_table = np.zeros((n, n))\n\n  km1 = KMeans(n, n_init=10)\n  km1.fit(embeddings)\n  tier1 = km1.predict(embeddings)\n\n  km_models = [0] * n\n  block_sizes = [0] * n\n  for block_label in tqdm(range(n)):\n    mask = block_label == tier1\n    indices = np.arange(embeddings.shape[0])[mask]\n    block = embeddings[mask]\n    km2 = KMeans(n, n_init=10)\n    km2.fit(block.flatten().reshape(-1, 1))\n    if quantizer:\n      km2.cluster_centers_ = quantizer(km2.cluster_centers_).numpy()\n    km2.cluster_centers_.sort(axis=0)\n\n    km_models[block_label] = km2\n    codebook_table[block_label, :] = km2.cluster_centers_.flatten()\n    cluster_index_table[indices] = block_label\n    block_sizes[block_label] = block.shape[0]\n    for i in indices:\n      preds = km2.predict(embeddings[i, :].reshape(-1, 1))\n      index_table[indices, :] = preds\n      quantized_embeddings[i, :] = km2.cluster_centers_[preds].flatten()\n  print('block_sizes:', block_sizes)\n  return index_table, cluster_index_table, codebook_table, quantized_embeddings\n"
  },
  {
    "path": "qkeras/estimate.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Definition of quantization package.\"\"\"\n\n# Some parts of the code were taken from\n#\n# https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\n#\n# and follows several papers.\n#\n#    https://arxiv.org/pdf/1609.07061.pdf\n#\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom collections import defaultdict\n\nimport numpy as np\nimport tensorflow.compat.v1 as tf\nfrom absl import logging\n\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import InputLayer\nfrom tensorflow.keras.models import Model\n\nfrom .qlayers import QActivation\nfrom .qlayers import QAdaptiveActivation\nfrom .qlayers import QDense\nfrom .qconvolutional import QConv1D\nfrom .qconvolutional import QConv2D\nfrom .qconvolutional import QDepthwiseConv2D\nfrom .qconvolutional import QSeparableConv2D\nfrom .qpooling import QAveragePooling2D\nfrom .quantizers import quantized_bits\nfrom .quantizers import quantized_relu\nfrom .quantizers import quantized_tanh\nfrom .quantizers import quantized_ulaw\nfrom .bn_folding_utils import unfold_model\nfrom .utils import get_model_sparsity\n\n\ndef analyze_accumulator(in_model, x, verbose=False):\n  \"\"\"Analyzes the distribution of weights to specify size of 
accumulators.\n\n     Computes the maximum number of bits for the accumulator assuming the\n     inputs have a distribution given by the dictionary x.\n\n     for each output channel i:\n       max_positive_value[i] = sum(w[i]) + bias[i] for the positive weights\n       max_negative_value[i] = sum(w[i]) + bias[i] for the negative weights\n\n     max_value = max(\n            max_positive_value[i] * positive(x) +\n            max_negative_value[i] * negative(x),\n\n         - (max_negative_value[i] * positive(x) +\n            max_positive_value[i] * negative(x))\n     )\n\n     accumulator_size = ceil( log2( max_value ) )\n\n     x right now is a dictionary of the form:\n\n     { layer_name: (min_value, max_value) }\n\n     in the future, we want to provide a sample and compute this automatically\n\n  Arguments:\n    in_model: keras model object, model to be evaluated\n    x: dictionary of the form: { layer_name: (min_value, max_value) }\n       input distribution\n    verbose: boolean, if true, print statistics messages\n\n  Returns:\n    dictionary containing { layer_name: accumulator_size }\n  \"\"\"\n\n  # this function converts a folded model to a \"normal\" model. 
It replace folded\n  # layers (e.g., QConv2dBatchnorm) layer with qconv2d layer whenever possible.\n  model = unfold_model(in_model)\n\n  acc_sizes = {}\n\n  for layer in model.layers:\n    if (isinstance(layer, QDepthwiseConv2D) or\n        isinstance(layer, QConv2D) or\n        isinstance(layer, QConv1D) or\n        isinstance(layer, QDense)):\n      weights = layer.get_weights()\n      k = weights[0]\n      if layer.use_bias:\n        b = weights[1]\n      else:\n        b = np.zeros((k.shape[-1],), dtype=np.float32)\n\n      all_bits = []\n      nbits = []\n      for i in range(k.shape[1]):\n        # compute sum of positive weights\n        npp = np.sum(k[..., i] * (k[..., i] > 0)) + (b[i] > 0) * b[i]\n\n        # compute sum of negative weights\n        nnn = np.sum(k[..., i] * (k[..., i] < 0)) + (b[i] < 0) * b[i]\n\n        # largest value is\n        #   npp * largest positive - nnn * largest_negative or\n        #   nnn * largest_positive - npp * largest_negative\n\n        x_min = x[layer.name][0]\n        x_max = x[layer.name][1]\n\n        n1 = npp * (x_max > 0) * x_max + nnn * (x_min < 0) * x_min\n        n0 = - (nnn * (x_max > 0) * x_max + npp * (x_min < 0) * x_min)\n\n        if n1 > n0:\n          nbits.append(n1)\n        else:\n          nbits.append(n0)\n\n        all_bits.append((n1, n0))\n\n      max_bits = int(np.ceil(np.log2(max(nbits))))\n      acc_sizes[layer.name] = max_bits\n\n      if verbose:\n        print()\n        print(layer.name, \"- input range:\", x[layer.name])\n        print(\"  max value:\", np.amax(k))\n        print(\"  min value:\", np.amin(k))\n        print(\"  most positive sum:\", np.amax(np.array(all_bits)[:, 0]))\n        print(\"  most negative sum:\", -np.amax(np.array(all_bits)[:, 1]))\n        print(\"  number of bits:\", max_bits)\n\n  if verbose:\n    print()\n\n  return acc_sizes\n\n\ndef analyze_accumulator_from_sample(\n    in_model, x_sample, mode=\"conservative\", verbose=False):\n  \"\"\"Extracts range of 
inputs of quantized layers from samples.\"\"\"\n\n  # mode is one of \"conservative\", \"sampled\"\n  if mode not in [\"conservative\", \"sampled\"]:\n    ValueError(\"'mode' has to be 'conservative' or 'sampled'\")\n\n  # this function converts a folded model to a \"normal\" model. It replace folded\n  # layers (e.g., QConv2DBatchnorm) layer with qconv2d layer whenever possible.\n  model = unfold_model(in_model)\n\n  # get layer names of quantized layers (QDense and QConv2D)\n  layer_names = [\n      layer.name for layer in model.layers\n      if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or\n          isinstance(layer, QConv1D) or isinstance(layer, QDense))\n  ]\n\n  # sampled mode: just apply x_sample and check the outputs\n\n  if mode == \"sampled\":\n\n    outputs = [\n        layer.output for layer in model.layers\n        if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or\n            isinstance(layer, QConv1D) or isinstance(layer, QDense))\n    ]\n\n    eval_outputs = Model(inputs=model.inputs, outputs=outputs)\n\n    # predict values for all inputs to quantized layers\n\n    values = eval_outputs.predict(x_sample)\n\n    acc_sizes = {}\n\n    for name, value in zip(layer_names, values):\n      max_value = np.amax(np.abs(value))\n      if max_value != 0:\n        acc_sizes[name] = int(np.ceil(np.log2(max_value)))\n      else:\n        acc_sizes[name] = 0\n\n    return acc_sizes\n\n  # get inputs of quantized layers (QDense and QConv2D\n  # we use Activation(\"linear\") to trick keras and tensorflow\n  # to avoid direct connections of inputs and any other\n  # artifacts.\n\n  outputs = [\n      Activation(\"linear\")(layer.input) for layer in model.layers\n      if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or\n          isinstance(layer, QConv1D) or isinstance(layer, QDense))\n  ]\n\n  eval_inputs = Model(inputs=model.inputs, outputs=outputs)\n\n  # predict values for all inputs to 
quantized layers\n\n  values = eval_inputs.predict(x_sample)\n\n  x_dict = {}\n\n  for name, value in zip(layer_names, values):\n    x_dict[name] = (np.amin(value), np.amax(value))\n\n  return analyze_accumulator(model, x_dict, verbose)\n\n\ndef get_quant_mode(quant):\n  \"\"\"Returns the quantizer mode, number of bits and if it is a signed number.\"\"\"\n\n  #  qb(n)[0] +/-,exp[1] t(-1,0,+1)[2] b(-1,+1)[3] b(0,1)[4]\n  #  entry is tuple:\n  #    (instance name, mode #(above), number of bits (-1 means check class),\n  #     sign bit)\n\n  modes = [\n      # depending on the number of bits, quantized_bits may be 2, 2\n      (\"quantized_bits\", 0, -1, 1),\n      (\"bernoulli\", 4, 1, 0),\n      (\"stochastic_ternary\", 2, 2, 1),\n      (\"ternary\", 2, 2, 1),\n      (\"stochastic_binary\", 3, 1, 1),\n      (\"binary\", 3, 1, 1),\n      # depending on the number of bits, quantized_relu may be 4, 1\n      (\"quantized_relu\", 0, -1, 0),\n      # depending on the number of bits, quantized_tanh may be 2, 2\n      (\"quantized_ulaw\", 0, -1, 1),\n      (\"quantized_tanh\", 0, -1, 1),\n      (\"quantized_po2\", 1, -1, 1),\n      (\"quantized_relu_po2\", 1, -1, 0),\n      (\"float\", 5, 32, 1)\n  ]\n\n  for (inst, mode, bits, sign) in modes:\n    if not quant or getattr(quant, \"__name__\", None) == \"linear\":\n      # if quantizer not specified or linear, we use float type\n      if inst == \"float\":\n        return (mode, bits, sign)\n\n    elif quant.__class__.__name__ == inst:\n      if bits == -1:\n        bits = int(quant.bits)\n        if (\n            isinstance(quant, quantized_bits) or\n            isinstance(quant, quantized_tanh) or\n            isinstance(quant, quantized_ulaw)):\n          if bits == 2 and int(quant.integer) == 1:\n            mode = 2\n        elif isinstance(quant, quantized_relu):\n          if bits == 1 and int(quant.integer) == 1:\n            mode = 4\n      return (mode, bits, sign)\n  raise ValueError(\"Quantizer {} Not 
Found\".format(quant))\n\n\ndef get_operation_type(layer, output_cache):\n  \"\"\"Checks quantizers around layer and weights to get operation type.\n\n  Determines operator strenght according to the following table.\n                                      x\n                     qb(n)   +/-,exp  t(-1,0,+1) b(-1,+1) b(0,1) float\n      qb(n)            *     << >>,-     ?,-       ?,-       ?    *\n      +/-,exp        << >>,-   +         ?,-        ^      ?,-    *\n    w t(-1,0,+1)      ?,-     ?,-        ?,^       ?,^      ^     *\n      b(-1,+1)        ?,-      ^         ?,^        ^       ^     *\n      b(0,1)           ?      ?,-         ^         ^       ^     *\n      float           *        *          *        *        *     *\n\n  Arguments:\n    layer: layer in Keras to determine the operation strength.\n    output_cache: cache of input tensor bit sizes.\n\n  Returns:\n    One of \"mult\", \"fmult\", \"adder\", \"barrel\", \"mux\", \"xor\".\n    Note: \"mult\" represents quantized bit multiplier, \"fmult\" represents\n          floating point multiplier.\n  \"\"\"\n\n  wx_table = [\n      [\"mult\", \"barrel\", \"mux\", \"mux\", \"mux\", \"fmult\"],\n      [\"barrel\", \"adder\", \"mux\", \"xor\", \"mux\", \"fmult\"],\n      [\"mux\", \"mux\", \"mux\", \"mux\", \"xor\", \"fmult\"],\n      [\"mux\", \"xor\", \"mux\", \"xor\", \"xor\", \"fmult\"],\n      [\"mux\", \"mux\", \"xor\", \"xor\", \"xor\", \"fmult\"],\n      [\"fmult\", \"fmult\", \"fmult\", \"fmult\", \"fmult\", \"fmult\"],\n  ]\n\n  # check if this is a quantized layers (QDense, QConv, QDepthwise)\n  if hasattr(layer, \"get_quantizers\"):\n    w_quant = layer.get_quantizers()[0]\n    w_mode, w_bits, w_sign = get_quant_mode(w_quant)\n    if w_mode == \"float\":\n      logging.warning(\"%s kernel is unquantized!\", layer.name)\n\n    # for the input, get tensor input and search the cache that associates\n    # the quantizer with a tensor\n    if output_cache.get(layer.input.experimental_ref(), None) 
is not None:\n      x_mode, x_bits, x_sign = get_quant_mode(\n          output_cache.get(layer.input.experimental_ref()))\n      if x_mode == \"float\":\n        logging.warning(\"%s input is unquantized!\", layer.name)\n    else:\n      print(\"cannot determine presently model for {}\".format(layer.name))\n      return \"null\", (w_mode, -1), (w_bits, -1), (w_sign, -1)\n    mode = wx_table[w_mode][x_mode]\n    return mode, (w_mode, x_mode), (w_bits, x_bits), (w_sign, x_sign)\n\n  raise ValueError(\"Cannot find suitable quantization candidates for {}\".format(\n      layer.name))\n\n\ndef create_activation_cache(model):\n  \"\"\"Creates an activation cache for the tensors of a model.\"\"\"\n\n  input_quantizer = quantized_relu(8, 0)\n\n  output_cache = {}\n\n  # If using a Sequential model, the input layer is hidden. Therefore, add the\n  # input quantization to the cache if the first layer is not an input layer\n  if not isinstance(model.layers[0], InputLayer):\n    output_cache[model.layers[0].input.experimental_ref()] = input_quantizer\n\n  # cache graph tensors' activations\n\n  for l in model.layers:\n    output_cache[l.output.experimental_ref()] = l\n    if isinstance(l, QActivation) or isinstance(l, QAdaptiveActivation) :\n      output_cache[l.output.experimental_ref()] = l.quantizer\n    elif isinstance(l, InputLayer):\n      # assume the input is 8-bit positive value\n      output_cache[l.output.experimental_ref()] = input_quantizer\n    elif l.__class__.__name__ in [\n        \"QDense\", \"QConv2D\", \"QConv1D\", \"QDepthwiseConv2D\"\n    ]:\n      output_cache[l.output.experimental_ref()] = l.activation\n    else:\n      if isinstance(l.input, list):\n        # right now, we just get the first one - we assume this is the leading\n        # one.\n        all_q = [\n            output_cache.get(l.input[i].experimental_ref())\n            for i in range(len(l.input))\n        ]\n        q = all_q[0]\n      else:\n        q = 
output_cache.get(l.input.experimental_ref(), None)\n      output_cache[l.output.experimental_ref()] = q\n      if q is None:\n        raise ValueError(\"Unknown operation in {}\".format(l.name))\n\n  return output_cache\n\n\ndef extract_model_operations(in_model):\n  \"\"\"Determines types of operations for convolutions.\"\"\"\n\n  model = unfold_model(in_model)\n  cache_q = create_activation_cache(model)\n  cache_o = {}\n\n  operations = {}\n\n  for layer in model.layers:\n\n    if layer.__class__.__name__ == \"InputLayer\":\n      continue\n\n    if isinstance(layer.input, list):\n      input_shape = [\n          cache_o.get(layer.input[i].experimental_ref(),\n                      layer.input[i].get_shape())\n          for i in range(len(layer.input))\n      ]\n    else:\n      input_shape = cache_o.get(layer.input.experimental_ref(),\n                                layer.input.get_shape())\n\n    # Check if the inputs are a list of Dimensions\n    if isinstance(input_shape, list):\n      # Iterate though all of the input shapes and extract the dimension values\n      for i, dim in enumerate(input_shape):\n        if isinstance(dim[0], tf.Dimension):\n          shape = [None]\n          for j in range(1, len(dim)):\n            shape.append(dim[j] if isinstance(dim[j], int) else dim[j].value)\n          input_shape[i] = tuple(shape)\n\n    output_shape = layer.compute_output_shape(input_shape)\n\n    cache_o[layer.output.experimental_ref()] = output_shape\n\n    if layer.__class__.__name__ not in [\"QDense\", \"QConv2D\", \"QConv1D\",\n                                        \"QDepthwiseConv2D\", \"QSeparableConv1D\",\n                                        \"QSeparableConv2D\"]:\n      continue\n\n    if layer.__class__.__name__ in [\"QConv2D\"]:\n\n      _, _, _, channels_i = input_shape\n\n      _, height_o, width_o, channels_o = output_shape\n\n      weight = layer.get_weights()[0]\n\n\n      kernel_h, kernel_w, _, _ = weight.shape\n\n      
number_of_operations = (\n          height_o * width_o * channels_o * kernel_h * kernel_w * channels_i)\n\n      number_of_weights = (kernel_h * kernel_w * channels_o * channels_i)\n\n      number_of_bias = 0\n      if len(layer.get_weights()) > 1:\n        number_of_bias = layer.get_weights()[1].shape[0]\n\n      weight_quant, bias_quant = layer.get_quantizers()\n      weight_type = get_quant_mode(weight_quant)\n      bias_type = get_quant_mode(bias_quant)\n\n      if weight_type[0] == \"float\":\n        logging.warning(\"%s kernel is unquantized!\", layer.name)\n      if bias_type[0] == \"float\":\n        logging.warning(\"%s bias is unquantized!\", layer.name)\n\n    elif layer.__class__.__name__ in [\"QConv1D\"]:\n\n      _, _, channels_i = input_shape\n\n      _, time_o, channels_o = output_shape\n\n      weight = layer.get_weights()[0]\n\n      kernel_length, _, _ = weight.shape\n\n      number_of_operations = (\n          time_o * channels_o * kernel_length * channels_i)\n\n      number_of_weights = (kernel_length * channels_o * channels_i)\n      number_of_bias = 0\n      if len(layer.get_weights()) > 1:\n        number_of_bias = layer.get_weights()[1].shape[0]\n\n      weight_quant, bias_quant = layer.get_quantizers()\n      weight_type = get_quant_mode(weight_quant)\n      bias_type = get_quant_mode(bias_quant)\n\n      if weight_type[0] == \"float\":\n        logging.warning(\"%s kernel is unquantized!\", layer.name)\n      if bias_type[0] == \"float\":\n        logging.warning(\"%s bias is unquantized!\", layer.name)\n\n    elif layer.__class__.__name__ in [\"QDepthwiseConv2D\"]:\n\n      _, _, _, channels_i = input_shape\n\n      _, height_o, width_o, channels_o = output_shape\n\n      weight_1 = layer.get_weights()[0]\n\n      kernel_h, kernel_w, _, _ = weight_1.shape\n\n      number_of_operations = (\n          kernel_h * kernel_w * height_o * width_o * channels_i)\n\n      number_of_weights = (kernel_h * kernel_w * channels_o * channels_i)\n\n     
 number_of_bias = 0\n      if len(layer.get_weights()) > 1:\n        number_of_bias = layer.get_weights()[1].shape[0]\n\n      weight_quant, bias_quant = layer.get_quantizers()\n      weight_type = get_quant_mode(weight_quant)\n      bias_type = get_quant_mode(bias_quant)\n\n      if weight_type[0]  == \"float\":\n        logging.warning(\"%s kernel is unquantized!\", layer.name)\n      if bias_type[0]  == \"float\":\n        logging.warning(\"%s bias is unquantized!\", layer.name)\n\n    elif layer.__class__.__name__ in [\"QSeparableConv1D\"]:\n\n      _, _, channels_i = input_shape\n\n      _, time_o, channels_o = output_shape\n\n      weight_1 = layer.get_weights()[0]\n\n      kernel_length, _, _ = weight_1.shape\n\n      number_of_operations = (\n          kernel_length * time_o * channels_i + \n          time_o * channels_o)\n\n      number_of_weights = [\n        kernel_length * channels_i, \n        channels_o * channels_i]\n\n      number_of_bias = 0\n      if len(layer.get_weights()) > 2:\n        number_of_bias = layer.get_weights()[2].shape[0]\n\n      depthwise_quant, pointwise_quant, bias_quant = layer.get_quantizers()\n      depthwise_type = get_quant_mode(depthwise_quant)\n      pointwise_type = get_quant_mode(pointwise_quant)\n      weight_type = [depthwise_type, pointwise_type]\n      bias_type = get_quant_mode(bias_quant)\n\n      if depthwise_type[0] == \"float\":\n        logging.warning(\"%s depthwise kernel is unquantized!\", layer.name)\n      if pointwise_type[0] == \"float\":\n        logging.warning(\"%s pointwise kernel is unquantized!\", layer.name)\n      if bias_type[0] == \"float\":\n        logging.warning(\"%s bias is unquantized!\", layer.name)\n\n    elif layer.__class__.__name__ in [\"QSeparableConv2D\"]:\n\n      _, _, _, channels_i = input_shape\n\n      _, height_o, width_o, channels_o = output_shape\n\n      weight_1 = layer.get_weights()[0]\n\n      kernel_h, kernel_w, _, _ = weight_1.shape\n\n      number_of_operations = 
(\n          kernel_h * kernel_w * height_o * width_o * channels_i + \n          height_o * width_o * channels_o)\n\n      number_of_weights = [\n        kernel_h * kernel_w * channels_i,\n        channels_o * channels_i]\n\n      number_of_bias = 0\n      if len(layer.get_weights()) > 2:\n        number_of_bias = layer.get_weights()[2].shape[0]\n\n      depthwise_quant, pointwise_quant, bias_quant = layer.get_quantizers()\n      depthwise_type = get_quant_mode(depthwise_quant)\n      pointwise_type = get_quant_mode(pointwise_quant)\n      weight_type = [depthwise_type, pointwise_type]\n      bias_type = get_quant_mode(bias_quant)\n\n      if depthwise_type[0] == \"float\":\n        logging.warning(\"%s depthwise kernel is unquantized!\", layer.name)\n      if pointwise_type[0] == \"float\":\n        logging.warning(\"%s pointwise kernel is unquantized!\", layer.name)\n      if bias_type[0] == \"float\":\n        logging.warning(\"%s bias is unquantized!\", layer.name)\n\n    elif layer.__class__.__name__ in [\"QDense\"]:\n\n      # Find the input and output shapes out of all possible dimensions.\n      # Usually the first shape dimension will be the batch size, and the second\n      # shape dimension will be the number of channels. 
However, if the\n      # Dense layer is in Squeeze-and-Excite, the first shape dimension\n      # will be the batch size, the second and third shape dimension will be the\n      # spatial sizes (should both be 1), and the fourth shape dimensions will\n      # be the number of channels\n      ishape = np.array([i for i in input_shape if i is not None])\n      assert sum(ishape > 1) == 1, \"Tensor shape has multiple >1 size dims\"\n      size_i = np.max(ishape)\n\n      oshape = np.array([i for i in output_shape if i is not None])\n      assert sum(oshape > 1) == 1, \"Tensor shape has multiple >1 size dims\"\n      size_o = np.max(oshape)\n\n      number_of_operations = int(size_i * size_o)\n\n      number_of_weights = size_i * size_o\n      number_of_bias = 0\n      if len(layer.get_weights()) > 1:\n        number_of_bias = layer.get_weights()[1].shape[0]\n\n      weight_quant, bias_quant = layer.get_quantizers()\n      weight_type = get_quant_mode(weight_quant)\n      bias_type = get_quant_mode(bias_quant)\n\n      if weight_type[0] == \"float\":\n        logging.warnings(\"%s kernel is unquantized!\", layer.name)\n      if bias_type[0] == \"float\":\n        logging.warnings(\"%s bias is unquantized!\", layer.name)\n\n    # \"number_of_operations\" is tensor_shape.Dimension type\n    operations[layer.name] = {\n        \"type\":\n            get_operation_type(layer, cache_q),\n        \"number_of_operations\":\n            number_of_operations if isinstance(number_of_operations, int) else\n            number_of_operations.value,\n        \"number_of_weights\":\n            number_of_weights,\n            # if isinstance(number_of_weights, int) else number_of_weights.value,\n        \"number_of_bias\":\n            number_of_bias,\n            # if isinstance(number_of_bias, int) else number_of_bias.value,\n        \"type_of_weights\":\n            weight_type,\n        \"type_of_bias\":\n            bias_type,\n    }\n\n  return operations\n\n\ndef 
print_qstats(model):\n  \"\"\"Prints quantization statistics for the model.\"\"\"\n\n  model_ops = extract_model_operations(model)\n\n  ops_table = defaultdict(lambda: 0)\n\n  print(\"\")\n  print(\"Number of operations in model:\")\n  for name in sorted(model_ops):\n    mode, _, sizes, signs = model_ops[name][\"type\"]\n    number = model_ops[name][\"number_of_operations\"]\n    sign = \"s\" if sum(signs) > 0 else \"u\"\n    op_name = sign + mode + \"_\" + str(sizes[0]) + \"_\" + str(sizes[1])\n    ops_table[op_name] += number\n    print(\"    {:30}: {:5} ({})\".format(str(name), str(number), str(op_name)))\n\n  print(\"\")\n  print(\"Number of operation types in model:\")\n  for key in sorted(ops_table.keys()):\n    if ops_table[key] > 0:\n      print(\"    {:30}: {}\".format(key, ops_table[key]))\n\n  print(\"\")\n  print(\"Weight profiling:\")\n  total_bits = 0\n  for name in sorted(model_ops):\n    weight_type = model_ops[name][\"type_of_weights\"]\n    n_weights = model_ops[name][\"number_of_weights\"]\n    if isinstance(weight_type, list):\n      for i, (w_type, w_number) in enumerate(zip(weight_type, n_weights)):\n        _, w_sizes, _ = w_type\n        total_bits += w_number * w_sizes\n        print(\"    {:30} : {:5} ({}-bit unit)\".format(\n            str(name) + \"_weights_\" + str(i), str(w_number), str(w_sizes)))\n    else:\n      _, w_sizes, _ = weight_type\n      total_bits += n_weights * w_sizes\n      print(\"    {:30} : {:5} ({}-bit unit)\".format(\n          str(name) + \"_weights\", str(n_weights), str(w_sizes)))\n    _, b_sizes, _ = model_ops[name][\"type_of_bias\"]\n    b_number = model_ops[name][\"number_of_bias\"]\n    total_bits += b_number * b_sizes\n    print(\"    {:30} : {:5} ({}-bit unit)\".format(\n        str(name) + \"_bias\", str(b_number), str(b_sizes)))\n  print(\"    \" + (\"-\"*40))\n  print(\"    {:30} : {:5}\".format(\"Total Bits\", total_bits))\n\n  print(\"\")\n  print(\"Weight sparsity:\")\n  total_sparsity, per_layer = 
get_model_sparsity(model, per_layer=True)\n  for layer in per_layer:\n    print(\"    {:30} : {:.4f}\".format(str(layer[0]), layer[1]))\n  print(\"    \" + (\"-\"*40))\n  print(\"    {:30} : {:.4f}\".format(\"Total Sparsity\", total_sparsity))\n"
  },
  {
    "path": "qkeras/experimental/quantizers/__init__.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Exports experimental quantizers.\"\"\"\n\nimport tensorflow as tf\n\nfrom qkeras.experimental.quantizers.quantizers_po2 import quantized_bits_learnable_po2  \nfrom qkeras.experimental.quantizers.quantizers_po2 import quantized_bits_msqe_po2  \n\n__version__ = \"0.9.0\"\n"
  },
  {
    "path": "qkeras/experimental/quantizers/quantizers_po2.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Power-of-2 quantizers based on https://arxiv.org/pdf/2210.03671.pdf.\n\n  Example usages:\n  < MSQE-based quantizer >\n    Default (using the second moments MSQE optimization and the outlier mask):\n      quantized_bits_msqe_po2(bits=4)\n    Per-channel quantization:\n      quantized_bits_msqe_po2(bits=4, scale_axis=3, per_channel_scale=True)\n\n  < Gradient-based (learnable) quantizer >\n    Default (using the MSQE round (Round-to-Lower-MSQE)):\n      quantized_bits_learnable_po2(bits=4)\n    Per-channel quantization:\n      quantized_bits_learnable_po2(bits=4, scale_axis=3, per_channel_scale=True)\n    Relu activation (the MSQE round is not supported for non-variable tensors):\n      quantized_bits_learnable_po2(bits=4, keep_negative=False,\n      use_second_moments_msqe_opt=False, use_po2_scale_msqe_round=False)\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport re\nimport numpy as np\nfrom six.moves import range\nimport tensorflow as tf\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.layers import Layer\n\n\ndef _update_ema_variable(variable, new_val, ema_decay, is_initialized,\n                         should_update):\n  \"\"\"Updates exponentially moving average (EMA) of 
a tf.Variable.\n\n    This function directly updates the variable.\n\n  Args:\n    variable: A tf.Variable to be updated.\n    new_val: A tensor with a new value to update 'variable'. Its shape is same\n      as 'variable'.\n    ema_decay: A scalar python float or tensor. EMA decay factor.\n    is_initialized: A scalar tensor indicating whether 'variable' has been\n      initialized or not.\n    should_update: A scalar python bool or tensor indicating whether to update\n      'variable' or not.\n  \"\"\"\n  if not tf.is_tensor(should_update):\n    should_update = tf.convert_to_tensor(should_update)\n\n  val_to_update = ema_decay * variable + (1.0 - ema_decay) * new_val\n  val_to_update = tf.cond(is_initialized, lambda: val_to_update,\n                          lambda: new_val)\n  val_to_update = tf.cond(should_update, lambda: val_to_update,\n                          lambda: variable)\n  variable.assign(val_to_update)\n\n\ndef _get_scaling_axis(scale_axis, len_axis):\n  \"\"\"Gets the axis to perform scaling with.\n\n  Args:\n    scale_axis: an integer scalar tensor or None to get which axis to calculate\n      scale from. 
If None, the scaling axis is set based on the image data\n      format.\n    len_axis: an integer scalar tensor of the dimension of the tensor to be\n      quantized.\n\n  Returns:\n    A list of axes to be quantized together.\n  \"\"\"\n\n  if scale_axis is not None:\n    axis = list(range(scale_axis))\n    axis += list(range(scale_axis + 1, len_axis))\n  else:\n    if K.image_data_format() == \"channels_last\":\n      axis = list(range(len_axis - 1))\n    else:\n      axis = list(range(1, len_axis))\n  return axis\n\n\ndef _get_msqe_scale(x,\n                    q,\n                    scale_axis=None,\n                    per_channel_scale=True,\n                    msqe_weight=None):\n  \"\"\"Gets scaling factor for scaling the tensor per channel.\n\n  It uses a linear least squares method to find the scaling factor.\n  (https://en.wikipedia.org/wiki/Linear_least_squares)\n\n  Args:\n     x: A tensor object. Its elements are in float.\n     q: A tensor object. Its elements are in quantized format of x.\n     scale_axis: which axis to calculate scale from\n     per_channel_scale: A bool. Whether to perform per-channel scaling or not.\n     msqe_weight: A tensor object or None. Its elements are in float, which are\n     used to perform weighted least squares optimization. 
If None, it performs\n     non-weighted least squares optimization.\n\n  Returns:\n    A scaling factor tensor or scalar for scaling tensor per channel or per\n    layer.\n  \"\"\"\n  # in different tensorflow version (e.g., 2.4)\n  # x.shape is a tuple which doesn't have as_list() method\n  try:\n    x_shape = x.shape.as_list()\n  except AttributeError:\n    x_shape = list(x.shape)\n\n  len_axis = len(x_shape)\n\n  if msqe_weight is not None:\n    sqrt_msqe_weight = tf.math.sqrt(msqe_weight)\n    x = tf.math.multiply(x, sqrt_msqe_weight)\n    q = tf.math.multiply(q, sqrt_msqe_weight)\n\n  if not per_channel_scale:\n    qx = K.mean(q * x, keepdims=True)\n    qq = K.mean(q * q, keepdims=True)\n  else:\n    if len_axis > 1:\n      axis = _get_scaling_axis(scale_axis, len_axis)\n      qx = K.mean(tf.math.multiply(q, x), axis=axis, keepdims=True)\n      qq = K.mean(tf.math.multiply(q, q), axis=axis, keepdims=True)\n    else:\n      # No summing (averaging) along the channel axis to get per-channel\n      # scales.\n      qx = tf.math.multiply(q, x)\n      qq = tf.math.multiply(q, q)\n\n  scale = qx / (qq + K.epsilon())\n\n  # Rounds the exponent to the nearest integer for power-of-2 scale.\n  return K.pow(2.0, tf.math.rint(K.log(scale + K.epsilon()) / np.log(2.0)))\n\n\nclass BaseQuantizerPO2(Layer):  # pylint: disable=invalid-name\n  \"\"\"This is the base class from which all power-of-2 quantizers inherit, which\n  is based on the reference paper (https://arxiv.org/pdf/2210.03671.pdf).\n\n  Attributes:\n    bits: Integer, number of bits to perform quantization.\n    keep_negative: Boolean, if true, it keeps negative values and sets the\n      quantization levels symmetrically around 0. 
If false, negative numbers is\n      clipped to 0.\n    scale_axis: Integer, which axis to calculate scale from.\n    per_channel_scale: Boolean, whether to perform per-channel (true) or\n      per-layer (false) quantization.\n    init_scale: Float or None, initial scale factor to initialize the scale with\n      (if None, it will be initialized based on the first inputs.).\n    use_second_moments_msqe_opt: Bool, whether to use the second moments based\n      MSQE optimization or not. The second moments is used as a weighting factor\n      to calculate the quantization error.\n    second_moments_ema_decay: Float, EMA decay factor for the second moments\n      update.\n    use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight.\n    use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask.\n    use_stable_scale_exponent: Bool, whether to use exponentially moving\n      averaged (\"stable\") scale exponent or not. Note: there is a tf.Variable\n        (self.switch_to_stable_scale) that controls when to apply the stable\n        scale exponent (i.e., if use_stable_scale_exponent is true and\n        self.switch_to_stable_scale is false, the stable scale exponent is\n        updated but not used.).\n    stable_scale_ema_decay: Float, EMA decay factor for the stable scale update.\n    is_gradient_based: Bool, whether to optimize the scale_exponent from the\n      gradients or not (i.e, if true, self.scale_exponent is set to be\n      \"trainable\".)\n  \"\"\"\n\n  def __init__(self,\n               bits=4,\n               keep_negative=True,\n               scale_axis=None,\n               per_channel_scale=False,\n               init_scale=None,\n               use_second_moments_msqe_opt=False,\n               second_moments_ema_decay=0.999,\n               use_sqrt_of_msqe_weight=True,\n               use_outlier_mask_msqe_weight=True,\n               use_stable_scale_exponent=False,\n               stable_scale_ema_decay=0.99,\n            
   is_gradient_based=True,\n               **kwargs):\n\n    self.bits = bits\n    self.keep_negative = keep_negative\n    self.scale_axis = scale_axis\n    self.per_channel_scale = per_channel_scale\n    self.init_scale = init_scale\n    self.use_second_moments_msqe_opt = use_second_moments_msqe_opt\n    self.second_moments_ema_decay = second_moments_ema_decay\n    self.use_sqrt_of_msqe_weight = use_sqrt_of_msqe_weight\n    self.use_outlier_mask_msqe_weight = use_outlier_mask_msqe_weight\n    self.use_stable_scale_exponent = use_stable_scale_exponent\n    self.stable_scale_ema_decay = stable_scale_ema_decay\n    self.is_gradient_based = is_gradient_based\n    self.alpha = \"auto_po2\"\n\n    # scale exponent to be learned.\n    self.scale_exponent = None\n    # Stores the power-of-2 scale factor used for quantization.\n    self.scale = None\n    # Axes to perform reduce sum (mean) operation.\n    self.reduce_axes = None\n    # Running averaged gradient variances of the input\n    self.msqe_weight = None\n    # A knob to switch to \"stable_scale_exponent\".\n    self.switch_to_stable_scale = None\n    # variable holding the running averaged scale exponent\n    self.stable_scale_exponent = None\n    # Indicator variable whether to update stable_scale_exponent or not. 
This\n    # can be used as an indicator whether it is in training or not.\n    self.should_update_stable_scale_exponent = None\n    # The assignments from \"kwargs\" are to restore from the config.\n    # The maximum quantization level of negative numbers.\n    self.qn = kwargs.pop(\"qn\") if \"qn\" in kwargs else None\n    # The maximum quantization level of positive numbers.\n    self.qp = kwargs.pop(\"qp\") if \"qp\" in kwargs else None\n    # Axes scaled together.\n    self.scaled_axes = kwargs.pop(\n        \"scaled_axes\") if \"scaled_axes\" in kwargs else None\n\n    super().__init__(**kwargs)\n\n  def build(self, input_shape):\n    \"\"\"Creates and initializes variables.\"\"\"\n    # Number of quantization levels.\n    levels = tf.math.pow(2.0, tf.cast(self.bits, dtype=tf.float32)) - 1\n\n    # Sets the number of quantization levels for the negative and positive\n    # ranges.\n    if self.keep_negative:\n      # Sets them symmetric about 0 to reduce the quantization induced bias.\n      self.qn = float((levels - 1.0) / 2.0)\n      self.qp = float((levels - 1.0) / 2.0)\n    else:\n      self.qn = 0.0\n      self.qp = float(levels)\n\n    if self.init_scale is None:\n      init_scale_exponent = 0.0\n      init_scale = 1.0\n    else:\n      init_scale = self.init_scale + K.epsilon()\n      init_scale_exponent = tf.math.log(init_scale) / tf.math.log(2.0)\n\n    if self.scale_axis is None:\n      self.scale_axis = self._get_scale_axis(input_shape)\n\n    self.scaled_axes = self._get_scaled_axes(self.scale_axis, input_shape)\n\n    if self.per_channel_scale:\n      scale_exponent_shape = tf.TensorShape([\n          input_shape[i] if i == self.scale_axis else 1\n          for i in range(len(input_shape))\n      ])\n    else:\n      scale_exponent_shape = [1 for i in range(len(input_shape))]\n\n    # Creates the scale exponent variable to be learned.\n    self.scale_exponent = tf.Variable(\n        lambda: tf.constant(\n            init_scale_exponent, 
shape=scale_exponent_shape, dtype=tf.float32),\n        trainable=self.is_gradient_based,\n        synchronization=tf.VariableSynchronization.ON_READ,\n        aggregation=tf.compat.v1.VariableAggregation.MEAN,\n        name=\"scale_exponent\")\n\n    # \"self.scale\" is not a trainable variable which gets assigned not learned.\n    self.scale = tf.Variable(\n        lambda: tf.constant(\n            init_scale, shape=scale_exponent_shape, dtype=tf.float32),\n        trainable=False,\n        synchronization=tf.VariableSynchronization.ON_READ,\n        aggregation=tf.compat.v1.VariableAggregation.MEAN,\n        name=\"scale\")\n\n    self.reduce_axes = [\n        i for i in range(len(self.scale_exponent.shape))\n        if self.scale_exponent.shape[i] == 1\n    ]\n\n    if self.use_second_moments_msqe_opt:\n      msqe_weight_shape = tf.TensorShape(\n          [1 if s is None else s for s in input_shape])\n      self.msqe_weight = tf.Variable(\n          lambda: tf.ones(shape=msqe_weight_shape),\n          trainable=False,\n          dtype=tf.float32,\n          name=\"msqe_weight\")\n\n    if self.use_stable_scale_exponent:\n      self.stable_scale_exponent = tf.Variable(\n          lambda: tf.zeros_like(self.scale_exponent),\n          dtype=tf.float32,\n          trainable=False,\n          synchronization=tf.VariableSynchronization.ON_READ,\n          aggregation=tf.compat.v1.VariableAggregation.MEAN,\n          name=\"stable_scale_exponent\")\n      self.switch_to_stable_scale = tf.Variable(\n          False, trainable=False, name=\"switch_to_stable_scale\")\n      self.should_update_stable_scale_exponent = tf.Variable(\n          False, trainable=False, name=\"should_update_stable_scale_exponent\")\n\n    # Indicator variable for initializing variables (e.g., the scale exponent\n    # etc.).\n    self.is_initialized = tf.Variable(\n        False, trainable=False, name=\"is_initialized\")\n\n  def call(self, inputs, msqe_weight=None):\n    \"\"\"Returns a fake 
quantized tensor of 'inputs'.\n\n    Args:\n      inputs: A tensor to be fake quantized.\n      msqe_weight: A tensor which is used in the scale optimization to weight\n        the MSQE (Mean Squared Quantization Error) of individual input elements.\n        Its shape is same as 'inputs' and its dtype is `float32` If None, it\n        will be set by \"self._get_msqe_weight\" (this should be left as None\n        unless you explicitly assign its value in a different way.).\n\n    Returns:\n      A tensor of fake quantized input. Its shape is same as 'inputs' and its\n      dtype is `float32`.\n    \"\"\"\n    if not self.keep_negative:\n      # Quantize only positive values (e.g. relu activation).\n      inputs = tf.keras.activations.relu(inputs)\n\n    if self.use_second_moments_msqe_opt:\n      return self._update_second_moments_msqe_weight(\n          self._quantize(inputs, msqe_weight=msqe_weight), inputs)\n\n    return self._quantize(inputs, msqe_weight=msqe_weight)\n\n  def _quantize(self, inputs, msqe_weight=None):\n    \"\"\"Returns (fake) quantized inputs and optimizes the scaling factor.\n\n    Args:\n      inputs: A tensor to be fake quantized and used in optimizing the scaling\n        factor.\n      msqe_weight: A tensor or None, which is used in the MSQE optimizations.\n\n    Returns:\n      A tensor of fake quantized inputs.\n    \"\"\"\n    # Initialize self.scale_exponent (it is initialized only once).\n    self._initialize_scale_exponent(inputs)\n\n    scale = self._get_scale(inputs, msqe_weight=msqe_weight)\n\n    if self.use_stable_scale_exponent:\n      # Only outputs the stable scale when 'self.switch_to_stable_scale' is set\n      # to true, which is false by default.\n      scale = self._get_stable_scale(scale)\n\n    # Stores the scaling factors used for quantization.\n    self.scale.assign(scale)\n\n    # Perform rounding.\n    inputs_rounded = self._round_quant(inputs / scale)\n\n    # Perform clipping.\n    inputs_clipped = 
self._clip_quant(inputs_rounded)\n    inputs_quant = scale * inputs_clipped\n\n    # Update initialization indicator.\n    self.is_initialized.assign(True)\n\n    return inputs_quant\n\n  @tf.custom_gradient\n  def _update_second_moments_msqe_weight(self, input_quantized, inputs):\n    \"\"\"Updates the second moments of the gradients with respect to the inputs.\n\n    Args:\n      input_quantized: A tensor which is the output from 'self._quantize' method\n        (fake quantized input).\n      inputs: A tensor which is the input to 'self._quantize' method.\n\n    Returns:\n      'input_quantized', the upstream gradient of 'input_quantized', and the\n      gradients (zeros) of 'inputs'\n    \"\"\"\n\n    def grad(upstream_grad):\n      \"\"\"Calculates and updates the second moments of the gradients.\"\"\"\n      # Get a mask for clipped inputs (i.e., 1.0 for rounded inputs and\n      # 0.0 for clipped inputs). self.scale is the previously used scaling\n      # factors.\n      clip_error_mask = self._get_clipped_inputs_mask(inputs, self.scale)\n      # Calculate the second moments of the gradients with respect to 'inputs'\n      # that is clip_error_mask * upstream_grad.\n      second_moments = clip_error_mask * upstream_grad * upstream_grad\n\n      # Update the second moments\n      _update_ema_variable(\n          self.msqe_weight,\n          second_moments,\n          self.second_moments_ema_decay,\n          self.is_initialized,\n          should_update=True)\n      return upstream_grad, tf.zeros_like(inputs)\n\n    return input_quantized, grad\n\n  @abc.abstractmethod\n  def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None):\n    \"\"\"Returns power-of-2 scaling factors for quantization.\n\n    Args:\n      inputs: A tensor to be used to optimize the scale value.\n      reduce_axes: A list of axes to be summed (averaged) over.\n      msqe_weight: A tensor which is used in scale optimization to weight the\n        MSQE (Mean Squared Quantization Error) 
of individual input elements. Its\n        shape is same as 'inputs' and its dtype is `float32`.\n\n    Returns:\n      A tensor of power-of-2 scaling factors. Its shape is same as\n      'self.scale_exponent' and its dtype is `float32`.\n    \"\"\"\n    raise NotImplementedError\n\n  @abc.abstractmethod\n  def _get_init_scale_exponent(self, inputs):\n    \"\"\"Returns a scale exponent tensor to initialize \"self.scale_exponent\".\n\n    Args:\n      inputs: A tensor to be used to calculate initial scale exponent values.\n\n    Returns:\n      A tensor of scale exponent. Its shape is same as 'self.scale_exponent' and\n      its dtype is `float32`.\n    \"\"\"\n    raise NotImplementedError\n\n  @abc.abstractmethod\n  def _get_outlier_mask(self, inputs):\n    \"\"\"Returns a tensor to suppress outliers in the input for MSQE optimizations.\n\n    Args:\n      inputs: A tensor to be used to generate the outlier mask.\n\n    Returns:\n      A tensor to mask out the outliers of the inputs. Its shape is same as\n      'inputs' and its dtype is `float32`.\n    \"\"\"\n    raise NotImplementedError\n\n  def _get_msqe_weight(self, inputs=None):\n    \"\"\"Returns weighting factors for MSQE optimizations.\n\n    Args:\n      inputs: A tensor to be used to generate the outlier mask.\n\n    Returns:\n      A tensor to be used as weighting factors for MSQE optimizations or None.\n      Note: it is assumed that when 'None' is returned, no weighting factors\n      will be applied for MSQE optimizations.\n\n    Raises:\n      ValueError: if 'inputs' is None when self.use_outlier_mask_msqe_weight is\n      True.\n    \"\"\"\n    if self.use_outlier_mask_msqe_weight and inputs is None:\n      raise ValueError(\n          f\"inputs must not be None if self.use_outlier_mask_msqe_weight is\"\n          f\" True.\")\n\n    if self.msqe_weight is None:\n      # Only returns the outlier mask\n      return self._get_outlier_mask(\n          inputs) if self.use_outlier_mask_msqe_weight else 
None\n\n    msqe_weight = self.msqe_weight\n\n    if self.use_sqrt_of_msqe_weight:\n      # To use square rooted msqe_weight\n      msqe_weight = tf.math.sqrt(msqe_weight)\n\n    if self.use_outlier_mask_msqe_weight:\n      # Returns the outlier mask modulated msqe_weight\n      msqe_weight = msqe_weight * self._get_outlier_mask(inputs)\n\n    return msqe_weight\n\n  def _get_stable_scale(self, scale):\n    \"\"\"Updates and returns power-of-2 'stable' scaling factors.\n\n    It updates the exponential moving average (EMA) of the scale exponent when\n    self.should_update_stable_scale_exponent is true and\n    self.switch_to_stable_scale is false, and returns scaling factor based on\n    the stable (EMAed) scale exponent when self.switch_to_stable_scale is set\n    true else returns passed-in 'scale'.\n\n    Args:\n      scale: A tensor of power-of-2 scaling factors.\n\n    Returns:\n      A tensor of power-of-2 scaling factors.\n    \"\"\"\n    # Freezes updating exponential moving average of self.stable_scale_exponent\n    # when self.should_update_stable_scale_exponent is false or\n    # self.switch_to_stable_scale is set True.\n    should_update = tf.logical_and(self.should_update_stable_scale_exponent,\n                                   not self.switch_to_stable_scale)\n    # Update the stable (EMAed) scale exponent.\n    # Note: when 'self.is_initialized' is false, 'self.stable_scale_exponent' is\n    # assigned with the scale exponent of the 'scale' input otherwise it is\n    # updated with exponential moving average.\n    stable_scale = self._update_stable_scale_exponent(scale, should_update,\n                                                      self.is_initialized)\n    # Use the stable scale only when self.switch_to_stable_scale is set True.\n    scale = tf.cond(self.switch_to_stable_scale, lambda: stable_scale,\n                    lambda: scale)\n    return scale\n\n  def _update_stable_scale_exponent(self, scale, should_update, is_initialized):\n    
\"\"\"Updates and returns stable (EMAed) power-of-2 scaling factors.\n\n    It performs exponential moving average on the scale exponent, not on the\n    scale itself.\n\n    Args:\n      scale: a tensor to be used to update exponential moving average of scale\n        exponents.\n      should_update: A bool. Whether to update exponential moving average of\n        scale exponents.\n      is_initialized: A bool. Whether to initialize the stable scale exponent.\n\n    Returns:\n      A tensor of (stable) power-of-2 scaling factors\n    \"\"\"\n    scale_exponent = self._get_po2_scale_exponent(scale)\n    _update_ema_variable(\n        self.stable_scale_exponent,\n        scale_exponent,\n        ema_decay=self.stable_scale_ema_decay,\n        is_initialized=is_initialized,\n        should_update=should_update)\n    return tf.math.pow(2.0, tf.math.rint(self.stable_scale_exponent))\n\n  def _initialize_scale_exponent(self, inputs):\n    \"\"\"Initializes the scale exponent only once.\n\n    It only initializes 'self.scale_exponent' once when there is no preset\n    initial scaling factor (i.e., self.init_scale is None).\n\n    Args:\n      inputs: A tensor, where the initial scale exponent is based on.\n    \"\"\"\n    update_cond = tf.math.logical_and(not self.is_initialized,\n                                      self.init_scale is None)\n    scale_exponent_to_init = tf.cond(\n        update_cond,\n        lambda: tf.stop_gradient(self._get_init_scale_exponent(inputs)),\n        lambda: self.scale_exponent)\n    self.scale_exponent.assign(scale_exponent_to_init)\n\n  def _get_clipped_inputs_mask(self, inputs, scale):\n    \"\"\"Returns a tensor to mask out the clipped inputs.\n\n    The mask has 1.0 for the rounded inputs and 0.0 for the clipped inputs.\n\n    Args:\n      inputs: A tensor to get the clipping mask from.\n      scale: A tensor of the scaling factor.\n\n    Returns:\n      A tensor to mask out the clipped inputs.\n    \"\"\"\n    inputs_rounded = 
tf.math.rint(inputs / scale)\n    clip_error_mask = tf.math.logical_and(\n        tf.less_equal(inputs_rounded, self.qp),\n        tf.greater_equal(inputs_rounded, -self.qn))\n    return tf.cast(clip_error_mask, tf.float32)\n\n  def _get_scale_axis(self, input_shape):\n    \"\"\"Returns the scaling axis based on the input shape.\n\n    Args:\n      input_shape: a tuple of integers which is the size of the input channels.\n\n    Returns:\n      A scalar value.\n    \"\"\"\n    if K.image_data_format() == \"channels_last\":\n      scale_axis = (len(input_shape) - 1) if len(input_shape) else 0\n    else:\n      scale_axis = 1 if input_shape[0] is None else 0\n    return scale_axis\n\n  def _get_scaled_axes(self, scale_axis, input_shape):\n    \"\"\"Returns the axes scaled together.\n\n    Args:\n      scale_axis: an integer of the scaling axis.\n      input_shape: a tuple of integers which is the size of the input channels.\n\n    Returns:\n      A list of integers.\n    \"\"\"\n    if self.per_channel_scale:\n      scaled_axes = list(range(scale_axis))\n    else:\n      scaled_axes = list(range(len(input_shape)))\n    return scaled_axes\n\n  def _clip_quant(self, inputs):\n    \"\"\"Returns clipped inputs (scale-normalized) by the quantization levels.\n\n    Args:\n      inputs: A tensor (scale-normalized input value).\n\n    Returns:\n      A tensor clipped by the quantization levels.\n    \"\"\"\n    return tf.minimum(tf.maximum(inputs, -self.qn), self.qp)\n\n  def _round_quant(self, inputs):\n    \"\"\"Returns rounded inputs using a straight-through estimator (STE).\n\n    Args:\n      inputs: A tensor to be rounded.\n\n    Returns:\n      A tensor through a straight-through estimator.\n    \"\"\"\n    return inputs + tf.stop_gradient(-inputs + tf.math.rint(inputs))\n\n  def _simple_quantize(self, inputs, scale, should_return_q=False):\n    \"\"\"Returns quantized inputs without a straight-through estimator (STE).\n\n    Args:\n      inputs: A tensor to be 
quantized.\n      scale: A tensor of the scaling factor.\n      should_return_q: if true, quantized inputs in integer will be also\n        returned.\n\n    Returns:\n      A tensor of fake quantized inputs (, a tensor of quantized inputs)\n    \"\"\"\n    inputs_rounded = tf.math.rint(inputs / scale)\n    inputs_clipped = self._clip_quant(inputs_rounded)\n    if should_return_q:\n      return scale * inputs_clipped, inputs_clipped\n    else:\n      return scale * inputs_clipped\n\n  def _get_po2_scale(self, scale):\n    \"\"\"Returns power-of-2 constrained scaling factors.\n\n    Args:\n      scale: A tensor to be power-of-2 constrained.\n\n    Returns:\n      A tensor (power-of-2 constrained scaling factor).\n    \"\"\"\n    return tf.math.pow(2.0, self._get_po2_scale_exponent(scale))\n\n  def _get_po2_scale_exponent(self, scale):\n    \"\"\"Returns power-of-2 constrained scale exponent.\n\n    Args:\n      scale: A tensor to get power-of-2 scale exponent from.\n\n    Returns:\n      A tensor constrained to be in integer values.\n    \"\"\"\n    scale_exponent = tf.math.log(scale + K.epsilon()) / tf.math.log(2.0)\n    return tf.round(scale_exponent)\n\n  def _calculate_msqe(self, x, xq, reduce_axes=None, msqe_weight=None):\n    \"\"\"Returns the mean squared quantization error (MSQE).\n\n    Args:\n      x: a tensor of the original inputs.\n      xq: a tensor of the fake quantized inputs.\n      reduce_axes: A list of axes to be summed (averaged) over or None. If None,\n        self.reduce_axes will be used.\n      msqe_weight: A tensor or None. 
If None, no weighting is applied in the\n        MSQE calculation.\n\n    Returns:\n      A tensor of the MSQE\n    \"\"\"\n    if reduce_axes is None:\n      reduce_axes = self.reduce_axes\n    msqe = tf.math.pow(x - xq, 2.0)\n    if msqe_weight is not None:\n      msqe *= msqe_weight\n    return tf.reduce_sum(msqe, axis=reduce_axes, keepdims=True)\n\n  def _calculate_msqe_inputs(self,\n                             inputs,\n                             scale,\n                             reduce_axes=None,\n                             msqe_weight=None):\n    \"\"\"Returns the mean squared quantization error (MSQE) of the inputs.\n\n    Args:\n      inputs: a tensor to calculate the MSQE from.\n      scale: a tensor to scale (quantize) the input with.\n      reduce_axes: A list of axes to be summed (averaged) over or None. If None,\n        self.reduce_axes will be used.\n      msqe_weight: A tensor or None. If None, no weighting is applied in the\n        MSQE calculation.\n\n    Returns:\n      A tensor of the MSQE\n    \"\"\"\n    inputs_quant = self._simple_quantize(inputs, scale)\n    return self._calculate_msqe(\n        inputs, inputs_quant, reduce_axes=reduce_axes, msqe_weight=msqe_weight)\n\n  def _least_squares_msqe_scale(self,\n                                inputs,\n                                scale,\n                                reduce_axes=None,\n                                msqe_weight=None,\n                                num_lls_iters=3,\n                                should_return_msqe=False):\n    \"\"\"Returns power-of-2 scaling factors from linear least squares regression.\n\n    Args:\n      inputs: a tensor to optimize the scaling factor from.\n      scale: a tensor to be used as the initial scale to quantize the input\n        with.\n      reduce_axes: A list of axes to be summed (averaged) over or None. If None,\n        self.reduce_axes will be used.\n      msqe_weight: A tensor or None. 
If None, no weighting is applied in the\n        linear least squares regression.\n      num_lls_iters: An integer. Number of linear least squares regression\n        iterations.\n      should_return_msqe: A bool. Whether to return the MSQE of the inputs.\n\n    Returns:\n      A tensor of power-of-2 scaling factors (, a tensor of the MSQE)\n    \"\"\"\n    if reduce_axes is None:\n      reduce_axes = self.reduce_axes\n\n    best_scale = tf.identity(scale)\n    xq, q = self._simple_quantize(inputs, best_scale, should_return_q=True)\n    best_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)\n\n    for _ in range(num_lls_iters):\n      # performs linear least squares regression\n      new_scale = _get_msqe_scale(\n          x=inputs,\n          q=q,\n          scale_axis=self.scale_axis,\n          per_channel_scale=self.per_channel_scale,\n          msqe_weight=msqe_weight)\n      xq, q = self._simple_quantize(inputs, new_scale, should_return_q=True)\n      new_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)\n\n      # Update the best scale and the best msqe\n      best_scale = tf.where(new_msqe < best_msqe, new_scale, best_scale)\n      best_msqe = tf.where(new_msqe < best_msqe, new_msqe, best_msqe)\n\n    if should_return_msqe:\n      return best_scale, best_msqe\n    else:\n      return best_scale\n\n  def _line_search_msqe_scale(self,\n                              inputs,\n                              scale,\n                              reduce_axes=None,\n                              msqe_weight=None,\n                              line_search_range=6,\n                              should_return_msqe=False):\n    \"\"\"Returns power-of-2 scaling factors from line search.\n\n    Args:\n      inputs: a tensor to optimize the scaling factor from.\n      scale: a tensor to be used as the initial scale to quantize the input\n        with.\n      reduce_axes: A list of axes to be summed (averaged) over or None. 
If None,\n        self.reduce_axes will be used.\n      msqe_weight: A tensor or None. If None, no weighting is applied in the\n        line search.\n      line_search_range: An integer. Search range of the line search.\n      should_return_msqe: A bool. Whether to return the MSQE of the inputs.\n\n    Returns:\n      A tensor of power-of-2 scaling factors (, a tensor of the MSQE)\n    \"\"\"\n    if reduce_axes is None:\n      reduce_axes = self.reduce_axes\n\n    best_scale = tf.identity(scale)\n    xq = self._simple_quantize(inputs, best_scale)\n    best_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)\n    best_scale_exponent = self._get_po2_scale_exponent(best_scale)\n\n    # PO2 exponent search offsets\n    end_range = line_search_range // 2 + 1\n    po2_exponent_offsets = [i for i in range(-end_range+1,end_range) if i != 0]\n    for exp_offset in po2_exponent_offsets:\n      # Optimize scale\n      new_scale = tf.math.pow(2.0, best_scale_exponent + exp_offset)\n      xq = self._simple_quantize(inputs, new_scale)\n      new_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)\n      # Update the best scale and msqe\n      best_scale = tf.where(new_msqe < best_msqe, new_scale, best_scale)\n      best_msqe = tf.where(new_msqe < best_msqe, new_msqe, best_msqe)\n\n    if should_return_msqe:\n      return best_scale, best_msqe\n    else:\n      return best_scale\n\n  def _optimize_msqe_scale(self,\n                           inputs,\n                           scale,\n                           reduce_axes=None,\n                           msqe_weight=None,\n                           num_lls_iters=None,\n                           should_line_search=True,\n                           line_search_range=None):\n    \"\"\"Returns optimized power-of-2 scaling factors.\n\n    It performs an iterative linear least squares regression and an optional\n    line search to find optimal power-of-2 scaling factors for the given inputs\n    from 
the initial scaling factors ('scale').\n\n    Args:\n      inputs: a tensor to find optimal power-of-2 scaling factors for.\n      scale: a tensor to be used as initial scaling factors.\n      reduce_axes: A list of axes to be summed (averaged) over or None. If None,\n        self.reduce_axes will be used.\n      msqe_weight: A tensor or None. If None, no weighting is applied in the\n        optimizations.\n      num_lls_iters: An integer. Number of linear least squares regression\n        iterations.\n      should_line_search: A bool. Whether to perform a line search.\n      line_search_range: An integer. Search range of the line search.\n\n    Returns:\n      A tensor of power-of-2 scaling factors, A tensor of the MSQE\n    \"\"\"\n    if reduce_axes is None:\n      reduce_axes = self.reduce_axes\n    if num_lls_iters is None:\n      num_lls_iters = self.num_lls_iters\n    if line_search_range is None:\n      line_search_range = self.line_search_range\n\n    scale, msqe = self._least_squares_msqe_scale(\n        inputs,\n        scale,\n        reduce_axes=self.reduce_axes,\n        msqe_weight=msqe_weight,\n        num_lls_iters=num_lls_iters,\n        should_return_msqe=True)\n\n    if should_line_search:\n      scale, msqe = self._line_search_msqe_scale(\n          inputs,\n          scale,\n          reduce_axes=self.reduce_axes,\n          msqe_weight=msqe_weight,\n          line_search_range=line_search_range,\n          should_return_msqe=True)\n\n    # Having an additional '_get_po2_scale' is just to make sure returning\n    # scaling factors are in power-of-2.\n    return self._get_po2_scale(scale), msqe\n\n  def max(self):\n    \"\"\"Returns the maximum value that the quantizer can represent.\"\"\"\n    if hasattr(self, \"is_initialized\") and self.is_initialized.numpy():\n      return self._get_scale() * self.qp\n    else:\n      return 1.0\n\n  def min(self):\n    \"\"\"Returns the minimum value that the quantizer can represent.\"\"\"\n    if 
self.keep_negative:\n      if hasattr(self, \"is_initialized\") and self.is_initialized.numpy():\n        return self._get_scale() * (-self.qn)\n      else:\n        return -1.0\n    else:\n      return 0.0\n\n\nclass quantized_bits_learnable_po2(BaseQuantizerPO2):  # pylint: disable=invalid-name\n  \"\"\"Quantizes the number to a number of bits by learnable scale factors.\n  For more details, see https://arxiv.org/abs/2210.03671.\n\n  The implementation was inspired by \"TRAINED QUANTIZATION THRESHOLDS FOR\n  ACCURATE AND EFFICIENT FIXED-POINT INFERENCE OF DEEP NEURAL NETWORKS\"\n  (https://arxiv.org/pdf/1903.08066.pdf).\n\n  Attributes:\n    bits: Integer, number of bits to perform quantization.\n    keep_negative: Boolean, if true, it keeps negative values and sets the\n      quantization levels symmetrically around 0. If false, negative numbers is\n      clipped to 0.\n    scale_axis: Integer, which axis to calculate scale from.\n    per_channel_scale: Boolean, whether to perform per-channel (true) or\n      per-layer (false) quantization.\n    init_scale: Float or None, initial scale factor to initialize the scale with\n      (if None, it will be initialized based on the first inputs.).\n    use_second_moments_msqe_opt: Bool, whether to use the second moments based\n      MSQE optimization or not.\n    second_moments_ema_decay: Float, EMA decay factor for the second moments\n      update.\n    use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight.\n    use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask.\n    use_stable_scale_exponent: Bool, whether to use exponentially moving\n      averaged (\"stable\") scale exponent or not. 
Note: there is a tf.Variable\n        (self.switch_to_stable_scale) that controls when to apply the stable\n        scale exponent (i.e., if use_stable_scale_exponent is true and\n        self.switch_to_stable_scale is false, the stable scale exponent is\n        updated but not used.).\n    stable_scale_ema_decay: Float, EMA decay factor for the stable scale update.\n    min_init_scale: float or None. minimum initial scale value. If None, the\n      initial scale value is not bounded by a minimum value. It is useful to\n      prevent zero initial scale value for inputs with all zeros (e.g., bias).\n    use_po2_scale_ceil: Bool, whether to use ceil function for constraining\n      power-of-2 scale exponents. If false, round function is used instead.\n    use_po2_scale_msqe_round: Bool, whether to use MSQE rounding function for\n      constraining power-of-2 scale exponents. Note: MSQE rounding has\n        precedence over ceil and round function.\n  \"\"\"\n\n  def __init__(self,\n               bits=4,\n               keep_negative=True,\n               scale_axis=None,\n               per_channel_scale=False,\n               init_scale=None,\n               use_second_moments_msqe_opt=True,\n               second_moments_ema_decay=0.999,\n               use_sqrt_of_msqe_weight=True,\n               use_outlier_mask_msqe_weight=True,\n               use_stable_scale_exponent=False,\n               stable_scale_ema_decay=0.99,\n               min_init_scale=0.00001,\n               use_po2_scale_ceil=True,\n               use_po2_scale_msqe_round=True,\n               **kwargs):\n\n    self.min_init_scale = min_init_scale\n    self.use_po2_scale_ceil = use_po2_scale_ceil\n    self.use_po2_scale_msqe_round = use_po2_scale_msqe_round\n\n    # An indicator variable to control usage of MSQE rounding function, which is\n    # set to true by default (i.e, if use_po2_scale_msqe_round is true, MSQE\n    # rounding is used by default based on self.switch_to_msqe_round.). 
It can\n    # be used to delay using MSQE rounding.\n    self.switch_to_msqe_round = None\n\n    super().__init__(\n        bits=bits,\n        keep_negative=keep_negative,\n        scale_axis=scale_axis,\n        per_channel_scale=per_channel_scale,\n        init_scale=init_scale,\n        use_second_moments_msqe_opt=use_second_moments_msqe_opt,\n        second_moments_ema_decay=second_moments_ema_decay,\n        use_sqrt_of_msqe_weight=use_sqrt_of_msqe_weight,\n        use_outlier_mask_msqe_weight=use_outlier_mask_msqe_weight,\n        use_stable_scale_exponent=use_stable_scale_exponent,\n        stable_scale_ema_decay=stable_scale_ema_decay,\n        is_gradient_based=True,\n        **kwargs)\n\n  def __str__(self):\n    # Convert Tensors to printable strings by converting to a numpy array and\n    # then using regex to remove brackets when there is only one integer bit.\n    ptn, repl = r\"\\[(\\d)\\]\", r\"\\g<1>\"\n    bits = re.sub(\n        ptn, repl,\n        str(self.bits.numpy() if isinstance(self.bits, tf.Variable) else self\n            .bits))\n\n    flags = []\n    flags.append(\"bits=\" + str(int(bits)))\n    flags.append(\"keep_negative=\" + str(self.keep_negative))\n    flags.append(\"scale_axis=\" + str(self.scale_axis))\n    flags.append(\"per_channel_scale=\" + str(self.per_channel_scale))\n    flags.append(\"init_scale=\" + str(self.init_scale))\n    flags.append(\"use_second_moments_msqe_opt=\" +\n                 str(self.use_second_moments_msqe_opt))\n    flags.append(\"second_moments_ema_decay=\" +\n                 str(self.second_moments_ema_decay))\n    flags.append(\"use_outlier_mask_msqe_weight=\" +\n                 str(self.use_outlier_mask_msqe_weight))\n    flags.append(\"use_sqrt_of_msqe_weight=\" + str(self.use_sqrt_of_msqe_weight))\n    flags.append(\"use_stable_scale_exponent=\" +\n                 str(self.use_stable_scale_exponent))\n    flags.append(\"stable_scale_ema_decay=\" + str(self.stable_scale_ema_decay))\n    
flags.append(\"min_init_scale=\" + str(self.min_init_scale))\n    flags.append(\"use_po2_scale_ceil=\" + str(self.use_po2_scale_ceil))\n    flags.append(\"use_po2_scale_msqe_round=\" +\n                 str(self.use_po2_scale_msqe_round))\n    return \"quantized_bits_learnable_po2(\" + \",\".join(flags) + \")\"\n\n  def build(self, input_shape):\n    \"\"\"Creates and initializes variables.\"\"\"\n    super().build(input_shape)\n\n    if self.use_po2_scale_msqe_round:\n      self.switch_to_msqe_round = tf.Variable(\n          True, trainable=False, name=\"switch_to_msqe_round\")\n\n  def _get_init_scale_exponent(self, inputs):\n    \"\"\"Returns inputs distribution based initial scale exponent values.\n\n    Args:\n      inputs: A tensor to be used to calculate initial scale exponent values.\n\n    Returns:\n      A tensor of initial scale exponent values.\n    \"\"\"\n    std = tf.math.reduce_std(inputs, axis=self.reduce_axes, keepdims=True)\n    # Uses 3 sigma percentile to get scale\n    scale = 3.0 * std / tf.cast(self.qp, dtype=tf.float32)\n\n    # Prevents zero scale values for inputs with all zeros (e.g., bias).\n    if self.min_init_scale is not None:\n      scale = tf.math.maximum(scale, self.min_init_scale)\n\n    # Returns scale exponent\n    return tf.math.log(scale) / tf.math.log(2.0)\n\n  def _get_outlier_mask(self, inputs):\n    \"\"\"Returns a tensor to mask outliers in the input for MSQE optimizations.\n\n    The outlier threshold is based on the (unconstrained) output dynamic range\n    of the quantizer.\n\n    Args:\n      inputs: A tensor to be used to generate the outlier mask.\n\n    Returns:\n      A tensor to mask out the outliers of the inputs. 
Its shape is same as\n      'inputs' and its dtype is `float32`.\n    \"\"\"\n    # Calculates the output (unconstrained) dynamic range of the quantizer (i.e.\n    # , self.scale_exponent is not power-of-2 constrained.).\n    outlier_threshold = tf.math.pow(2.0, self.scale_exponent) * (self.qp + 0.5)\n    return tf.where(\n        abs(inputs) <= outlier_threshold,\n        tf.ones_like(inputs, dtype=tf.float32),\n        tf.zeros_like(inputs, dtype=tf.float32))\n\n  def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None):\n    \"\"\"Returns power-of-2 scaling factors for quantization.\n\n    Args:\n      inputs: A tensor to be used for MSQE rounding. Note: ceil and round\n        functions do not use the inputs.\n      reduce_axes: A list of axes to be summed (averaged) over.\n      msqe_weight: A tensor which is used in scale optimization to weight the\n        MSQE (Mean Squared Quantization Error) of individual input elements. Its\n        shape is same as 'inputs' and its dtype is `float32`.\n\n    Returns:\n      A tensor of power-of-2 scaling factors.\n    \"\"\"\n    if self.use_po2_scale_ceil:\n      scale_exponent = tf.math.ceil(self.scale_exponent)\n    else:\n      scale_exponent = tf.math.rint(self.scale_exponent)\n\n    # MSQE rounding requires the inputs to optimize the scale exponent.\n    if self.use_po2_scale_msqe_round and inputs is not None:\n      scale_exponent_msqe = self.msqe_round(\n          inputs=inputs,\n          scale_exponent=self.scale_exponent,\n          reduce_axes=reduce_axes,\n          msqe_weight=msqe_weight)\n\n      # Control when to use MSQE rounding. 
Note: self.switch_to_msqe_round is\n      # set to true by default.\n      scale_exponent = tf.cond(self.switch_to_msqe_round,\n                               lambda: scale_exponent_msqe,\n                               lambda: scale_exponent)\n\n    # Apply STE\n    scale_exponent = self.scale_exponent + tf.stop_gradient(scale_exponent -\n                                                            self.scale_exponent)\n    return tf.math.pow(2.0, scale_exponent)\n\n  def msqe_round(self,\n                 inputs,\n                 scale_exponent,\n                 reduce_axes=None,\n                 msqe_weight=None):\n    \"\"\"Returns MSQE-wise optimum power-of-2 scale exponents.\n\n    Args:\n      inputs: A tensor, MSQE rounding is based on.\n      scale_exponent: A tensor, learnable scale exponents which are not\n        constrained in power-of-2.\n      reduce_axes: A list of axes to be summed (averaged) over or None. If None,\n        self.reduce_axes is used.\n      msqe_weight: A tensor which is used to weight MSQE rounding or None. 
If\n        None, a tensor (or None) from self._get_msqe_weight is used.\n\n    Returns:\n      A tensor of power-of-2 scale exponents.\n    \"\"\"\n    if reduce_axes is None:\n      reduce_axes = self.reduce_axes\n\n    if msqe_weight is None:\n      # Returned msqe_weight can be None.\n      msqe_weight = self._get_msqe_weight(inputs)\n\n    # floor\n    scale_exponent_floor = tf.math.floor(scale_exponent)\n    msqe_floor = self._calculate_msqe_inputs(\n        inputs=inputs,\n        scale=tf.math.pow(2.0, scale_exponent_floor),\n        reduce_axes=reduce_axes,\n        msqe_weight=msqe_weight)\n\n    # ceil\n    scale_exponent_ceil = tf.math.ceil(scale_exponent)\n    msqe_ceil = self._calculate_msqe_inputs(\n        inputs=inputs,\n        scale=tf.math.pow(2.0, scale_exponent_ceil),\n        reduce_axes=reduce_axes,\n        msqe_weight=msqe_weight)\n\n    return tf.where(msqe_floor < msqe_ceil, scale_exponent_floor,\n                    scale_exponent_ceil)\n\n  def get_config(self):\n    config = {\n        \"bits\": self.bits,\n        \"keep_negative\": self.keep_negative,\n        \"scale_axis\": self.scale_axis,\n        \"per_channel_scale\": self.per_channel_scale,\n        \"init_scale\": self.init_scale,\n        \"use_second_moments_msqe_opt\": self.use_second_moments_msqe_opt,\n        \"second_moments_ema_decay\": self.second_moments_ema_decay,\n        \"use_outlier_mask_msqe_weight\": self.use_outlier_mask_msqe_weight,\n        \"use_sqrt_of_msqe_weight\": self.use_sqrt_of_msqe_weight,\n        \"use_stable_scale_exponent\": self.use_stable_scale_exponent,\n        \"stable_scale_ema_decay\": self.stable_scale_ema_decay,\n        \"min_init_scale\": self.min_init_scale,\n        \"use_po2_scale_ceil\": self.use_po2_scale_ceil,\n        \"use_po2_scale_msqe_round\": self.use_po2_scale_msqe_round,\n        \"qn\": self.qn,\n        \"qp\": self.qp,\n        \"scaled_axes\": self.scaled_axes,\n    }\n    base_config = super().get_config()\n    
return dict(list(base_config.items()) + list(config.items()))\n\n\nclass quantized_bits_msqe_po2(BaseQuantizerPO2):  # pylint: disable=invalid-name\n  \"\"\"Quantizes the number to a number of bits by MSQE based scaling factors.\n  For more details, see https://arxiv.org/abs/2210.03671.\n\n  Attributes:\n    bits: Integer, number of bits to perform quantization.\n    keep_negative: Boolean, if true, it keeps negative values and sets the\n      quantization levels symmetrically around 0. If false, negative numbers is\n      clipped to 0.\n    scale_axis: Integer, which axis to calculate scale from.\n    per_channel_scale: Boolean, whether to perform per-channel (true) or\n      per-layer (false) quantization.\n    init_scale: Float or None, initial scale factor to initialize the scale with\n      (if None, it will be initialized based on the first inputs.).\n    use_second_moments_msqe_opt: Bool, whether to use the second moments based\n      MSQE optimization or not.\n    second_moments_ema_decay: Float, EMA decay factor for the second moments\n      update.\n    use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight.\n    use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask.\n    use_stable_scale_exponent: Bool, whether to use exponentially moving\n      averaged (\"stable\") scale exponent or not. Note: there is a tf.Variable\n        (self.switch_to_stable_scale) that controls when to apply the stable\n        scale exponent (i.e., if use_stable_scale_exponent is true and\n        self.switch_to_stable_scale is false, the stable scale exponent is\n        updated but not used.).\n    stable_scale_ema_decay: Float, EMA decay factor for the stable scale update.\n    outlier_mask_sigma: Float, sigma to apply for the outlier masking threshold.\n    num_lls_iters: An integer. Number of linear least squares regression\n      iterations.\n    should_line_search: A bool. Whether to perform a line search.\n    line_search_range: An integer. 
Search range of the line search.\n  \"\"\"\n\n  def __init__(self,\n               bits=4,\n               keep_negative=True,\n               scale_axis=None,\n               per_channel_scale=False,\n               init_scale=None,\n               use_second_moments_msqe_opt=True,\n               second_moments_ema_decay=0.999,\n               use_sqrt_of_msqe_weight=True,\n               use_outlier_mask_msqe_weight=True,\n               use_stable_scale_exponent=False,\n               stable_scale_ema_decay=0.99,\n               outlier_mask_sigma=2.0,\n               num_lls_iters=3,\n               should_line_search=True,\n               line_search_range=6,\n               **kwargs):\n\n    self.outlier_mask_sigma = outlier_mask_sigma\n    self.num_lls_iters = num_lls_iters\n    self.should_line_search = should_line_search\n    self.line_search_range = line_search_range\n\n    super().__init__(\n        bits=bits,\n        keep_negative=keep_negative,\n        scale_axis=scale_axis,\n        per_channel_scale=per_channel_scale,\n        init_scale=init_scale,\n        use_second_moments_msqe_opt=use_second_moments_msqe_opt,\n        second_moments_ema_decay=second_moments_ema_decay,\n        use_sqrt_of_msqe_weight=use_sqrt_of_msqe_weight,\n        use_outlier_mask_msqe_weight=use_outlier_mask_msqe_weight,\n        use_stable_scale_exponent=use_stable_scale_exponent,\n        stable_scale_ema_decay=stable_scale_ema_decay,\n        is_gradient_based=False,\n        **kwargs)\n\n  def __str__(self):\n    # Convert Tensors to printable strings by converting to a numpy array and\n    # then using regex to remove brackets when there is only one integer bit.\n    ptn, repl = r\"\\[(\\d)\\]\", r\"\\g<1>\"\n    bits = re.sub(\n        ptn, repl,\n        str(self.bits.numpy() if isinstance(self.bits, tf.Variable) else self\n            .bits))\n\n    flags = []\n    flags.append(\"bits=\" + str(int(bits)))\n    flags.append(\"keep_negative=\" + 
str(self.keep_negative))\n    flags.append(\"scale_axis=\" + str(self.scale_axis))\n    flags.append(\"per_channel_scale=\" + str(self.per_channel_scale))\n    flags.append(\"init_scale=\" + str(self.init_scale))\n    flags.append(\"use_second_moments_msqe_opt=\" +\n                 str(self.use_second_moments_msqe_opt))\n    flags.append(\"second_moments_ema_decay=\" +\n                 str(self.second_moments_ema_decay))\n    flags.append(\"use_sqrt_of_msqe_weight=\" + str(self.use_sqrt_of_msqe_weight))\n    flags.append(\"use_outlier_mask_msqe_weight=\" +\n                 str(self.use_outlier_mask_msqe_weight))\n    flags.append(\"use_stable_scale_exponent=\" +\n                 str(self.use_stable_scale_exponent))\n    flags.append(\"stable_scale_ema_decay=\" + str(self.stable_scale_ema_decay))\n    flags.append(\"outlier_mask_sigma=\" + str(self.outlier_mask_sigma))\n    flags.append(\"num_lls_iters=\" + str(self.num_lls_iters))\n    flags.append(\"should_line_search=\" + str(self.should_line_search))\n    flags.append(\"line_search_range=\" + str(self.line_search_range))\n    return \"quantized_bits_msqe_po2(\" + \",\".join(flags) + \")\"\n\n  def _get_init_scale_exponent(self, inputs):\n    \"\"\"Returns min and max of the inputs based initial scale exponent values.\n\n    Args:\n      inputs: A tensor to be used to calculate initial scale exponent values.\n\n    Returns:\n      A tensor of initial scale exponent values.\n    \"\"\"\n    scale = K.max(\n        abs(inputs), axis=self.scaled_axes, keepdims=True) / tf.cast(\n            self.qp, dtype=tf.float32)\n    return self._get_po2_scale_exponent(scale)\n\n  def _get_outlier_mask(self, inputs):\n    \"\"\"Returns a tensor to mask outliers in the input for MSQE optimizations.\n\n    The outlier threshold is based on the inputs distribution.\n\n    Args:\n      inputs: A tensor to be used to generate the outlier mask.\n\n    Returns:\n      A tensor to mask out the outliers of the inputs. 
Its shape is same as\n      'inputs' and its dtype is `float32`.\n    \"\"\"\n    std = tf.math.reduce_std(inputs, axis=self.reduce_axes, keepdims=True)\n    outlier_threshold = self.outlier_mask_sigma * std\n    return tf.where(\n        abs(inputs) <= outlier_threshold, tf.ones_like(inputs),\n        tf.zeros_like(inputs))\n\n  def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None):\n    \"\"\"Returns power-of-2 scaling factors for quantization.\n\n    Args:\n      inputs: A tensor to be used to optimize the scale value.\n      reduce_axes: A list of axes to be summed (averaged) over.\n      msqe_weight: A tensor which is used in scale optimization to weight the\n        MSQE (Mean Squared Quantization Error) of individual input elements. Its\n        shape is same as 'inputs' and its dtype is `float32`.\n\n    Returns:\n      A tensor of power-of-2 scaling factors. Its shape is same as\n      'self.scale_exponent' and its dtype is `float32`.\n    \"\"\"\n    if inputs is None:\n      return self._get_po2_scale(self.scale)\n\n    if reduce_axes is None:\n      reduce_axes = self.reduce_axes\n\n    if msqe_weight is None:\n      msqe_weight = self._get_msqe_weight(inputs)\n\n    scale, _ = self._optimize_msqe_scale(\n        inputs,\n        tf.math.pow(2.0, tf.round(self.scale_exponent)),\n        reduce_axes=reduce_axes,\n        msqe_weight=msqe_weight,\n        num_lls_iters=self.num_lls_iters,\n        should_line_search=self.should_line_search,\n        line_search_range=self.line_search_range,\n    )\n    self.scale_exponent.assign(self._get_po2_scale_exponent(scale))\n    return scale\n\n  def get_config(self):\n    config = {\n        \"bits\": self.bits,\n        \"keep_negative\": self.keep_negative,\n        \"scale_axis\": self.scale_axis,\n        \"per_channel_scale\": self.per_channel_scale,\n        \"init_scale\": self.init_scale,\n        \"use_second_moments_msqe_opt\": self.use_second_moments_msqe_opt,\n        
\"second_moments_ema_decay\": self.second_moments_ema_decay,\n        \"use_sqrt_of_msqe_weight\": self.use_sqrt_of_msqe_weight,\n        \"use_outlier_mask_msqe_weight\": self.use_outlier_mask_msqe_weight,\n        \"use_stable_scale_exponent\": self.use_stable_scale_exponent,\n        \"stable_scale_ema_decay\": self.stable_scale_ema_decay,\n        \"outlier_mask_sigma\": self.outlier_mask_sigma,\n        \"num_lls_iters\": self.num_lls_iters,\n        \"should_line_search\": self.should_line_search,\n        \"line_search_range\": self.line_search_range,\n        \"qn\": self.qn,\n        \"qp\": self.qp,\n        \"scaled_axes\": self.scaled_axes,\n    }\n    base_config = super().get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n"
  },
  {
    "path": "qkeras/qconv2d_batchnorm.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Fold batchnormalization with previous QConv2D layers.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nfrom six.moves import range\nimport tensorflow as tf\nfrom tensorflow.keras import layers\nfrom tensorflow.keras.models import Model\n\nfrom .qconvolutional import QConv2D\nfrom .quantizers import *\nfrom tensorflow.python.framework import smart_cond as tf_utils\nfrom tensorflow.python.ops import math_ops\n\ntf.compat.v2.enable_v2_behavior()\n\n\n# TODO(lishanok): Create an abstract folding parent class\nclass QConv2DBatchnorm(QConv2D):\n  \"\"\"Fold batchnormalization with a previous qconv2d layer.\"\"\"\n\n  def __init__(\n      self,\n      # qconv2d params\n      filters,\n      kernel_size,\n      strides=(1, 1),\n      padding=\"valid\",\n      data_format=\"channels_last\",\n      dilation_rate=(1, 1),\n      activation=None,\n      use_bias=True,\n      kernel_initializer=\"he_normal\",\n      bias_initializer=\"zeros\",\n      kernel_regularizer=None,\n      bias_regularizer=None,\n      activity_regularizer=None,\n      kernel_constraint=None,\n      bias_constraint=None,\n      kernel_quantizer=None,\n      bias_quantizer=None,\n\n      # batchnorm params\n      axis=-1,\n     
 momentum=0.99,\n      epsilon=0.001,\n      center=True,\n      scale=True,\n      beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\",\n      moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\",\n      beta_regularizer=None,\n      gamma_regularizer=None,\n      beta_constraint=None,\n      gamma_constraint=None,\n      renorm=False,\n      renorm_clipping=None,\n      renorm_momentum=0.99,\n      fused=None,\n      trainable=True,\n      virtual_batch_size=None,\n      adjustment=None,\n\n      # other params\n      ema_freeze_delay=None,\n      folding_mode=\"ema_stats_folding\",\n      **kwargs):\n    \"\"\"Initialize a composite layer that folds conv2d and batch normalization.\n\n    The first group of parameters corresponds to the initialization parameters\n      of a qconv2d layer. Check qkeras.qconvolutional.qconv2d for details.\n\n    The 2nd group of parameters corresponds to the initialization parameters\n      of a BatchNormalization layer. Check keras.layers.normalization.BatchNorma\n      lizationBase for details.\n\n    The 3rd group of parameters corresponds to the initialization parameters\n      specific to this class.\n\n      ema_freeze_delay: int. 
number of steps before batch normalization mv_mean\n        and mv_variance will be frozen and used in the folded layer.\n      folding_mode: string\n        \"ema_stats_folding\": mimic tflite which uses the ema statistics to\n          fold the kernel to suppress quantization induced jitter then performs\n          the correction to have a similar effect of using the current batch\n          statistics.\n        \"batch_stats_folding\": use batch mean and variance to fold kernel first;\n          after enough training steps switch to moving_mean and moving_variance\n          for kernel folding.\n    \"\"\"\n\n    # initialization of the qconv2d part of the composite layer\n    super().__init__(\n        filters=filters,\n        kernel_size=kernel_size,\n        strides=strides,\n        padding=padding,\n        dilation_rate=dilation_rate,\n        activation=activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        bias_regularizer=bias_regularizer,\n        activity_regularizer=activity_regularizer,\n        kernel_constraint=kernel_constraint,\n        bias_constraint=bias_constraint,\n        kernel_quantizer=kernel_quantizer,\n        bias_quantizer=bias_quantizer,\n        **kwargs\n    )\n\n    # initialization of batchnorm part of the composite layer\n    self.batchnorm = layers.BatchNormalization(\n        axis=axis, momentum=momentum, epsilon=epsilon, center=center,\n        scale=scale, beta_initializer=beta_initializer,\n        gamma_initializer=gamma_initializer,\n        moving_mean_initializer=moving_mean_initializer,\n        moving_variance_initializer=moving_variance_initializer,\n        beta_regularizer=beta_regularizer,\n        gamma_regularizer=gamma_regularizer,\n        beta_constraint=beta_constraint, gamma_constraint=gamma_constraint,\n        renorm=renorm, renorm_clipping=renorm_clipping, \n        
renorm_momentum=renorm_momentum, fused=fused, trainable=trainable,\n        virtual_batch_size=virtual_batch_size, adjustment=adjustment)\n\n    self.ema_freeze_delay = ema_freeze_delay\n    assert folding_mode in [\"ema_stats_folding\", \"batch_stats_folding\"]\n    self.folding_mode = folding_mode\n\n  def build(self, input_shape):\n    super(QConv2DBatchnorm, self).build(input_shape)\n\n    # self._iteration (i.e., training_steps) is initialized with -1. When\n    # loading ckpt, it can load the number of training steps that have been\n    # previously trained. If training starts from scratch, it begins at -1.\n    # TODO(lishanok): develop a way to count iterations outside layer\n    self._iteration = tf.Variable(-1, trainable=False, name=\"iteration\",\n                                  dtype=tf.int64)\n\n  def call(self, inputs, training=None):\n\n    # numpy value, mark the layer is in training\n    training = self.batchnorm._get_training_value(training)  # pylint: disable=protected-access\n\n    # check whether to update batchnorm params\n    if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0):\n      # if ema_freeze_delay is None or a negative value, do not freeze bn stats\n      bn_training = tf.cast(training, dtype=bool)\n    else:\n      bn_training = tf.math.logical_and(training, tf.math.less_equal(\n          self._iteration, self.ema_freeze_delay))\n\n    kernel = self.kernel\n\n    # run conv to produce output for the following batchnorm\n    conv_outputs = tf.keras.backend.conv2d(\n        inputs,\n        kernel,\n        strides=self.strides,\n        padding=self.padding,\n        data_format=self.data_format,\n        dilation_rate=self.dilation_rate)\n\n    if self.use_bias:\n      bias = self.bias\n      conv_outputs = tf.keras.backend.bias_add(\n          conv_outputs, bias, data_format=self.data_format)\n    else:\n      bias = 0\n\n    _ = self.batchnorm(conv_outputs, training=bn_training)\n\n    
self._iteration.assign_add(tf_utils.smart_cond(\n        training, lambda: tf.constant(1, tf.int64),\n        lambda: tf.constant(0, tf.int64)))\n\n    # calculate mean and variance from current batch\n    bn_shape = conv_outputs.shape\n    ndims = len(bn_shape)\n    reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis]\n    keep_dims = len(self.batchnorm.axis) > 1\n    mean, variance = self.batchnorm._moments(  # pylint: disable=protected-access\n        math_ops.cast(conv_outputs, self.batchnorm._param_dtype),  # pylint: disable=protected-access\n        reduction_axes,\n        keep_dims=keep_dims)\n    # get batchnorm weights\n    gamma = self.batchnorm.gamma\n    beta = self.batchnorm.beta\n    moving_mean = self.batchnorm.moving_mean\n    moving_variance = self.batchnorm.moving_variance\n\n    if self.folding_mode == \"batch_stats_folding\":\n      # using batch mean and variance in the initial training stage\n      # after sufficient training, switch to moving mean and variance\n      new_mean = tf_utils.smart_cond(\n          bn_training, lambda: mean, lambda: moving_mean)\n      new_variance = tf_utils.smart_cond(\n          bn_training, lambda: variance, lambda: moving_variance)\n\n      # get the inversion factor so that we replace division by multiplication\n      inv = math_ops.rsqrt(new_variance + self.batchnorm.epsilon)\n      if gamma is not None:\n        inv *= gamma\n      # fold bias with bn stats\n      folded_bias = inv * (bias - new_mean) + beta\n\n    elif self.folding_mode == \"ema_stats_folding\":\n      # We always scale the weights with a correction factor to the long term\n      # statistics prior to quantization. This ensures that there is no jitter\n      # in the quantized weights due to batch to batch variation. During the\n      # initial phase of training, we undo the scaling of the weights so that\n      # outputs are identical to regular batch normalization. 
We also modify\n      # the bias terms correspondingly. After sufficient training, switch from\n      # using batch statistics to long term moving averages for batch\n      # normalization.\n\n      # use batch stats for calculating bias before bn freeze, and use moving\n      # stats after bn freeze\n      mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)\n      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)\n\n      if gamma is not None:\n        mv_inv *= gamma\n        batch_inv *= gamma\n      folded_bias = tf_utils.smart_cond(\n          bn_training,\n          lambda: batch_inv * (bias - mean) + beta,\n          lambda: mv_inv * (bias - moving_mean) + beta)\n      # moving stats is always used to fold kernel in tflite; before bn freeze\n      # an additional correction factor will be applied to the conv2d output\n      inv = mv_inv\n    else:\n      assert ValueError\n\n    # wrap conv kernel with bn parameters\n    folded_kernel = inv * kernel\n    # quantize the folded kernel\n    if self.kernel_quantizer is not None:\n      q_folded_kernel = self.kernel_quantizer_internal(folded_kernel)\n    else:\n      q_folded_kernel = folded_kernel\n\n    # If loaded from a ckpt, bias_quantizer is the ckpt value\n    # Else if bias_quantizer not specified, bias\n    #   quantizer is None and we need to calculate bias quantizer\n    #   type according to accumulator type. 
User can call\n    #   bn_folding_utils.populate_bias_quantizer_from_accumulator(\n    #      model, input_quantizer_list]) to populate such bias quantizer.\n    if self.bias_quantizer_internal is not None:\n      q_folded_bias = self.bias_quantizer_internal(folded_bias)\n    else:\n      q_folded_bias = folded_bias\n\n    applied_kernel = q_folded_kernel\n    applied_bias = q_folded_bias\n\n    # calculate conv2d output using the quantized folded kernel\n    folded_outputs = tf.keras.backend.conv2d(\n        inputs,\n        applied_kernel,\n        strides=self.strides,\n        padding=self.padding,\n        data_format=self.data_format,\n        dilation_rate=self.dilation_rate)\n    if training is True and self.folding_mode == \"ema_stats_folding\":\n      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)\n      y_corr = tf_utils.smart_cond(\n          bn_training,\n          lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) *\n                   math_ops.rsqrt(variance + self.batchnorm.epsilon)),\n          lambda: tf.constant(1.0, shape=moving_variance.shape))\n      folded_outputs = math_ops.mul(folded_outputs, y_corr)\n\n    folded_outputs = tf.keras.backend.bias_add(\n        folded_outputs,\n        applied_bias,\n        data_format=self.data_format)\n    if self.activation is not None:\n      return self.activation(folded_outputs)\n\n    return folded_outputs\n\n  def get_config(self):\n    base_config = super().get_config()\n    bn_config = self.batchnorm.get_config()\n    config = {\"ema_freeze_delay\": self.ema_freeze_delay,\n              \"folding_mode\": self.folding_mode}\n    name = base_config[\"name\"]\n    out_config = dict(\n        list(base_config.items())\n        + list(bn_config.items()) + list(config.items()))\n\n    # names from different config override each other; use the base layer name\n    # as this layer's config name\n    out_config[\"name\"] = name\n    return out_config\n\n  def 
get_quantization_config(self):\n    return {\n        \"kernel_quantizer\": str(self.kernel_quantizer_internal),\n        \"bias_quantizer\": str(self.bias_quantizer_internal),\n        \"activation\": str(self.activation),\n        \"filters\": str(self.filters)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n  def get_folded_weights(self):\n    \"\"\"Function to get the batchnorm folded weights.\n\n    This function converts the weights by folding batchnorm parameters into\n    the weight of QConv2D. The high-level equation:\n\n    W_fold = gamma * W / sqrt(variance + epsilon)\n    bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta\n    \"\"\"\n\n    kernel = self.kernel\n    if self.use_bias:\n      bias = self.bias\n    else:\n      bias = 0\n\n    # get batchnorm weights and moving stats\n    gamma = self.batchnorm.gamma\n    beta = self.batchnorm.beta\n    moving_mean = self.batchnorm.moving_mean\n    moving_variance = self.batchnorm.moving_variance\n    # get the inversion factor so that we replace division by multiplication\n    inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)\n    if gamma is not None:\n      inv *= gamma\n\n    # wrap conv kernel and bias with bn parameters\n    folded_kernel = inv * kernel\n    folded_bias = inv * (bias - moving_mean) + beta\n\n    return [folded_kernel, folded_bias]\n"
  },
  {
    "path": "qkeras/qconvolutional.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport warnings\n\nimport numpy as np\nimport tensorflow as tf\nfrom tensorflow.keras import constraints\nfrom tensorflow.keras import initializers\nfrom tensorflow.keras import regularizers\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Conv1D\nfrom tensorflow.keras.layers import Conv2D\nfrom tensorflow.keras.layers import Conv2DTranspose\nfrom tensorflow.keras.layers import DepthwiseConv2D\nfrom tensorflow.keras.layers import Dropout\nfrom tensorflow.keras.layers import InputSpec\nfrom tensorflow.keras.layers import SeparableConv1D\nfrom tensorflow.keras.layers import SeparableConv2D\n\nfrom .qlayers import get_auto_range_constraint_initializer\nfrom .qlayers import QActivation\nfrom .quantizers import get_quantized_initializer\nfrom .quantizers import get_quantizer\nfrom tensorflow.python.eager import context\nfrom tensorflow.python.ops import array_ops\n# from tensorflow.python.ops import array_ops\nfrom tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer\n\n\ndef deconv_output_length(\n    input_length,\n    filter_size,\n    padding,\n    output_padding=None,\n    stride=0,\n    
dilation=1,
):
  """Determines output length of a transposed convolution given input length.

  Args:
      input_length: Integer.
      filter_size: Integer.
      padding: one of `"same"`, `"valid"`, `"full"`.
      output_padding: Integer, amount of padding along the output dimension.
        Can be set to `None` in which case the output length is inferred.
      stride: Integer.
      dilation: Integer.

  Returns:
      The output length (integer).
  """
  assert padding in {"same", "valid", "full"}
  if input_length is None:
    return None

  # Get the dilated kernel size
  filter_size = filter_size + (filter_size - 1) * (dilation - 1)
  pad = 0
  length = 0

  # Infer length if output padding is None, else compute the exact length
  if output_padding is None:
    if padding == "valid":
      length = input_length * stride + max(filter_size - stride, 0)
    elif padding == "full":
      length = input_length * stride - (stride + filter_size - 2)
    elif padding == "same":
      length = input_length * stride
  else:
    # Exact length: invert the forward-convolution length formula and add
    # the user-requested output_padding on top.
    if padding == "same":
      pad = filter_size // 2
    elif padding == "valid":
      pad = 0
    elif padding == "full":
      pad = filter_size - 1

    length = (
        (input_length - 1) * stride + filter_size - 2 * pad + output_padding
    )
  return length


class QConv1D(Conv1D, PrunableLayer):
  """1D convolution layer (e.g. temporal convolution), with quantization.

  Quantized counterpart of `tf.keras.layers.Conv1D`: the kernel (and the
  bias, when present) are passed through the configured quantizers on every
  forward pass.
  """

  # most of these parameters follow the implementation of Conv1D in Keras,
  # with the exception of kernel_range, bias_range, kernel_quantizer
  # and bias_quantizer, and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # kernel_range/bias_range: for quantizer functions whose values
  #   can go over [-1,+1], these values are used to set the clipping
  #   value of kernels and biases, respectively, instead of using the
  #   constraints specified by the user.
  #
  # we refer the reader to the documentation of Conv1D in Keras for the
  # other parameters.
  #

  def __init__(self,
               filters,
               kernel_size,
               strides=1,
               padding="valid",
               dilation_rate=1,
               activation=None,
               use_bias=True,
               kernel_initializer="he_normal",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               bias_quantizer=None,
               kernel_range=None,
               bias_range=None,
               **kwargs):

    # kernel_range/bias_range are accepted only for backward compatibility;
    # they are stored for get_config() but no longer drive clipping.
    if kernel_range is not None:
      warnings.warn("kernel_range is deprecated in QConv1D layer.")

    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QConv1D layer.")

    self.kernel_range = kernel_range
    self.bias_range = bias_range

    self.kernel_quantizer = kernel_quantizer
    self.bias_quantizer = bias_quantizer

    # Resolve string/class/callable quantizer specs into quantizer objects.
    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.kernel_quantizer_internal, self.bias_quantizer_internal
    ]

    # Derive constraint/initializer from the quantizer's range when possible.
    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs
    )

  def call(self, inputs):
    # Quantize the kernel on the fly; fall back to the float kernel when no
    # kernel quantizer was configured.
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    outputs = tf.keras.backend.conv1d(
        inputs,
        quantized_kernel,
        strides=self.strides[0],
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate[0])

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    # NOTE(review): quantizers are serialized via constraints.serialize,
    # matching the convention used by the other Q* layers in this file.
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal# Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal# Google internal code, commented out by copybara
        ),
        "kernel_range": self.kernel_range,
        "bias_range": self.bias_range,
    }
    base_config = super(QConv1D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String summary of the quantization setup (for reporting/debugging).
    return {
        "kernel_quantizer":
            str(self.kernel_quantizer_internal),
        "bias_quantizer":
            str(self.bias_quantizer_internal),
        "activation":
            str(self.activation),
        "filters" : str(self.filters)
    }

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    # Only the kernel participates in pruning; the bias stays dense.
    return [self.kernel]


class QConv2D(Conv2D, PrunableLayer):
  """2D convolution layer (e.g. spatial convolution over images)."""

  # most of these parameters follow the implementation of Conv2D in Keras,
  # with the exception of kernel_range, bias_range, kernel_quantizer
  # and bias_quantizer, and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # kernel_range/bias_range: for quantizer functions whose values
  #   can go over [-1,+1], these values are used to set the clipping
  #   value of kernels and biases, respectively, instead of using the
  #   constraints specified by the user.
  # mask: Optional mask for kernel weights.
  #
  # we refer the reader to the documentation of Conv2D in Keras for the
  # other parameters.
  #

  def __init__(
      self,
      filters,
      kernel_size,
      strides=(1, 1),
      padding="valid",
      data_format="channels_last",
      dilation_rate=(1, 1),
      activation=None,
      use_bias=True,
      kernel_initializer="he_normal",
      bias_initializer="zeros",
      kernel_regularizer=None,
      bias_regularizer=None,
      activity_regularizer=None,
      kernel_constraint=None,
      bias_constraint=None,
      kernel_range=None,
      bias_range=None,
      kernel_quantizer=None,
      bias_quantizer=None,
      mask=None,
      **kwargs,
  ):

    if kernel_range is not None:
      warnings.warn("kernel_range is deprecated in QConv2D layer.")

    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QConv2D layer.")

    self.kernel_range = kernel_range
    self.bias_range = bias_range

    self.kernel_quantizer = kernel_quantizer
    self.bias_quantizer = bias_quantizer

    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.kernel_quantizer_internal, self.bias_quantizer_internal
    ]

    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))

    if activation is not None:
      activation = get_quantizer(activation)

    if mask is not None:
      shape = mask.shape
      if len(shape) < 2:
        raise ValueError(
            "Expected shape to have rank at least 2 but provided shape has"
            f" rank {len(shape)}"
        )
      h, w = shape[0], shape[1]
      self._mask = np.reshape(
          mask, (h, w, 1, 1)
      )  # Extend the dimension to be 4D.
    else:
      self._mask = None

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs
    )

  def convolution_op(self, inputs, kernel):
    # Plain (uncompiled) conv2d; call() may route through the jit-compiled
    # variant below for grouped convolutions.
    return tf.keras.backend.conv2d(
        inputs,
        kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate,
    )

  @tf.function(jit_compile=True)
  def _jit_compiled_convolution_op(self, inputs, kernel):
    return self.convolution_op(inputs, kernel)

  def call(self, inputs):
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    if self._mask is not None:
      # Apply mask to kernel weights if one is provided.
      quantized_kernel = quantized_kernel * self._mask

    # Grouped convolutions are not fully supported on the CPU for compiled
    # functions.
    #
    # This is a workaround taken from TF's core library. Remove when proper
    # support is added.
    # See definition of function "_jit_compiled_convolution_op" at
    # cs/third_party/py/tf_keras/layers/convolutional/base_conv.py for more
    # details.
    if self.groups > 1:
      outputs = self._jit_compiled_convolution_op(
          inputs, tf.convert_to_tensor(quantized_kernel)
      )
    else:
      outputs = self.convolution_op(inputs, quantized_kernel)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format
      )

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal# Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal# Google internal code, commented out by copybara
        ),
        "kernel_range": 
self.kernel_range,\n        \"bias_range\": self.bias_range,\n        \"mask\": self._mask.tolist() if self._mask is not None else None,\n    }\n    base_config = super().get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  @classmethod\n  def from_config(cls, config):\n    mask = config.get(\"mask\")\n    if mask is not None:\n      mask = np.array(mask)\n    config[\"mask\"] = mask\n    return cls(**config)\n\n  def get_quantization_config(self):\n    return {\n        \"kernel_quantizer\":\n            str(self.kernel_quantizer_internal),\n        \"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"activation\":\n            str(self.activation),\n        \"filters\" : str(self.filters)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n  def get_prunable_weights(self):\n    return [self.kernel]\n\n\nclass QConv2DTranspose(Conv2DTranspose, PrunableLayer):\n  \"\"\"2D convolution layer (e.g. spatial convolution over images).\"\"\"\n\n  # most of these parameters follow the implementation of Conv2DTranspose\n  # in Keras, with the exception of kernel_quantizer and bias_quantizer\n  # and kernel_initializer.\n  #\n  # kernel_quantizer: quantizer function/class for kernel\n  # bias_quantizer: quantizer function/class for bias\n  #\n  # we refer the reader to the documentation of Conv2DTranspose in Keras for\n  # the other parameters.\n  #\n\n  def __init__(self,\n               filters,\n               kernel_size,\n               strides=(1, 1),\n               padding='valid',\n               output_padding=None,\n               data_format=None,\n               dilation_rate=(1, 1),\n               activation=None,\n               use_bias=True,\n               kernel_initializer='glorot_uniform',\n               bias_initializer='zeros',\n               kernel_regularizer=None,\n               bias_regularizer=None,\n               activity_regularizer=None,\n               
kernel_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               bias_quantizer=None,\n               **kwargs):\n\n    self.kernel_quantizer = kernel_quantizer\n    self.bias_quantizer = bias_quantizer\n\n    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n\n    # optimize parameter set to \"auto\" scaling mode if possible\n    if hasattr(self.kernel_quantizer_internal, \"_set_trainable_parameter\"):\n      self.kernel_quantizer_internal._set_trainable_parameter()\n\n    self.quantizers = [\n        self.kernel_quantizer_internal, self.bias_quantizer_internal\n    ]\n\n    kernel_constraint, kernel_initializer = (\n        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,\n                                              kernel_constraint,\n                                              kernel_initializer))\n\n    if use_bias:\n      bias_constraint, bias_initializer = (\n          get_auto_range_constraint_initializer(self.bias_quantizer_internal,\n                                                bias_constraint,\n                                                bias_initializer))\n\n    if activation is not None:\n      activation = get_quantizer(activation)\n\n    super().__init__(\n        filters=filters,\n        kernel_size=kernel_size,\n        strides=strides,\n        padding=padding,\n        output_padding=None,\n        data_format=data_format,\n        dilation_rate=dilation_rate,\n        activation=activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        bias_regularizer=bias_regularizer,\n        activity_regularizer=activity_regularizer,\n        kernel_constraint=kernel_constraint,\n        bias_constraint=bias_constraint,\n        **kwargs\n    )\n\n  def 
call(self, inputs):
    """Performs the quantized transposed convolution on `inputs`."""
    inputs_shape = array_ops.shape(inputs)
    batch_size = inputs_shape[0]
    if self.data_format == 'channels_first':
      h_axis, w_axis = 2, 3
    else:
      h_axis, w_axis = 1, 2

    height, width = inputs_shape[h_axis], inputs_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    if self.output_padding is None:
      out_pad_h = out_pad_w = None
    else:
      out_pad_h, out_pad_w = self.output_padding

    # Infer the dynamic output shape:
    out_height = deconv_output_length(height,
                                      kernel_h,
                                      padding=self.padding,
                                      output_padding=out_pad_h,
                                      stride=stride_h,
                                      dilation=self.dilation_rate[0])
    out_width = deconv_output_length(width,
                                     kernel_w,
                                     padding=self.padding,
                                     output_padding=out_pad_w,
                                     stride=stride_w,
                                     dilation=self.dilation_rate[1])
    if self.data_format == 'channels_first':
      output_shape = (batch_size, self.filters, out_height, out_width)
    else:
      output_shape = (batch_size, out_height, out_width, self.filters)

    # Quantize the kernel on the fly; fall back to the float kernel when no
    # kernel quantizer was configured.
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    output_shape_tensor = array_ops.stack(output_shape)
    outputs = tf.keras.backend.conv2d_transpose(
        inputs,
        quantized_kernel,
        output_shape_tensor,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if not context.executing_eagerly():
      # Infer the static output shape:
      out_shape = self.compute_output_shape(inputs.shape)
      outputs.set_shape(out_shape)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal# Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal# Google internal code, commented out by copybara
        ),
    }
    base_config = super(QConv2DTranspose, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    # Only the kernel participates in pruning; the bias stays dense.
    return [self.kernel]


class QSeparableConv1D(SeparableConv1D, PrunableLayer):
  """Depthwise separable 1D convolution."""

  # most of these parameters follow the implementation of SeparableConv1D
  # in Keras, with the exception of depthwise_quantizer, pointwise_quantizer
  # and bias_quantizer.
  #
  # depthwise_quantizer: quantizer function/class for depthwise spatial kernel
  # pointwise_quantizer: quantizer function/class for pointwise kernel
  # bias_quantizer: quantizer function/class for bias
  #
  # we refer the reader to the documentation of SeparableConv1D in Keras for
  # the other parameters.
  #

  def __init__(self,
               filters,
               kernel_size,
               strides=1,
               padding='valid',
               data_format=None,
               dilation_rate=1,
               depth_multiplier=1,
               activation=None,
               use_bias=True,
               depthwise_initializer='glorot_uniform',
               pointwise_initializer='glorot_uniform',
               bias_initializer='zeros',
               depthwise_regularizer=None,
               pointwise_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               depthwise_constraint=None,
               pointwise_constraint=None,
               bias_constraint=None,
               depthwise_quantizer=None,
               pointwise_quantizer=None,
               bias_quantizer=None,
               **kwargs):

    self.depthwise_quantizer = depthwise_quantizer
    self.pointwise_quantizer = pointwise_quantizer
    self.bias_quantizer = bias_quantizer

    # Resolve string/class/callable quantizer specs into quantizer objects.
    self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer)
    self.pointwise_quantizer_internal = get_quantizer(self.pointwise_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"):
      self.depthwise_quantizer_internal._set_trainable_parameter()

    if hasattr(self.pointwise_quantizer_internal, "_set_trainable_parameter"):
      self.pointwise_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.depthwise_quantizer_internal, self.pointwise_quantizer_internal,
        self.bias_quantizer_internal
    ]

    # Derive constraints/initializers from the quantizers' ranges when possible.
    depthwise_constraint, depthwise_initializer = (
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))

    pointwise_constraint, pointwise_initializer = (
        get_auto_range_constraint_initializer(self.pointwise_quantizer_internal,
                                              pointwise_constraint,
                                              pointwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))

    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        depth_multiplier=depth_multiplier,
        activation=activation,
        use_bias=use_bias,
        depthwise_initializer=initializers.get(depthwise_initializer),
        pointwise_initializer=initializers.get(pointwise_initializer),
        bias_initializer=initializers.get(bias_initializer),
        depthwise_regularizer=regularizers.get(depthwise_regularizer),
        pointwise_regularizer=regularizers.get(pointwise_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        depthwise_constraint=constraints.get(depthwise_constraint),
        pointwise_constraint=constraints.get(pointwise_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs
    )

  def call(self, inputs):
    if self.padding == 'causal':
      inputs = array_ops.pad(inputs, self._compute_causal_padding())

    spatial_start_dim = 1 if self.data_format == 'channels_last' else 2

    # Explicitly broadcast inputs and kernels to 4D.
    inputs = array_ops.expand_dims(inputs, spatial_start_dim)
    depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0)
    pointwise_kernel = array_ops.expand_dims(self.pointwise_kernel, 0)
    dilation_rate = (1,) + self.dilation_rate

    if self.padding == 'causal':
      op_padding = 'valid'
    else:
      op_padding = self.padding

    if self.depthwise_quantizer:
      quantized_depthwise_kernel = self.depthwise_quantizer_internal(
          depthwise_kernel)
    else:
      quantized_depthwise_kernel = depthwise_kernel

    if self.pointwise_quantizer:
      quantized_pointwise_kernel = self.pointwise_quantizer_internal(
          pointwise_kernel)
    else:
      quantized_pointwise_kernel = pointwise_kernel

    # The 1D separable conv is emulated with the 2D op on the 4D-expanded
    # inputs above; the single stride is duplicated for the dummy spatial dim.
    outputs = tf.keras.backend.separable_conv2d(
        inputs,
        quantized_depthwise_kernel,
        quantized_pointwise_kernel,
        strides=self.strides * 2,
        padding=op_padding,
        dilation_rate=dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    # Drop the dummy spatial dimension that was added above.
    outputs = array_ops.squeeze(outputs, [spatial_start_dim])

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "depthwise_quantizer": constraints.serialize(
            self.depthwise_quantizer_internal# Google internal code, commented out by copybara
        ),
        "pointwise_quantizer": constraints.serialize(
            self.pointwise_quantizer_internal# Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal# Google internal code, commented out by copybara
        ),
    }
    base_config = super(QSeparableConv1D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.depthwise_kernel, self.pointwise_kernel]


class QSeparableConv2D(SeparableConv2D, PrunableLayer):
  """Depthwise separable 2D convolution."""

  # most of these parameters follow the implementation of SeparableConv2D
  # in Keras, with the exception of depthwise_quantizer, pointwise_quantizer
  # and bias_quantizer.
  #
  # depthwise_quantizer: quantizer function/class for depthwise spatial kernel
  # pointwise_quantizer: quantizer function/class for pointwise kernel
  # bias_quantizer: quantizer function/class for bias
  #
  # we refer the reader to the documentation of SeparableConv2D in Keras for
  # the other parameters.
  #

  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format=None,
               dilation_rate=(1, 1),
               depth_multiplier=1,
               activation=None,
               use_bias=True,
               depthwise_initializer='glorot_uniform',
               pointwise_initializer='glorot_uniform',
               bias_initializer='zeros',
               depthwise_regularizer=None,
               pointwise_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               depthwise_constraint=None,
               pointwise_constraint=None,
               bias_constraint=None,
               depthwise_quantizer=None,
               pointwise_quantizer=None,
               bias_quantizer=None,
               **kwargs):

    self.depthwise_quantizer = depthwise_quantizer
    self.pointwise_quantizer = pointwise_quantizer
    self.bias_quantizer = bias_quantizer

    # Resolve string/class/callable quantizer specs into quantizer objects.
    self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer)
    self.pointwise_quantizer_internal = get_quantizer(self.pointwise_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"):
      self.depthwise_quantizer_internal._set_trainable_parameter()

    if hasattr(self.pointwise_quantizer_internal, "_set_trainable_parameter"):
      self.pointwise_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.depthwise_quantizer_internal, self.pointwise_quantizer_internal,
        self.bias_quantizer_internal
    ]

    depthwise_constraint, depthwise_initializer = (
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))

    pointwise_constraint, pointwise_initializer = (
        get_auto_range_constraint_initializer(self.pointwise_quantizer_internal,
                                              pointwise_constraint,
                                              pointwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))

    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        depth_multiplier=depth_multiplier,
        activation=activation,
        use_bias=use_bias,
        depthwise_initializer=initializers.get(depthwise_initializer),
        pointwise_initializer=initializers.get(pointwise_initializer),
        bias_initializer=initializers.get(bias_initializer),
        depthwise_regularizer=regularizers.get(depthwise_regularizer),
        pointwise_regularizer=regularizers.get(pointwise_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        depthwise_constraint=constraints.get(depthwise_constraint),
        pointwise_constraint=constraints.get(pointwise_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs
    )

  def call(self, inputs):
    # Apply the actual ops.
    if self.depthwise_quantizer:
      quantized_depthwise_kernel = self.depthwise_quantizer_internal(
          self.depthwise_kernel)
    else:
      quantized_depthwise_kernel = self.depthwise_kernel

    if self.pointwise_quantizer:
      quantized_pointwise_kernel = self.pointwise_quantizer_internal(
          self.pointwise_kernel)
    else:
      quantized_pointwise_kernel = self.pointwise_kernel

    outputs = tf.keras.backend.separable_conv2d(
        inputs,
        quantized_depthwise_kernel,
        quantized_pointwise_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "depthwise_quantizer": constraints.serialize(
            self.depthwise_quantizer_internal# Google internal code, commented out by copybara
        ),
        "pointwise_quantizer": constraints.serialize(
            self.pointwise_quantizer_internal# Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal# Google internal code, commented out by copybara
        ),
    }
    base_config = super(QSeparableConv2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.depthwise_kernel, self.pointwise_kernel]


class QDepthwiseConv2D(DepthwiseConv2D, PrunableLayer):
  """Creates quantized depthwise conv2d. Copied from mobilenet."""

  # most of these parameters follow the implementation of DepthwiseConv2D
  # in Keras, # with the exception of depthwise_range, bias_range,
  # depthwise_quantizer # and bias_quantizer, and kernel_initializer.
  #
  # depthwise_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # depthwise_range/bias_range: for quantizer functions whose values
  #   can go over [-1,+1], these values are used to set the clipping
  #   value of kernels and biases, respectively, instead of using the
  #   constraints specified by the user.
  #
  # we refer the reader to the documentation of DepthwiseConv2D in Keras for the
  # other parameters.
  #

  def __init__(self,
               kernel_size,
               strides=(1, 1),
               padding="VALID",
               depth_multiplier=1,
               data_format=None,
               activation=None,
               use_bias=True,
               depthwise_initializer="he_normal",
               bias_initializer="zeros",
               depthwise_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               depthwise_constraint=None,
               bias_constraint=None,
               dilation_rate=(1, 1),
               depthwise_quantizer=None,
               bias_quantizer=None,
               depthwise_range=None,
               bias_range=None,
               **kwargs):

    # depthwise_range/bias_range are accepted only for backward compatibility;
    # they are stored for get_config() but no longer drive clipping.
    if depthwise_range is not None:
      warnings.warn("depthwise_range is deprecated in QDepthwiseConv2D layer.")

    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QDepthwiseConv2D layer.")

    self.depthwise_range = depthwise_range
    self.bias_range = bias_range

    self.depthwise_quantizer = depthwise_quantizer
    self.bias_quantizer = bias_quantizer

    self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"):
      self.depthwise_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.depthwise_quantizer_internal, self.bias_quantizer_internal
    ]

    depthwise_constraint, depthwise_initializer = (
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        activation=activation,
        use_bias=use_bias,
        depthwise_regularizer=depthwise_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        depth_multiplier=depth_multiplier,
        depthwise_initializer=depthwise_initializer,
        bias_initializer=bias_initializer,
        depthwise_constraint=depthwise_constraint,
        bias_constraint=bias_constraint,
        dilation_rate=dilation_rate,
        **kwargs
    )

  def build(self, input_shape):
    # Creates the depthwise kernel (and optional bias) weights; mirrors the
    # Keras DepthwiseConv2D build but keeps the quantizer-derived
    # initializers/constraints set up in __init__.
    if len(input_shape) < 4:
      raise ValueError(
          "Inputs to `QDepthwiseConv2D` should have rank 4. "
          "Received input shape:", str(input_shape))
    if self.data_format == "channels_first":
      channel_axis = 1
    else:
      channel_axis = 3
    if input_shape[channel_axis] is None:
      raise ValueError("The channel dimension of the inputs to "
                       "`QDepthwiseConv2D` "
                       "should be defined. Found `None`.")
    input_dim = int(input_shape[channel_axis])
    depthwise_kernel_shape = (self.kernel_size[0], self.kernel_size[1],
                              input_dim, self.depth_multiplier)

    self.depthwise_kernel = self.add_weight(
        shape=depthwise_kernel_shape,
        initializer=self.depthwise_initializer,
        name="depthwise_kernel",
        regularizer=self.depthwise_regularizer,
        constraint=self.depthwise_constraint)

    if self.use_bias:
      self.bias = self.add_weight(
          shape=(input_dim * self.depth_multiplier,),
          initializer=self.bias_initializer,
          name="bias",
          regularizer=self.bias_regularizer,
          constraint=self.bias_constraint)
    else:
      self.bias = None
    # Set input spec.
    self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
    self.built = True

  def call(self, inputs, training=None):
    # Quantize the depthwise kernel on the fly; fall back to the float
    # kernel when no quantizer was configured.
    if self.depthwise_quantizer:
      quantized_depthwise_kernel = (
          self.depthwise_quantizer_internal(self.depthwise_kernel))
    else:
      quantized_depthwise_kernel = self.depthwise_kernel
    outputs = tf.keras.backend.depthwise_conv2d(
        inputs,
        quantized_depthwise_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias
      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)

    return outputs

  def get_config(self):
    # Remove kernel_* entries inherited from the Conv base config; this
    # layer exposes depthwise_* equivalents instead.
    config = super(QDepthwiseConv2D, self).get_config()
    config.pop("filters", None)
    config.pop("kernel_initializer", None)
    config.pop("kernel_regularizer", None)
    config.pop("kernel_constraint", None)
    config["depth_multiplier"] = self.depth_multiplier
    config["depthwise_initializer"] = initializers.serialize(
        self.depthwise_initializer# Google internal code, commented out by copybara
    )
    config["depthwise_regularizer"] = regularizers.serialize(
        self.depthwise_regularizer# Google internal code, commented out by copybara
    )
    config["depthwise_constraint"] = constraints.serialize(
        self.depthwise_constraint# Google internal code, commented out by copybara
    )
    config["depthwise_quantizer"] = constraints.serialize(
        self.depthwise_quantizer_internal# Google internal code, commented out by copybara
    )
    config["bias_quantizer"] = constraints.serialize(
        self.bias_quantizer_internal# Google internal code, commented out by copybara
    )
    config["depthwise_range"] = self.depthwise_range
    config["bias_range"] = self.bias_range
    return config

  def get_quantization_config(self):
    return {
        "depthwise_quantizer_internal":
            str(self.depthwise_quantizer_internal),
        
\"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"activation\":\n            str(self.activation),\n        \"filters\" : str(self.filters)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n  def get_prunable_weights(self):\n    return [self.depthwise_kernel]\n\n\ndef QMobileNetSeparableConv2D(\n    filters,  # pylint: disable=invalid-name\n    kernel_size,\n    strides=(1, 1),\n    padding=\"VALID\",\n    dilation_rate=(1, 1),\n    depth_multiplier=1,\n    activation=None,\n    use_bias=True,\n    depthwise_initializer=\"he_normal\",\n    pointwise_initializer=\"he_normal\",\n    bias_initializer=\"zeros\",\n    depthwise_regularizer=None,\n    pointwise_regularizer=None,\n    bias_regularizer=None,\n    activity_regularizer=None,\n    depthwise_constraint=None,\n    pointwise_constraint=None,\n    bias_constraint=None,\n    depthwise_quantizer=None,\n    pointwise_quantizer=None,\n    bias_quantizer=None,\n    depthwise_activation=None,\n    depthwise_range=None,\n    pointwise_range=None,\n    bias_range=None,\n    depthwise_dropout_rate=0.0,\n    pw_first=False,\n    name=\"\"):\n  \"\"\"Adds a quantized separableconv2d.\"\"\"\n\n  # we use here a modified version that appeared in mobilenet that adds\n  # quantization to the network, and possibly an intermediate activation\n  # layer that acts as a quantizer and possible dropout layer between\n  # the depthwise and pointwise convolutions.\n  #\n  # since this implementation expands into depthwise -> pointwise\n  # convolutions, the users will not see a separable convolution operation\n  # in model.summary(), but rather a depthwise convolution followed by a\n  # pointwise convolution.\n  #\n  # depthwise_quantizer: depthwise quantization function\n  # pointwise_quantizer: pointwise quantization function\n  # bias_quantizer: bias quantization function for the pointwise convolution\n  # depthwise_range/pointwise_range/bias_range: ranges to be used if\n  # 
quantization values can become greater than -1 and +1.\n  # depthwise_dropout_rate: dropout between depthwise and pointwise is added\n  #   if rate > 0.0\n  # pw_first: this may disappear in the future, but as deep quantized networks\n  #   sometimes behave in different ways, if we are using binary or ternary\n  #   quantization, it may be better to apply pointwise before depthwise.\n  #\n  # For the remaining parameters, please refer to Keras implementation of\n  # SeparableConv2D.\n  #\n\n  def _call(inputs):  # pylint: disable=invalid-name\n    \"\"\"Internally builds qseparableconv2d.\"\"\"\n\n    x = inputs\n\n    if pw_first:\n      x = QConv2D(\n          filters, (1, 1),\n          strides=(1, 1),\n          padding=\"same\",\n          use_bias=use_bias,\n          kernel_constraint=pointwise_constraint,\n          kernel_initializer=pointwise_initializer,\n          kernel_regularizer=pointwise_regularizer,\n          kernel_quantizer=pointwise_quantizer,\n          bias_quantizer=bias_quantizer,\n          bias_regularizer=bias_regularizer,\n          bias_initializer=bias_initializer,\n          bias_constraint=bias_constraint,\n          activity_regularizer=activity_regularizer,\n          kernel_range=pointwise_range,\n          bias_range=bias_range,\n          name=name + \"_pw\")(\n              x)\n\n      if depthwise_activation:\n        if isinstance(depthwise_activation, QActivation):\n          x = depthwise_activation(x)\n        else:\n          x = QActivation(depthwise_activation, name=name + \"_dw_act\")(x)\n\n      if depthwise_dropout_rate > 0.0:\n        x = Dropout(rate=depthwise_dropout_rate, name=name + \"_dw_dropout\")(x)\n\n    x = QDepthwiseConv2D(\n        kernel_size,\n        strides=strides,\n        dilation_rate=dilation_rate,\n        padding=padding,\n        depth_multiplier=depth_multiplier,\n        use_bias=False,\n        depthwise_regularizer=depthwise_regularizer,\n        
depthwise_initializer=depthwise_initializer,\n        depthwise_constraint=depthwise_constraint,\n        depthwise_quantizer=depthwise_quantizer,\n        depthwise_range=depthwise_range,\n        name=name + \"_dw\")(\n            x)\n\n    if not pw_first:\n      if depthwise_activation:\n        if isinstance(depthwise_activation, QActivation):\n          x = depthwise_activation(x)\n        else:\n          x = QActivation(depthwise_activation, name=name + \"_dw_act\")(x)\n\n      if depthwise_dropout_rate > 0.0:\n        x = Dropout(rate=depthwise_dropout_rate, name=name + \"_dw_dropout\")(x)\n\n      x = QConv2D(\n          filters, (1, 1),\n          strides=(1, 1),\n          padding=\"same\",\n          use_bias=use_bias,\n          kernel_constraint=pointwise_constraint,\n          kernel_initializer=pointwise_initializer,\n          kernel_regularizer=pointwise_regularizer,\n          kernel_quantizer=pointwise_quantizer,\n          bias_quantizer=bias_quantizer,\n          bias_regularizer=bias_regularizer,\n          bias_initializer=bias_initializer,\n          bias_constraint=bias_constraint,\n          activity_regularizer=activity_regularizer,\n          kernel_range=pointwise_range,\n          bias_range=bias_range,\n          name=name + \"_pw\")(\n              x)\n\n    if activation:\n      if isinstance(activation, QActivation):\n        x = activation(x)\n      else:\n        x = Activation(activation, name=name + \"_pw_act\")(x)\n    return x\n\n  return _call\n"
  },
  {
    "path": "qkeras/qdepthwise_conv2d_transpose.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\nimport tensorflow as tf\nfrom tensorflow.keras.layers import Conv2DTranspose\nfrom tensorflow.keras.layers import InputSpec\n\nfrom .qconvolutional import deconv_output_length\nfrom .quantizers import get_quantizer\nfrom tensorflow.python.eager import context\nfrom tensorflow.python.keras import constraints\nfrom tensorflow.python.ops import array_ops\nfrom tensorflow.python.ops import array_ops\n\n\n# TODO(akshayap): Commonized functionality with QSeparableConv2DTranspose.\nclass QDepthwiseConv2DTranspose(Conv2DTranspose):\n  \"\"\"Quantized Depthwise Conv2DTranspose layer.\"\"\"\n\n  # Most of these parameters follow the implementation of Conv2DTranspose\n  # in Keras, with the exception of following parameters.\n  #\n  # depthwise_activation: activation quantizer for depthwise convolution\n  # depthwise_kernel_quantizer: quantizer function/class for depthwise kernel\n  # bias_quantizer: quantizer function/class for bias\n  #\n  # we refer the reader to the documentation of Conv2DTranspose in Keras for\n  # the other parameters.\n\n  def __init__(\n      self,\n      filters,\n      kernel_size,\n      group_size=1,\n      strides=(1, 1),\n      padding=\"valid\",\n      output_padding=None,\n      depth_multiplier=1,\n      depthwise_activation=None,\n      use_bias=True,\n      
depthwise_kernel_quantizer=None,\n      bias_quantizer=None,\n      **kwargs,\n  ):\n\n    self.filters = filters\n    self.kernel_size = kernel_size\n    self.strides = strides\n    self.padding = padding\n    self.output_padding = output_padding\n    self.depth_multiplier = depth_multiplier\n    self.depthwise_activation = depthwise_activation\n    self.use_bias = use_bias\n    self.group_size = group_size\n\n    self.depthwise_kernel_quantizer = depthwise_kernel_quantizer\n    self.bias_quantizer = bias_quantizer\n\n    self.depthwise_kernel_quantizer_internal = get_quantizer(\n        self.depthwise_kernel_quantizer\n    )\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n\n    # optimize parameter set to \"auto\" scaling mode if possible\n    for q in [\n        self.depthwise_kernel_quantizer_internal,\n    ]:\n      if hasattr(q, \"_set_trainable_parameter\"):\n        q._set_trainable_parameter()\n\n    if depthwise_activation is not None:\n      self.depthwise_activation = get_quantizer(depthwise_activation)\n\n    super().__init__(\n        filters=filters,\n        kernel_size=kernel_size,\n        strides=strides,\n        padding=padding,\n        use_bias=use_bias,\n        **kwargs,\n    )\n\n  def _get_input_axis(self):\n    if self.data_format == \"channels_first\":\n      b_axis, c_axis, h_axis, w_axis = 0, 1, 2, 3\n    else:\n      b_axis, c_axis, h_axis, w_axis = 0, 3, 1, 2\n\n    return b_axis, c_axis, h_axis, w_axis\n\n  def _get_input_dims(self, input_shape):\n    b_axis, c_axis, h_axis, w_axis = self._get_input_axis()\n\n    return (\n        input_shape[b_axis],\n        input_shape[c_axis],\n        input_shape[h_axis],\n        input_shape[w_axis],\n    )\n\n  def _get_output_size(\n      self,\n      inputs,\n      output_padding,\n      padding,\n      strides,\n      dilation_rate,\n      kernel_h,\n      kernel_w,\n  ):\n    input_shape = array_ops.shape(inputs)\n    batch_size, _, height, width = 
self._get_input_dims(input_shape)\n    stride_h, stride_w = strides\n\n    dilation_h, dilation_w = dilation_rate[0], dilation_rate[1]\n\n    if output_padding is None:\n      out_pad_h = out_pad_w = None\n    else:\n      out_pad_h, out_pad_w = output_padding\n\n    # Infer the dynamic output shape:\n    out_height = deconv_output_length(\n        height,\n        kernel_h,\n        padding=padding,\n        output_padding=out_pad_h,\n        stride=stride_h,\n        dilation=dilation_h,\n    )\n\n    out_width = deconv_output_length(\n        width,\n        kernel_w,\n        padding=padding,\n        output_padding=out_pad_w,\n        stride=stride_w,\n        dilation=dilation_w,\n    )\n\n    return (batch_size, out_height, out_width)\n\n  def build(self, input_shape):\n    self._input_shape = input_shape\n\n    _, input_channel, _, _ = self._get_input_dims(input_shape)\n    channel_axis = self._get_input_axis()[1]\n\n    self.input_spec = InputSpec(\n        min_ndim=self.rank + 2, axes={channel_axis: input_channel}\n    )\n    # When setting kernel shape=(kw, kh, 1, input_channel), it does depthwise\n    # convolution.\n    depthwise_kernel_shape = self.kernel_size + (\n        input_channel,\n        self.group_size,\n    )\n\n    self.depthwise_kernel = self.add_weight(\n        name=f\"depthwise_kernel\",\n        shape=depthwise_kernel_shape,\n        initializer=self.kernel_initializer,\n        regularizer=self.kernel_regularizer,\n        constraint=self.kernel_constraint,\n        trainable=True,\n        dtype=self.dtype,\n    )\n\n    if self.use_bias:\n      self.bias = self.add_weight(\n          name=\"bias\",\n          shape=(self.filters,),\n          initializer=self.bias_initializer,\n          regularizer=self.bias_regularizer,\n          constraint=self.bias_constraint,\n          trainable=True,\n          dtype=self.dtype,\n      )\n    else:\n      self.bias = None\n\n    self.built = True\n\n  def compute_final_output_shape(self, 
input_shape, kernel_size, strides):\n    input_shape = tf.TensorShape(input_shape).as_list()\n    # By using list(), output_shape is a copy of input_shape, instead of a\n    # reference to input_shape.\n    output_shape = list(input_shape)\n    _, c_axis, h_axis, w_axis = self._get_input_axis()\n\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = strides\n\n    if self.output_padding is None:\n      out_pad_h = out_pad_w = None\n    else:\n      out_pad_h, out_pad_w = self.output_padding\n\n    # Convolution is performed separately on each spatial domain.\n    output_shape[c_axis] = input_shape[c_axis]\n\n    output_shape[h_axis] = deconv_output_length(\n        output_shape[h_axis],\n        kernel_h,\n        padding=self.padding,\n        output_padding=out_pad_h,\n        stride=stride_h,\n        dilation=self.dilation_rate[0],\n    )\n    output_shape[w_axis] = deconv_output_length(\n        output_shape[w_axis],\n        kernel_w,\n        padding=self.padding,\n        output_padding=out_pad_w,\n        stride=stride_w,\n        dilation=self.dilation_rate[1],\n    )\n    return tf.TensorShape(output_shape)\n\n  def conv_transpose_op(\n      self,\n      inputs,\n      filters,\n      strides,\n      padding,\n      output_padding,\n      dilation_rate,\n      kernel_quantizer,\n      kernel_weights,\n      use_bias,\n      bias_quantizer,\n      bias,\n      activation,\n  ):\n    \"\"\"Transpose convolution operation.\"\"\"\n\n    kernel_h, kernel_w = self.kernel_size\n    batch_size, out_height, out_width = self._get_output_size(\n        inputs,\n        output_padding,\n        padding,\n        strides,\n        dilation_rate,\n        kernel_h,\n        kernel_w,\n    )\n\n    if kernel_quantizer:\n      quantized_kernel = kernel_quantizer(kernel_weights)\n    else:\n      quantized_kernel = kernel_weights\n\n    output_filters = self.group_size\n\n    if self.data_format == \"channels_first\":\n      output_shape = (batch_size, 
output_filters, out_height, out_width)\n    else:\n      output_shape = (batch_size, out_height, out_width, output_filters)\n\n    output_shape_tensor = array_ops.stack(output_shape)\n\n    num_input_channels = self._input_shape[-1]\n    if num_input_channels % self.group_size:\n      raise ValueError(\n          \"Input channels should be exactly divisible by group_size.\"\n      )\n    num_output_groups = num_input_channels // self.group_size\n\n    # Split the input channels into groups.\n    x = tf.split(inputs, num_output_groups, axis=-1)\n\n    # For depthwise convolution, since CPU doesn't support grouped\n    # convolution, we run convolution on each slice of inputs and concat\n    # the results.\n    outputs = [\n        tf.keras.backend.conv2d_transpose(\n            x=x[i],\n            kernel=quantized_kernel[\n                :,\n                :,\n                self.group_size * i : self.group_size * (i + 1),\n                :,\n            ],\n            output_shape=output_shape_tensor,\n            strides=strides,\n            padding=padding,\n            data_format=self.data_format,\n            dilation_rate=dilation_rate,\n        )\n        for i in range(num_output_groups)\n    ]\n\n    # Concat the channels.\n    outputs = tf.concat(outputs, axis=-1)\n\n    if not context.executing_eagerly():\n      # Infer the static output shape:\n      out_shape = self.compute_final_output_shape(\n          input_shape=inputs.shape,\n          kernel_size=(kernel_h, kernel_w),\n          strides=strides,\n      )\n      outputs.set_shape(out_shape)\n\n    if use_bias:\n      quantized_bias = bias_quantizer(bias) if bias_quantizer else bias\n      outputs = tf.keras.backend.bias_add(\n          outputs, quantized_bias, data_format=self.data_format\n      )\n\n    if activation is not None:\n      return activation(outputs)\n\n    return outputs\n\n  def call(self, inputs):\n    input_shape = array_ops.shape(inputs)\n    _, input_channel, _, _ = 
self._get_input_dims(input_shape)\n\n    return self.conv_transpose_op(\n        inputs=inputs,\n        # Depthwise convolution doesn't operate across channels. Thereofore its\n        # output channels is the same as input channels.\n        filters=input_channel,\n        strides=self.strides,\n        padding=self.padding,\n        output_padding=self.output_padding,\n        dilation_rate=self.dilation_rate,\n        kernel_quantizer=self.depthwise_kernel_quantizer_internal,\n        kernel_weights=self.depthwise_kernel,\n        use_bias=False,  # Usually set bias=False for depthwise conv.\n        bias_quantizer=None,\n        bias=None,\n        activation=self.depthwise_activation,\n    )\n\n  def get_config(self):\n    config = super().get_config()\n    config.update({\n        \"filters\": self.filters,\n        \"kernel_size\": self.kernel_size,\n        \"strides\": self.strides,\n        \"padding\": self.padding,\n        \"output_padding\": self.output_padding,\n        \"dilation_rate\": self.dilation_rate,\n        \"data_format\": self.data_format,\n        \"depth_multiplier\": self.depth_multiplier,\n        \"activation\": self.activation,\n        \"use_bias\": self.use_bias,\n        \"depthwise_kernel_quantizer\": constraints.serialize(\n            self.depthwise_kernel_quantizer_internal\n        ),\n        \"bias_quantizer\": constraints.serialize(\n            self.bias_quantizer_internal,\n        ),\n        \"group_size\": self.group_size,\n    })\n    return config\n\n  def get_quantizers(self):\n    return [\n        self.depthwise_kernel_quantizer_internal,\n        self.bias_quantizer_internal,\n        self.depthwise_activation,\n    ]\n\n  def get_prunable_weights(self):\n    w = [self.depthwise_kernel]\n    if self.use_bias:\n      w.append(self.bias)\n\n    return w\n"
  },
  {
    "path": "qkeras/qdepthwiseconv2d_batchnorm.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Fold batchnormalization with previous QDepthwiseConv2D layers.\"\"\"\n\nimport tensorflow as tf\nfrom tensorflow.keras import layers\nfrom tensorflow.keras.models import Model\n\nfrom .qconvolutional import QDepthwiseConv2D\nfrom .quantizers import *\nfrom tensorflow.python.framework import smart_cond as tf_utils\nfrom tensorflow.python.ops import math_ops\nfrom tensorflow.python.ops import array_ops\n\ntf.compat.v2.enable_v2_behavior()\n\n\nclass QDepthwiseConv2DBatchnorm(QDepthwiseConv2D):\n  \"\"\"Fold batchnormalization with a previous QDepthwiseConv2d layer.\"\"\"\n\n  def __init__(\n      self,\n      # QDepthwiseConv2d params\n      kernel_size,\n      strides=(1, 1),\n      padding=\"VALID\",\n      depth_multiplier=1,\n      data_format=None,\n      activation=None,\n      use_bias=True,\n      depthwise_initializer=\"he_normal\",\n      bias_initializer=\"zeros\",\n      depthwise_regularizer=None,\n      bias_regularizer=None,\n      activity_regularizer=None,\n      depthwise_constraint=None,\n      bias_constraint=None,\n      dilation_rate=(1, 1),\n      depthwise_quantizer=None,\n      bias_quantizer=None,\n      depthwise_range=None,\n      bias_range=None,\n\n      # batchnorm params\n      axis=-1,\n      momentum=0.99,\n      epsilon=0.001,\n      center=True,\n 
     scale=True,\n      beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\",\n      moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\",\n      beta_regularizer=None,\n      gamma_regularizer=None,\n      beta_constraint=None,\n      gamma_constraint=None,\n      renorm=False,\n      renorm_clipping=None,\n      renorm_momentum=0.99,\n      fused=None,\n      trainable=True,\n      virtual_batch_size=None,\n      adjustment=None,\n\n      # other params\n      ema_freeze_delay=None,\n      folding_mode=\"ema_stats_folding\",\n      **kwargs):\n\n    \"\"\"A composite layer that folds depthwiseconv2d and batch normalization.\n\n    The first group of parameters correponds to the initialization parameters\n      of a QDepthwiseConv2d layer. check qkeras.qconvolutional.QDepthwiseConv2D\n      for details.\n\n    The 2nd group of parameters corresponds to the initialization parameters\n      of a BatchNormalization layer. Check keras.layers.normalization.BatchNorma\n      lizationBase for details.\n\n    The 3rd group of parameters corresponds to the initialization parameters\n      specific to this class.\n\n      ema_freeze_delay: int or None. 
number of steps before batch normalization\n        mv_mean and mv_variance will be frozen and used in the folded layer.\n      folding_mode: string\n        \"ema_stats_folding\": mimic tflite which uses the ema statistics to\n          fold the kernel to suppress quantization induced jitter then performs\n          the correction to have a similar effect of using the current batch\n          statistics.\n        \"batch_stats_folding\": use batch mean and variance to fold kernel first;\n          after enough training steps switch to moving_mean and moving_variance\n          for kernel folding.\n    \"\"\"\n\n    # intialization the QDepthwiseConv2d part of the composite layer\n    super().__init__(\n        kernel_size=kernel_size,\n        strides=strides,\n        padding=padding,\n        depth_multiplier=depth_multiplier,\n        data_format=data_format,\n        activation=activation,\n        use_bias=use_bias,\n        depthwise_initializer=depthwise_initializer,\n        bias_initializer=bias_initializer,\n        depthwise_regularizer=depthwise_regularizer,\n        bias_regularizer=bias_regularizer,\n        activity_regularizer=activity_regularizer,\n        depthwise_constraint=depthwise_constraint,\n        bias_constraint=bias_constraint,\n        dilation_rate=dilation_rate,\n        depthwise_quantizer=depthwise_quantizer,\n        bias_quantizer=bias_quantizer,\n        depthwise_range=depthwise_range,\n        bias_range=bias_range,\n        **kwargs\n    )\n\n    # initialization of batchnorm part of the composite layer\n    self.batchnorm = layers.BatchNormalization(\n        axis=axis, momentum=momentum, epsilon=epsilon, center=center,\n        scale=scale, beta_initializer=beta_initializer,\n        gamma_initializer=gamma_initializer,\n        moving_mean_initializer=moving_mean_initializer,\n        moving_variance_initializer=moving_variance_initializer,\n        beta_regularizer=beta_regularizer,\n        
gamma_regularizer=gamma_regularizer,\n        beta_constraint=beta_constraint, gamma_constraint=gamma_constraint,\n        renorm=renorm, renorm_clipping=renorm_clipping, \n        renorm_momentum=renorm_momentum, fused=fused, trainable=trainable,\n        virtual_batch_size=virtual_batch_size, adjustment=adjustment)\n\n    self.ema_freeze_delay = ema_freeze_delay\n    assert folding_mode in [\"ema_stats_folding\", \"batch_stats_folding\"]\n    self.folding_mode = folding_mode\n\n  def build(self, input_shape):\n    super(QDepthwiseConv2DBatchnorm, self).build(input_shape)\n\n    # If start training from scratch, self._iteration (i.e., training_steps)\n    # is initialized with -1. When loading ckpt, it can load the number of\n    # training steps that have been previously trainied.\n    # TODO(lishanok): develop a way to count iterations outside layer\n    self._iteration = tf.Variable(-1, trainable=False, name=\"iteration\",\n                                  dtype=tf.int64)\n\n  def call(self, inputs, training=None):\n\n    # numpy value, mark the layer is in training\n    training = self.batchnorm._get_training_value(training)  # pylint: disable=protected-access\n\n    # checking if to update batchnorm params\n    if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0):\n      # if ema_freeze_delay is None or a negative value, do not freeze bn stats\n      bn_training = tf.cast(training, dtype=bool)\n    else:\n      bn_training = tf.math.logical_and(training, tf.math.less_equal(\n          self._iteration, self.ema_freeze_delay))\n\n    depthwise_kernel = self.depthwise_kernel\n\n    # run depthwise_conv2d to produce output for the following batchnorm\n    conv_outputs = tf.keras.backend.depthwise_conv2d(\n        inputs,\n        depthwise_kernel,\n        strides=self.strides,\n        padding=self.padding,\n        dilation_rate=self.dilation_rate,\n        data_format=self.data_format)\n\n    if self.use_bias:\n      bias = self.bias\n      
conv_outputs = tf.keras.backend.bias_add(\n          conv_outputs, bias, data_format=self.data_format)\n    else:\n      bias = 0\n\n    _ = self.batchnorm(conv_outputs, training=bn_training)\n\n    self._iteration.assign_add(tf_utils.smart_cond(\n        training, lambda: tf.constant(1, tf.int64),\n        lambda: tf.constant(0, tf.int64)))\n\n    # calcuate mean and variance from current batch\n    bn_shape = conv_outputs.shape\n    ndims = len(bn_shape)\n    reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis]\n    keep_dims = len(self.batchnorm.axis) > 1\n    mean, variance = self.batchnorm._moments(  # pylint: disable=protected-access\n        math_ops.cast(conv_outputs, self.batchnorm._param_dtype),  # pylint: disable=protected-access\n        reduction_axes,\n        keep_dims=keep_dims)\n    gamma = self.batchnorm.gamma\n    beta = self.batchnorm.beta\n    moving_mean = self.batchnorm.moving_mean\n    moving_variance = self.batchnorm.moving_variance\n\n    if self.folding_mode not in [\"batch_stats_folding\", \"ema_stats_folding\"]:\n      assert ValueError(\"mode {} not supported!\".format(self.folding_mode))\n\n    mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)\n    batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)\n\n    if gamma is not None:\n      mv_inv *= gamma\n      batch_inv *= gamma\n\n    folded_bias = tf_utils.smart_cond(\n        bn_training,\n        lambda: batch_inv * (bias - mean) + beta,\n        lambda: mv_inv * (bias - moving_mean) + beta)\n\n    if self.folding_mode == \"batch_stats_folding\":\n      # using batch mean and variance in the initial training stage\n      # after sufficient training, switch to moving mean and variance\n      inv = tf_utils.smart_cond(bn_training, lambda: batch_inv, lambda: mv_inv)\n\n    elif self.folding_mode == \"ema_stats_folding\":\n      # We always scale the weights with a correction factor to the long term\n      # statistics prior to 
quantization. This ensures that there is no jitter\n      # in the quantized weights due to batch to batch variation. During the\n      # initial phase of training, we undo the scaling of the weights so that\n      # outputs are identical to regular batch normalization. We also modify\n      # the bias terms correspondingly. After sufficient training, switch from\n      # using batch statistics to long term moving averages for batch\n      # normalization.\n\n      # use batch stats for calcuating bias before bn freeze, and use moving\n      # stats after bn freeze\n\n      # moving stats is always used to fold kernel in tflite; before bn freeze\n      # an additional correction factor will be applied to the depthwiseconv2d\n      # output\n      inv = mv_inv\n\n    # for DepthwiseConv2D inv needs to be broadcasted to the last 2 dimensions\n    # of the kernels\n    depthwise_weights_shape = [\n        depthwise_kernel.get_shape().as_list()[2],\n        depthwise_kernel.get_shape().as_list()[3]\n    ]\n    inv = array_ops.reshape(inv, depthwise_weights_shape)\n    # wrap conv kernel with bn parameters\n    folded_depthwise_kernel = inv * depthwise_kernel\n    # quantize the folded kernel\n    if self.depthwise_quantizer is not None:\n      q_folded_depthwise_kernel = self.depthwise_quantizer_internal(\n          folded_depthwise_kernel)\n    else:\n      q_folded_depthwise_kernel = folded_depthwise_kernel\n\n    # If loaded from a ckpt, bias_quantizer is the ckpt value\n    # Else if bias_quantizer not specified, bias\n    #   quantizer is None and we need to calculate bias quantizer\n    #   type according to accumulator type. 
User can call\n    #   bn_folding_utils.populate_bias_quantizer_for_folded_layers(\n    #      model, input_quantizer_list]) to populate such bias quantizer.\n    if self.bias_quantizer is not None:\n      q_folded_bias = self.bias_quantizer_internal(folded_bias)\n    else:\n      q_folded_bias = folded_bias\n\n    applied_kernel = q_folded_depthwise_kernel\n    applied_bias = q_folded_bias\n\n    # calculate depthwise_conv2d output using the quantized folded kernel\n    folded_outputs = tf.keras.backend.depthwise_conv2d(\n        inputs,\n        applied_kernel,\n        strides=self.strides,\n        padding=self.padding,\n        dilation_rate=self.dilation_rate,\n        data_format=self.data_format)\n\n    if training is True and self.folding_mode == \"ema_stats_folding\":\n      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)\n      y_corr = tf_utils.smart_cond(\n          bn_training,\n          lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) *\n                   math_ops.rsqrt(variance + self.batchnorm.epsilon)),\n          lambda: tf.constant(1.0, shape=moving_variance.shape))\n      folded_outputs = math_ops.mul(folded_outputs, y_corr)\n\n    folded_outputs = tf.keras.backend.bias_add(\n        folded_outputs,\n        applied_bias,\n        data_format=self.data_format)\n\n    if self.activation is not None:\n      return self.activation(folded_outputs)\n\n    return folded_outputs\n\n  def get_config(self):\n    base_config = super().get_config()\n    bn_config = self.batchnorm.get_config()\n    config = {\"ema_freeze_delay\": self.ema_freeze_delay,\n              \"folding_mode\": self.folding_mode}\n    name = base_config[\"name\"]\n    out_config = dict(\n        list(base_config.items())\n        + list(bn_config.items()) + list(config.items()))\n\n    # names from different config override each other; use the base layer name\n    # as the this layer's config name\n    out_config[\"name\"] = name\n    return 
out_config\n\n  def get_quantization_config(self):\n    return {\n        \"depthwise_quantizer\": str(self.depthwise_quantizer_internal),\n        \"bias_quantizer\": str(self.bias_quantizer_internal),\n        \"activation\": str(self.activation),\n        \"filters\": str(self.filters)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n  def get_folded_weights(self):\n    \"\"\"Function to get the batchnorm folded weights.\n\n    This function converts the weights by folding batchnorm parameters into\n    the weight of QDepthwiseConv2d. The high-level equation:\n\n    W_fold = gamma * W / sqrt(variance + epsilon)\n    bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta\n    \"\"\"\n\n    depthwise_kernel = self.depthwise_kernel\n\n    if self.use_bias:\n      bias = self.bias\n    else:\n      bias = 0\n\n    # get Batchnorm stats\n    gamma = self.batchnorm.gamma\n    beta = self.batchnorm.beta\n    moving_mean = self.batchnorm.moving_mean\n    moving_variance = self.batchnorm.moving_variance\n\n    # get the inversion factor so that we replace division by multiplication\n    inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)\n    if gamma is not None:\n      inv *= gamma\n    # fold bias with bn stats\n    folded_bias = inv * (bias - moving_mean) + beta\n\n    # for DepthwiseConv2D inv needs to be broadcasted to the last 2 dimensions\n    # of the kernels\n    depthwise_weights_shape = [\n        depthwise_kernel.get_shape().as_list()[2],\n        depthwise_kernel.get_shape().as_list()[3]\n    ]\n    inv = array_ops.reshape(inv, depthwise_weights_shape)\n    # wrap conv kernel with bn parameters\n    folded_depthwise_kernel = inv * depthwise_kernel\n\n    return [folded_depthwise_kernel, folded_bias]\n"
  },
  {
    "path": "qkeras/qlayers.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# ==============================================================================\n\"\"\"Definition of quantization package.\"\"\"\n\n# Some parts of the code were adapted from\n#\n# https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\n#\n# \"Copyright (c) 2017, Bert Moons\" where it applies\n#\n# and were implemented following several papers.\n#\n#    https://arxiv.org/pdf/1609.07061.pdf\n#    https://arxiv.org/abs/1602.02830\n#    https://arxiv.org/abs/1603.05279\n#    https://arxiv.org/abs/1605.04711\n#    https://ieeexplore.ieee.org/abstract/document/6986082\n#    https://ieeexplore.ieee.org/iel4/78/5934/00229903.pdf\n#\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport sys\nimport warnings\n\nimport numpy as np\nimport six\nimport tensorflow.compat.v2 as tf\nfrom tensorflow.keras import activations\nfrom tensorflow.keras import constraints\nfrom tensorflow.keras import initializers\nfrom tensorflow.keras import regularizers\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.constraints import Constraint\nfrom tensorflow.keras.initializers import Initializer\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.layers import Layer\nfrom tensorflow.python.framework import smart_cond as tf_utils\n\nfrom .quantizers import *\nfrom .quantizers import 
_get_integer_bits\nfrom .quantizers import get_quantizer\nfrom tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer\n\n\ndef get_auto_range_constraint_initializer(quantizer, constraint, initializer):\n  \"\"\"Get value range automatically for quantizer.\n\n  Arguments:\n   quantizer: A quantizer class in quantizers.py.\n   constraint: A tf.keras constraint.\n   initializer: A tf.keras initializer.\n\n  Returns:\n    a tuple (constraint, initializer), where\n      constraint is clipped by Clip class in this file, based on the\n      value range of quantizer.\n      initializer is initializer contraint by value range of quantizer.\n  \"\"\"\n  if quantizer is not None:\n    constraint = get_constraint(constraint, quantizer)\n    initializer = get_initializer(initializer)\n\n    if initializer and initializer.__class__.__name__ not in [\"Ones\", \"Zeros\", 'QInitializer']:\n      # we want to get the max value of the quantizer that depends\n      # on the distribution and scale\n      if not (hasattr(quantizer, \"alpha\") and\n              isinstance(quantizer.alpha, six.string_types)):\n        initializer = QInitializer(\n            initializer, use_scale=True, quantizer=quantizer)\n  return constraint, initializer\n\n\nclass QInitializer(Initializer):\n  \"\"\"Wraps around Keras initializer to provide a fanin scaling factor.\"\"\"\n\n  def __init__(self, initializer, use_scale, quantizer):\n    self.initializer = initializer\n    self.use_scale = use_scale\n    self.quantizer = quantizer\n\n    try:\n      self.is_po2 = \"po2\" in quantizer.__class__.__name__\n    except:\n      self.is_po2 = False\n\n  def __call__(self, shape, dtype=None):\n    x = self.initializer(shape, dtype)\n\n    max_x = np.max(abs(x))\n    std_x = np.std(x)\n    delta = self.quantizer.max() * 2**-self.quantizer.bits\n\n    # delta is the minimum resolution of the number system.\n    # we want to make sure we have enough values.\n    if delta > 
std_x and hasattr(self.initializer, \"scale\"):\n      q = self.quantizer(x)\n      max_q = np.max(abs(q))\n      scale = 1.0\n      if max_q == 0.0:\n        xx = np.mean(x * x)\n        scale = self.quantizer.max() / np.sqrt(xx)\n      else:\n        qx = np.sum(q * x)\n        qq = np.sum(q * q)\n\n        scale = qq / qx\n\n      self.initializer.scale *= max(scale, 1)\n      x = self.initializer(shape, dtype)\n\n    return np.clip(x, -self.quantizer.max(), self.quantizer.max())\n\n  def get_config(self):\n    return {\n        \"initializer\": self.initializer,\n        \"use_scale\": self.use_scale,\n        \"quantizer\": self.quantizer,\n    }\n\n  @classmethod\n  def from_config(cls, config):\n    config = {\n      'initializer' : get_initializer(config['initializer']),\n      'use_scale'   : config['use_scale'],\n      'quantizer'   : get_quantizer(config['quantizer'])}\n    return cls(**config)\n\n#\n# Because it may be hard to get serialization from activation functions,\n# we may be replacing their instantiation by QActivation in the future.\n#\n\n\nclass QActivation(Layer, PrunableLayer):\n  \"\"\"Implements quantized activation layers.\"\"\"\n\n  # TODO(lishanok): Implement activation type conversion outside of the class.\n  # When caller calls the initializer, it should convert string to a quantizer\n  # object if string is given as activation.\n  def __init__(self, activation, **kwargs):\n\n    super().__init__(**kwargs)\n\n    self.activation = activation\n\n    if not isinstance(activation, six.string_types):\n      self.quantizer = activation\n      if hasattr(self.quantizer, \"__name__\"):\n        self.__name__ = self.quantizer.__name__\n      elif hasattr(self.quantizer, \"name\"):\n        self.__name__ = self.quantizer.name\n      elif hasattr(self.quantizer, \"__class__\"):\n        self.__name__ = self.quantizer.__class__.__name__\n      return\n\n    self.__name__ = activation\n\n    try:\n      self.quantizer = 
get_quantizer(activation)\n    except KeyError:\n      raise ValueError(\"invalid activation '{}'\".format(activation))\n\n  def call(self, inputs):\n    return self.quantizer(inputs)\n\n  def get_config(self):\n    config = {\"activation\": self.activation}\n    base_config = super(QActivation, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  @classmethod\n  def from_config(cls, config):\n    try:\n      if isinstance(config[\"activation\"], dict):\n        # If config[\"activation\"] is serialized, it would be a dict.\n        # Otherwise, it will be either string or quantizer object, which\n        # doesn't require deserialization.\n        config[\"activation\"] = activations.deserialize(config[\"activation\"])\n      return cls(**config)\n\n    except Exception as e:\n      raise TypeError(\n          f\"Error when deserializing class '{cls.__name__}' using \"\n          f\"config={config}.\\n\\nException encountered: {e}\"\n      )\n\n  def get_quantization_config(self):\n    return str(self.activation)\n\n  def compute_output_shape(self, input_shape):\n    return input_shape\n\n  def get_prunable_weights(self):\n    return []\n\n\nclass QAdaptiveActivation(Layer, PrunableLayer):\n  \"\"\"[EXPERIMENTAL] Implements an adaptive quantized activation layer using EMA.\n\n  This layer calculates an exponential moving average of min and max of the\n  activation values to automatically determine the scale (integer bits) of\n  the quantizer used in this layer.\n  \"\"\"\n\n  def __init__(self,\n               activation,\n               total_bits,\n               current_step=None,\n               symmetric=True,\n               quantization_delay=0,\n               ema_freeze_delay=None,\n               ema_decay=0.9999,\n               per_channel=False,\n               po2_rounding=False,\n               relu_neg_slope=0.0,\n               relu_upper_bound=None,\n               **kwargs):\n    \"\"\"Initializes this 
QAdaptiveActivation layer.\n\n    Args:\n      activation: Str. The activation quantizer type to use for this activation\n        layer, such as 'quantized_relu'. Should be a string with no params.\n      total_bits: Int. The total bits that can be used by the quantizer\n      current_step: tf.Variable specifying the current step in training.\n        You can find this by passing model.optimizer.iterations\n        (see tf.keras.optimizers.Optimizer.iterations). If set to None, the\n        layer will attempt to estimate the current step itself, but please note\n        that this number may not always match the optimizer step.\n      symmetric: Bool. If to enforce symmetry about the origin in the quantized\n        bit representation of the value. When using linear activation, this\n        should be True for best results.\n      quantization_delay: Int. How many training steps to wait until quantizing\n        the activation values.\n      ema_freeze_delay: Int. Steps to wait until stopping the update of the\n        exponential moving average values. Set to None for an infinite delay.\n      ema_decay: Float. The decay value used for exponential moving average (see\n        tf.keras.backend.moving_average_update)\n      per_channel: Bool. If to quantize the activation values on a\n        per-channel basis.\n      po2_rounding: Bool. If true, the EMA max value is rounded to the nearest\n        power-of-2. If false, the EMA max value is rounded up (with ceil) to a\n        power-of-two. These power-of-two operations are necessary to calculate\n        the number of integer bits used in the quantizer, and the difference\n        between using round and ceil trade off the quantizer's range and\n        precision.\n      relu_neg_slope: Float. Slope of the negative values in relu to enable the\n        use of leaky relu. This parameter will only be used with the quantizer\n        type quantized_relu. Set to 0.0 to use normal relu.\n      relu_upper_bound: Float. 
The upper bound to use if the activation is set\n        to relu. Set to None to not artificially set an upper bound. Pease note\n        that this param is ignored if the activation is not quantized_relu\n      **kwargs: Args passed to the Layer class.\n    \"\"\"\n    super().__init__(**kwargs)\n\n    self.total_bits = total_bits\n    self.symmetric = symmetric\n    self.is_estimating_step_count = False  # If the layer should estimate its\n    # own step count by incrementing it\n    # every call.\n    if isinstance(current_step, tf.Variable):\n      self.step = current_step\n    elif current_step is None:\n      self.step = tf.Variable(-1, dtype=tf.int64)\n      self.is_estimating_step_count = True\n      print(\"[WARNING] QAdaptiveActivation is estimating it's own training \"\n            \"step count, which may not always be the same as the true optimizer\"\n            \" training step. To mitigate this, please set the current_step \"\n            \"parameter when initializing QAdaptiveActivation\", file=sys.stderr)\n    else:\n      self.step = tf.Variable(current_step, dtype=tf.int64)\n      print(\"[WARNING] QAdaptiveActivation is disconnected from the optimizer \"\n            \"current step, which may lead to incorrect training. 
If you wish to\"\n            \" resume training, set this layer's self.step to the optimizer's \"\n            \"tf.Variable current step\", file=sys.stderr)\n    self.quantization_delay = quantization_delay\n    self.ema_freeze_delay = ema_freeze_delay\n    self.will_ema_freeze = True if ema_freeze_delay else False\n    self.ema_decay = ema_decay\n    self.per_channel = per_channel\n    self.po2_rounding = po2_rounding\n    self.ema_min = None\n    self.ema_max = None\n    self.relu_neg_slope = relu_neg_slope\n    self.relu_upper_bound = relu_upper_bound\n\n    # Verify quantizer type is correct\n    self.supported_quantizers = [\"quantized_bits\", \"quantized_relu\"]\n    if activation not in self.supported_quantizers:\n      raise ValueError((\"Invalid activation {}. Activation quantizer may NOT \"\n                        \"contain any parameters (they will be set automatically\"\n                        \" by this layer), and only the quantizer types {} are \"\n                        \"supported.\").format(activation,\n                                             self.supported_quantizers))\n\n    # Get the quantizer associated with the activation\n    try:\n      self.quantizer = get_quantizer(activation)\n    except KeyError:\n      raise ValueError(\"Invalid activation '{}'\".format(activation))\n\n    # Check that the quantizer is supported\n    if self.quantizer.__class__.__name__ not in self.supported_quantizers:\n      raise ValueError(\"Unsupported activation quantizer '{}'\".format(\n          self.quantizer.__class__.__name__))\n\n    # Set keep_negative\n    if self.quantizer.__class__.__name__ == \"quantized_relu\":\n      self.quantizer.is_quantized_clip = False  # Use relu_upper_bound instead\n      if self.relu_upper_bound:\n        self.quantizer.relu_upper_bound = self.relu_upper_bound\n      self.quantizer.negative_slope = relu_neg_slope\n      self.keep_negative = relu_neg_slope != 0.0\n      self.quantizer.is_quantized_clip = False  # Use 
normal relu when qnoise=0\n    elif self.quantizer.__class__.__name__ == \"quantized_bits\":\n      self.keep_negative = True\n      self.quantizer.keep_negative = True\n\n    # If not using quantization delay, then print warning\n    if self.quantization_delay < 1:\n      print(\"[WARNING] If QAdaptiveActivation has the quantization_delay set \"\n            \"to 0, then the moving averages will be heavily biased towards the \"\n            \"initial quantizer configuration, which will likely prevent the \"\n            \"model from converging. Consider a larger quantization_delay.\",\n            file=sys.stderr)\n\n    self.activation = self.quantizer  # self.activation is used by QTools\n\n  def build(self, input_shape):\n    if self.will_ema_freeze:\n      self.ema_freeze_delay = tf.constant(self.ema_freeze_delay, dtype=tf.int64)\n\n    self.ema_decay = tf.constant(self.ema_decay, dtype=tf.float32)\n    self.is_estimating_step_count = tf.constant(self.is_estimating_step_count,\n                                                dtype=tf.bool)\n\n    # Calculate the number of channels\n    channel_index = -1 if K.image_data_format() == \"channels_last\" else 1\n    if self.per_channel:\n      input_shape_list = list(input_shape) if isinstance(\n          input_shape, tuple) else input_shape.as_list()\n      num_channels = tf.constant(input_shape_list[channel_index],\n                                 shape=(1), dtype=tf.int64)\n    else:\n      num_channels = tf.constant(1, shape=(1), dtype=tf.int64)\n\n    # Initialize the moving mins and max\n    if self.ema_min is None or self.ema_max is None:\n      self.ema_min = tf.Variable(tf.zeros(num_channels), name=\"ema_min\",\n                                 trainable=False)\n      self.ema_max = tf.Variable(tf.zeros(num_channels), name=\"ema_max\",\n                                 trainable=False)\n\n    # Determine the parameters for the quantizer\n    self.quantizer.bits = self.total_bits\n\n    # Set up the 
initial integer bits and quantizer params\n    self.quantizer.integer = tf.Variable(tf.zeros(num_channels,\n                                                  dtype=tf.int32),\n                                         name=\"quantizer_integer_bits\",\n                                         trainable=False)\n    integer_bits = _get_integer_bits(min_value=self.ema_min,\n                                     max_value=self.ema_max,\n                                     bits=self.total_bits,\n                                     symmetric=self.symmetric,\n                                     keep_negative=self.keep_negative,\n                                     is_clipping=self.po2_rounding)\n    self.quantizer.integer.assign(integer_bits)\n    self.quantizer.alpha = 1.0  # Setting alpha to 1.0 allows the integer bits\n    # to serve as the scale\n    self.quantizer.symmetric = self.symmetric\n    self.quantization_delay = tf.constant(self.quantization_delay,\n                                          dtype=tf.int64)\n\n  def call(self, inputs, training=False):\n    x = inputs\n    training = training and self.trainable\n    self.will_ema_freeze = self.will_ema_freeze and self.trainable\n\n    # Update the step count if the optimizer step count is unknown\n    self.step.assign_add(K.switch(\n        tf.math.logical_and(self.is_estimating_step_count, training),\n        tf.constant(1, tf.int64), tf.constant(0, tf.int64)))\n\n    # Perform the quantization\n    if training:\n      # Calculate the qnoise, a scalar from 0 to 1 that represents the level of\n      # quantization noise to use. At training start, we want no quantization,\n      # so qnoise_factor = 0.0. 
After quantization_delay steps, we want normal\n      # quantization, so qnoise_factor = 1.0.\n      qnoise_factor = K.switch(\n          tf.greater_equal(self.step, self.quantization_delay),\n          lambda: tf.constant(1.0), lambda: tf.constant(0.0))\n      self.quantizer.update_qnoise_factor(qnoise_factor)\n      qx = self.quantizer(x)\n\n    else:  # If not training, we always want to use full quantization\n      self.quantizer.update_qnoise_factor(tf.constant(1.0))\n      qx = self.quantizer(x)\n\n    # Calculate the axis along where to find the min and max EMAs\n    len_axis = len(x.shape)\n    if len_axis > 1:\n      if self.per_channel:\n        if K.image_data_format() == \"channels_last\":\n          axis = list(range(len_axis - 1))\n        else:\n          axis = list(range(1, len_axis))\n      else:\n        axis = list(range(len_axis))\n    else:\n      axis = [0]\n\n    # Determine if freezing the EMA\n    is_ema_training = tf.constant(training, dtype=tf.bool)\n    if self.will_ema_freeze:\n      is_ema_training = tf.cond(\n          tf.greater(self.step, self.ema_freeze_delay),\n          lambda: tf.constant(False), lambda: tf.constant(True))\n\n    def update_branch():\n      \"\"\" Update the moving average when is_ema_training is True.\"\"\"\n\n      # Set the qnoise factor to 0 to update the EMA using the unquantized input\n      prev_qnoise_factor = tf.identity(self.quantizer.qnoise_factor)\n      self.quantizer.update_qnoise_factor(tf.constant(0.0))\n\n      # Update the EMA\n      act_x = self.quantizer(x)  # act_x is the input after the activation\n      # function, but before the quantizer. 
This is\n      # done by using a qnoise_factor of 0\n      new_min = tf.squeeze(K.min(act_x, axis=axis, keepdims=True))\n      K.moving_average_update(self.ema_min, new_min, self.ema_decay)\n      new_max = tf.squeeze(K.max(act_x, axis=axis, keepdims=True))\n      K.moving_average_update(self.ema_max, new_max, self.ema_decay)\n\n      # Reset the qnoise factor to the previous value\n      self.quantizer.update_qnoise_factor(prev_qnoise_factor)\n\n    # Update the moving average when is_ema_training is True\n    tf_utils.smart_cond(\n        is_ema_training, true_fn=update_branch, false_fn=lambda: None)\n\n    # Set the integer bits for the quantizer\n    integer_bits = _get_integer_bits(\n        min_value=self.ema_min,\n        max_value=self.ema_max,\n        bits=self.total_bits,\n        symmetric=self.symmetric,\n        keep_negative=self.keep_negative,\n        is_clipping=self.po2_rounding)\n    self.quantizer.integer.assign(integer_bits)\n\n    return qx\n\n  # Override get_weights since we do not want ema_min or ema_max to be public\n  def get_weights(self):\n    return []\n\n  # Override set_weights since we do not want ema_min or ema_max to be public\n  def set_weights(self, weights):\n    return\n\n  def get_config(self):\n    config = {\n        \"activation\": self.quantizer.__class__.__name__,\n        \"total_bits\": self.total_bits,\n        \"current_step\": self.step.numpy(),\n        \"symmetric\": self.symmetric,\n        \"quantization_delay\": np.array(self.quantization_delay),\n        \"ema_freeze_delay\": np.array(self.ema_freeze_delay),\n        \"ema_decay\": np.array(self.ema_decay),\n        \"per_channel\": self.per_channel,\n        \"po2_rounding\": self.po2_rounding,\n        \"relu_neg_slope\": self.relu_neg_slope\n    }\n    base_config = super(QAdaptiveActivation, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    self.quantizer.integer_bits = 
np.array(self.quantizer)\n    return str(self.quantizer)\n\n  def compute_output_shape(self, input_shape):\n    return input_shape\n\n  def get_prunable_weights(self):\n    return []\n\n\n#\n# Constraint class to clip weights and bias between -1 and 1 so that:\n#    1. quantization approximation is symmetric (b = 0).\n#    2. max(x) and min(x) are 1 and -1 respectively.\n#\nclass Clip(Constraint):\n  \"\"\"Clips weight constraint.\"\"\"\n\n  # This function was modified from Keras minmaxconstraints.\n  #\n  # Constrains the weights to be between min/max values.\n  #   min_value: the minimum norm for the incoming weights.\n  #   max_value: the maximum norm for the incoming weights.\n  #   constraint: previous constraint to be clipped.\n  #   quantizer: quantizer to be applied to constraint.\n\n  def __init__(self, min_value=0.0, max_value=1.0,\n               constraint=None, quantizer=None):\n    \"\"\"Initializes Clip constraint class.\"\"\"\n\n    self.min_value = min_value\n    self.max_value = max_value\n    self.constraint = constraints.get(constraint)\n    # Don't wrap yourself\n    if isinstance(self.constraint, Clip):\n      self.constraint = None\n    self.quantizer = get_quantizer(quantizer)\n\n  def __call__(self, w):\n    \"\"\"Clips values between min and max values.\"\"\"\n    if self.constraint:\n      w = self.constraint(w)\n      if self.quantizer:\n        w = self.quantizer(w)\n    w = tf.keras.backend.clip(w, self.min_value, self.max_value)\n    return w\n\n  def get_config(self):\n    \"\"\"Returns configuration of constraint class.\"\"\"\n    return {\"min_value\": self.min_value, \"max_value\": self.max_value}\n\n  @classmethod\n  def from_config(cls, config):\n    if isinstance(config.get('constraint', None), Clip):\n      config['constraint'] = None\n    config['constraint'] = constraints.get(config.get('constraint', None))\n    config['quantizer'] = get_quantizer(config.get('quantizer', None))\n    return cls(**config)\n\n#\n# Definition 
of Quantized NN classes. These classes were copied\n# from the equivalent layers in Keras, and we modified to apply quantization.\n# Similar implementations can be seen in the references.\n#\n\n\nclass QDense(Dense, PrunableLayer):\n  \"\"\"Implements a quantized Dense layer.\"\"\"\n\n  # Most of these parameters follow the implementation of Dense in\n  # Keras, with the exception of kernel_range, bias_range,\n  # kernel_quantizer, bias_quantizer, and kernel_initializer.\n  #\n  # kernel_quantizer: quantizer function/class for kernel\n  # bias_quantizer: quantizer function/class for bias\n  # kernel_range/bias_ranger: for quantizer functions whose values\n  #   can go over [-1,+1], these values are used to set the clipping\n  #   value of kernels and biases, respectively, instead of using the\n  #   constraints specified by the user.\n  #\n  # we refer the reader to the documentation of Dense in Keras for the\n  # other parameters.\n\n  def __init__(self,\n               units,\n               activation=None,\n               use_bias=True,\n               kernel_initializer=\"he_normal\",\n               bias_initializer=\"zeros\",\n               kernel_regularizer=None,\n               bias_regularizer=None,\n               activity_regularizer=None,\n               kernel_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               bias_quantizer=None,\n               kernel_range=None,\n               bias_range=None,\n               **kwargs):\n\n    if kernel_range is not None:\n      warnings.warn(\"kernel_range is deprecated in QDense layer.\")\n\n    if bias_range is not None:\n      warnings.warn(\"bias_range is deprecated in QDense layer.\")\n\n    self.kernel_range = kernel_range\n    self.bias_range = bias_range\n\n    self.kernel_quantizer = kernel_quantizer\n    self.bias_quantizer = bias_quantizer\n\n    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)\n    
self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n\n    # optimize parameter set to \"auto\" scaling mode if possible\n    if hasattr(self.kernel_quantizer_internal, \"_set_trainable_parameter\"):\n      self.kernel_quantizer_internal._set_trainable_parameter()\n\n    self.quantizers = [\n        self.kernel_quantizer_internal, self.bias_quantizer_internal\n    ]\n\n    kernel_constraint, kernel_initializer = (\n        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,\n                                              kernel_constraint,\n                                              kernel_initializer))\n\n    if use_bias:\n      bias_constraint, bias_initializer = (\n          get_auto_range_constraint_initializer(self.bias_quantizer_internal,\n                                                bias_constraint,\n                                                bias_initializer))\n    if activation is not None:\n      activation = get_quantizer(activation)\n\n    super().__init__(\n        units=units,\n        activation=activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        bias_regularizer=bias_regularizer,\n        activity_regularizer=activity_regularizer,\n        kernel_constraint=kernel_constraint,\n        bias_constraint=bias_constraint,\n        **kwargs,\n    )\n\n  def call(self, inputs):\n    if self.kernel_quantizer:\n      quantized_kernel = self.kernel_quantizer_internal(self.kernel)\n    else:\n      quantized_kernel = self.kernel\n    output = tf.keras.backend.dot(inputs, quantized_kernel)\n    if self.use_bias:\n      if self.bias_quantizer:\n        quantized_bias = self.bias_quantizer_internal(self.bias)\n      else:\n        quantized_bias = self.bias\n      output = tf.keras.backend.bias_add(output, quantized_bias,\n                                         
data_format=\"channels_last\")\n    if self.activation is not None:\n      output = self.activation(output)\n    return output\n\n  def compute_output_shape(self, input_shape):\n    assert input_shape and len(input_shape) >= 2\n    assert input_shape[-1]\n    output_shape = list(input_shape)\n    output_shape[-1] = self.units\n    return tuple(output_shape)\n\n  def get_config(self):\n    config = {\n        \"units\": self.units,\n        \"activation\": activations.serialize(\n            self.activation# Google internal code, commented out by copybara\n        ),\n        \"use_bias\": self.use_bias,\n        \"kernel_quantizer\": constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        \"bias_quantizer\": constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        \"kernel_initializer\": initializers.serialize(\n            self.kernel_initializer# Google internal code, commented out by copybara\n        ),\n        \"bias_initializer\": initializers.serialize(\n            self.bias_initializer# Google internal code, commented out by copybara\n        ),\n        \"kernel_regularizer\": regularizers.serialize(\n            self.kernel_regularizer# Google internal code, commented out by copybara\n        ),\n        \"bias_regularizer\": regularizers.serialize(\n            self.bias_regularizer# Google internal code, commented out by copybara\n        ),\n        \"activity_regularizer\": regularizers.serialize(\n            self.activity_regularizer# Google internal code, commented out by copybara\n        ),\n        \"kernel_constraint\": constraints.serialize(\n            self.kernel_constraint# Google internal code, commented out by copybara\n        ),\n        \"bias_constraint\": constraints.serialize(\n            self.bias_constraint# Google internal code, commented out by copybara\n        ),\n   
     \"kernel_range\": self.kernel_range,\n        \"bias_range\": self.bias_range,\n    }\n    base_config = super(QDense, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    return {\n        \"kernel_quantizer\":\n            str(self.kernel_quantizer_internal),\n        \"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"activation\":\n            str(self.activation),\n        \"units\" : str(self.units)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n  def get_prunable_weights(self):\n    return [self.kernel]\n\n\ndef get_constraint(identifier, quantizer):\n  \"\"\"Gets the constraint.\n\n  Args:\n    identifier: A constraint, which could be dict, string, or callable function.\n    quantizer: A quantizer class or quantization function\n\n  Returns:\n    A constraint class\n  \"\"\"\n  if identifier:\n    if isinstance(identifier, dict) and identifier['class_name'] == 'Clip':\n      return Clip.from_config(identifier['config'])\n    else:\n      return constraints.get(identifier)\n  else:\n    max_value = max(1, quantizer.max()) if hasattr(quantizer, \"max\") else 1.0\n    return Clip(-max_value, max_value, identifier, quantizer)\n\ndef get_initializer(identifier):\n  \"\"\"Gets the initializer.\n\n  Args:\n    identifier: An initializer, which could be dict, string, or callable function.\n\n  Returns:\n    An initializer class\n\n  Raises:\n    ValueError: An error occurred when quantizer cannot be interpreted.\n  \"\"\"\n  if identifier is None:\n    return None\n  if isinstance(identifier, dict):\n    if identifier['class_name'] == 'QInitializer':\n      return QInitializer.from_config(identifier['config'])\n    else:\n      return initializers.get(identifier)\n  elif isinstance(identifier, six.string_types):\n    return initializers.get(identifier)\n  elif callable(identifier):\n    return identifier\n  else:\n    raise 
ValueError(\"Could not interpret initializer identifier: \" +\n                     str(identifier))\n"
  },
  {
    "path": "qkeras/qmac.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport tensorflow as tf\nfrom tensorflow.keras import constraints\nfrom .quantizers import get_quantizer\n\nfrom tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer\nfrom .qlayers import get_auto_range_constraint_initializer\n\n\n# QKeras needs to support more layers for matrix multiplication and shift\n# operations such as in Transformer. Such layers should be all placed here.\n\n\nclass QScaleShift(tf.keras.layers.Layer, PrunableLayer):\n  \"\"\"Quantized scale and shift layer.\n\n  output = scale * x + bias where scale and bias are each of shape (1,).\n\n  QScaleShift is similar to the special case in QDepthwiseConv2D\n    where kernel_size=(1,1). 
However there are several differences:\n  1) There is no concept of padding and striding in QScaleShift since\n    it's not a conv layer;\n  2) QDepthwiseConv2D expected min_ndim=4 for input shape; while QScaleShift\n    input could be any shape;\n  3) In QDepthwiseConv2D each output channel has its own weight value;\n    while QScaleShift share the same weight across the entire input tensor.\n  4) Since it's not a Conv operation, hardware implementation for\n    QScaleShift and QDWConv2D is fundamentally different. Therefore it\n    makes sense to separate them as two different types of layers.\n  \"\"\"\n\n  def __init__(self,\n               weight_quantizer=None,\n               bias_quantizer=None,\n               use_bias=True,\n               activation=None,\n               weight_initializer=\"he_normal\",\n               weight_regularizer=None,\n               bias_initializer=\"zeros\",\n               bias_regularizer=None,\n               **kwargs):\n\n    super().__init__()\n    self.use_bias = use_bias\n    self.weight_regularizer = weight_regularizer\n    self.bias_regularizer = bias_regularizer\n\n    self.weight_quantizer = weight_quantizer\n    self.bias_quantizer = bias_quantizer\n\n    self.weight_quantizer_internal = get_quantizer(self.weight_quantizer)\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n\n    _, self.weight_initializer = (\n        get_auto_range_constraint_initializer(\n            self.weight_quantizer_internal, None,\n            weight_initializer))\n\n    _, self.bias_initializer = (\n        get_auto_range_constraint_initializer(\n            self.bias_quantizer_internal, None, bias_initializer))\n\n    # optimize parameter set to \"auto\" scaling mode if possible\n    if hasattr(self.weight_quantizer_internal, \"_set_trainable_parameter\"):\n      self.weight_quantizer_internal._set_trainable_parameter()\n    if hasattr(self.bias_quantizer_internal, \"_set_trainable_parameter\"):\n      
self.bias_quantizer_internal._set_trainable_parameter()\n\n    self.quantizers = [self.weight_quantizer_internal,\n                       self.bias_quantizer_internal]\n\n    self.activation = get_quantizer(activation)\n\n    super().__init__(**kwargs)\n\n  def build(self, input_shape):\n    self.weight = self.add_weight(\n        name=\"weight\", shape=(1, 1), dtype=\"float32\",\n        initializer=self.weight_initializer,\n        regularizer=self.weight_regularizer, trainable=True)\n\n    if self.use_bias:\n      self.bias = self.add_weight(\n          name=\"bias\", shape=(1, 1), dtype=\"float32\",\n          initializer=self.bias_initializer, regularizer=self.bias_regularizer,\n          trainable=True)\n    else:\n      self.bias = None\n    self.built = True\n\n  def call(self, inputs):\n\n    quantized_weight = (\n        self.weight_quantizer_internal(self.weight) if\n        self.weight_quantizer_internal is not None else self.weight)\n\n    outputs = tf.math.multiply(inputs, quantized_weight)\n\n    if self.use_bias:\n      quantized_bias = (\n          self.bias_quantizer_internal(self.bias) if\n          self.bias_quantizer_internal is not None else self.bias)\n\n      outputs = quantized_bias + outputs\n\n    return self.activation(outputs) if self.activation is not None else outputs\n\n  def get_config(self):\n    config = {\n        \"weight_quantizer\": constraints.serialize(\n            self.weight_quantizer_internal# Google internal code, commented out by copybara\n            ),\n        \"bias_quantizer\": constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n            ),\n        \"weight_initializer\": constraints.serialize(\n            self.weight_initializer# Google internal code, commented out by copybara\n            ),\n        \"bias_initializer\": constraints.serialize(\n            self.bias_initializer# Google internal code, commented out by copybara\n            ),\n  
      \"activation\": constraints.serialize(\n            self.activation# Google internal code, commented out by copybara\n            ),\n        \"use_bias\": self.use_bias,\n        \"weight_regularizer\": constraints.serialize(\n            self.weight_regularizer# Google internal code, commented out by copybara\n            ),\n        \"bias_regularizer\": constraints.serialize(\n            self.bias_regularizer# Google internal code, commented out by copybara\n            ),\n    }\n    base_config = super().get_config()\n    base_config.update(config)\n    return base_config\n\n  def get_quantization_config(self):\n    return {\n        \"weight_quantizer\":\n            str(self.weight_quantizer_internal),\n        \"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"activation\":\n            str(self.activation)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n  def get_prunable_weights(self):\n    return [self.weight, self.bias]\n"
  },
  {
    "path": "qkeras/qmodel.proto",
    "content": "// Copyright 2019 Google LLC\n//\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n// ==============================================================================\nsyntax = \"proto2\";\n\npackage qkeras;\n\nimport \"google/protobuf/any.proto\";\n\n// Protobuf to represent a quantized machine learning model.\nmessage QModel {\n  // Layers of a quantized model.\n  repeated QLayer qlayers = 1;\n}\n\n// Protobuf to represent an individual layer that supports quantization.\n//\n// TODO(akshayap): Add platform agnostic way of saving weights, ideally\n// something that can mimic numpy arrays.\nmessage QLayer {\n  // Layer name.\n  optional string name = 1;\n  // Input shape for the layer.\n  repeated int32 input_shape = 2 [packed = true];\n  // Output shape for the layer.\n  repeated int32 output_shape = 3 [packed = true];\n  // Quantization configuration for this layer.\n  optional Quantization quantization = 4;\n  // Hardware parameters associated with this layer.\n  optional HardwareParams hw_params = 5;\n  // Model specific custom details.\n  optional google.protobuf.Any details = 6;\n}\n\n// Quantization configurations for a model layer.\nmessage Quantization {\n  // Number of bits to perform quantization.\n  optional int32 bits = 1;\n  // Number of bits to the left of the decimal point.\n  optional int32 integer = 2;\n  // The minimum allowed power of two exponent\n  optional int32 min_po2 = 3;\n  // The maximum allowed power of two 
exponent\n  optional int32 max_po2 = 4;\n}\n\n// Parameters for hardware synthesis of machine learning models.\nmessage HardwareParams {\n  // MAC bitwidth.\n  optional int32 mac_bitwidth = 1;\n}\n"
  },
  {
    "path": "qkeras/qnormalization.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# ==============================================================================\n\"\"\"Definition of normalization quantization package.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nimport six\nimport warnings\n\nimport tensorflow.compat.v2 as tf\n\nfrom tensorflow.keras import constraints\nfrom tensorflow.keras import initializers\nfrom tensorflow.keras import regularizers\nfrom tensorflow.keras.layers import BatchNormalization\nfrom tensorflow.python.framework import ops\nfrom tensorflow.python.framework import smart_cond as tf_utils\nfrom tensorflow.python.ops import array_ops\nfrom tensorflow.python.ops import math_ops\nfrom tensorflow.python.ops import nn\nfrom .qlayers import Clip\nfrom .qlayers import get_auto_range_constraint_initializer\nfrom .qlayers import get_quantizer\nfrom .quantizers import quantized_relu_po2\nfrom .quantizers import quantized_po2\nfrom .safe_eval import safe_eval\nfrom tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer\n\n\nclass QBatchNormalization(BatchNormalization, PrunableLayer):\n  \"\"\"Quantized Batch Normalization layer.\n  For training, mean and variance are not quantized.\n  For inference, the quantized moving mean and moving variance are used.\n\n  output = (x - mean) / sqrt(var + 
epsilon) * quantized_gamma + quantized_beta\n\n  \"\"\"\n\n  def __init__(\n      self,\n      axis=-1,\n      momentum=0.99,\n      epsilon=1e-3,\n      center=True,\n      scale=True,\n      activation=None,\n      beta_initializer='zeros',\n      gamma_initializer='ones',\n      moving_mean_initializer='zeros',\n      moving_variance_initializer='ones',\n      beta_regularizer=None,\n      gamma_regularizer=None,\n      beta_quantizer='quantized_po2(5)',\n      gamma_quantizer='quantized_relu_po2(6, 2048)',\n      mean_quantizer='quantized_po2(5)',\n      variance_quantizer='quantized_relu_po2(6, quadratic_approximation=True)',\n      inverse_quantizer=None,\n      gamma_constraint=None,\n      beta_constraint=None,\n      # use quantized_po2 and enforce quadratic approximation\n      # to get an even exponent for sqrt\n      beta_range=None,\n      gamma_range=None,\n      **kwargs):\n\n    if gamma_range is not None:\n      warnings.warn('gamma_range is deprecated in QBatchNormalization layer.')\n\n    if beta_range is not None:\n      warnings.warn('beta_range is deprecated in QBatchNormalization layer.')\n\n    self.gamma_range = gamma_range\n    self.beta_range = beta_range\n    self.activation = activation\n\n    self.beta_quantizer = beta_quantizer\n    self.gamma_quantizer = gamma_quantizer\n    self.mean_quantizer = mean_quantizer\n    self.variance_quantizer = variance_quantizer\n    self.inverse_quantizer = inverse_quantizer\n\n    if self.inverse_quantizer is not None:\n      assert self.variance_quantizer is None and self.gamma_quantizer is None, (\n          'If using the inverse quantizer, the gamma and variance quantizers '\n          'should not be used in order to avoid quantizing a value twice.')\n\n    self.beta_quantizer_internal = get_quantizer(self.beta_quantizer)\n    self.gamma_quantizer_internal = get_quantizer(self.gamma_quantizer)\n    self.mean_quantizer_internal = get_quantizer(self.mean_quantizer)\n    
self.variance_quantizer_internal = get_quantizer(self.variance_quantizer)\n    self.inverse_quantizer_internal = get_quantizer(self.inverse_quantizer)\n\n    if hasattr(self.gamma_quantizer_internal, '_set_trainable_parameter'):\n      self.gamma_quantizer_internal._set_trainable_parameter()\n    if hasattr(self.variance_quantizer_internal, '_set_trainable_parameter'):\n      self.variance_quantizer_internal._set_trainable_parameter()\n\n    self.quantizers = [\n        self.gamma_quantizer_internal,\n        self.beta_quantizer_internal,\n        self.mean_quantizer_internal,\n        self.variance_quantizer_internal,\n        self.inverse_quantizer_internal\n    ]\n\n    if scale and self.gamma_quantizer:\n      gamma_constraint, gamma_initializer = (\n          get_auto_range_constraint_initializer(\n              self.gamma_quantizer_internal,\n              gamma_constraint,\n              gamma_initializer)\n      )\n\n    if center and self.beta_quantizer:\n      beta_constraint, beta_initializer = (\n          get_auto_range_constraint_initializer(\n              self.beta_quantizer_internal,\n              beta_constraint,\n              beta_initializer)\n      )\n\n    if kwargs.get('fused', None):\n      warnings.warn('batch normalization fused is disabled '\n                    'in qkeras qnormalization.py.')\n      del kwargs['fused']\n\n    if kwargs.get('renorm', None):\n      warnings.warn('batch normalization renorm is disabled '\n                    'in qkeras qnormalization.py.')\n      del kwargs['renorm']\n\n    if kwargs.get('virtual_batch_size', None):\n      warnings.warn('batch normalization virtual_batch_size is disabled '\n                    'in qkeras qnormalization.py.')\n      del kwargs['virtual_batch_size']\n\n    if kwargs.get('adjustment', None):\n      warnings.warn('batch normalization adjustment is disabled '\n                    'in qkeras qnormalization.py.')\n      del kwargs['adjustment']\n\n    super().__init__(\n        
axis=axis,\n        momentum=momentum,\n        epsilon=epsilon,\n        center=center,\n        scale=scale,\n        beta_initializer=beta_initializer,\n        gamma_initializer=gamma_initializer,\n        moving_mean_initializer=moving_mean_initializer,\n        moving_variance_initializer=moving_variance_initializer,\n        beta_regularizer=beta_regularizer,\n        gamma_regularizer=gamma_regularizer,\n        beta_constraint=beta_constraint,\n        gamma_constraint=gamma_constraint,\n        fused=False,\n        renorm=False,\n        virtual_batch_size=None,\n        adjustment=None,\n        **kwargs\n    )\n\n  def call(self, inputs, training=None):\n    if self.scale and self.gamma_quantizer:\n      quantized_gamma = self.gamma_quantizer_internal(self.gamma)\n    else:\n      quantized_gamma = self.gamma\n\n    if self.center and self.beta_quantizer:\n      quantized_beta = self.beta_quantizer_internal(self.beta)\n    else:\n      quantized_beta = self.beta\n\n    if self.mean_quantizer:\n      quantized_moving_mean = self.mean_quantizer_internal(self.moving_mean)\n    else:\n      quantized_moving_mean = self.moving_mean\n\n    if self.variance_quantizer:\n      quantized_moving_variance = self.variance_quantizer_internal(\n          self.moving_variance)\n    else:\n      quantized_moving_variance = self.moving_variance\n\n    training = self._get_training_value(training)\n\n    # Compute the axes along which to reduce the mean / variance\n    input_shape = inputs.shape\n    ndims = len(input_shape)\n    reduction_axes = [i for i in range(ndims) if i not in self.axis]\n\n    # Broadcasting only necessary for single-axis batch norm where the axis is\n    # not the last dimension\n    broadcast_shape = [1] * ndims\n    broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value\n    def _broadcast(v):\n      if (v is not None and len(v.shape) != ndims and\n          reduction_axes != list(range(ndims - 1))):\n        return 
array_ops.reshape(v, broadcast_shape)\n      return v\n\n    scale, offset = _broadcast(quantized_gamma), _broadcast(quantized_beta)\n\n    # Determine a boolean value for `training`: could be True, False, or None.\n    training_value = tf_utils.smart_constant_value(training)\n    if training_value == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison\n      quantized_mean, quantized_variance = (quantized_moving_mean,\n                                            quantized_moving_variance)\n    else:\n      # Some of the computations here are not necessary when training==False\n      # but not a constant. However, this makes the code simpler.\n      keep_dims = len(self.axis) > 1\n      mean, variance = self._moments(\n          math_ops.cast(inputs, self._param_dtype),\n          reduction_axes,\n          keep_dims=keep_dims)\n\n      moving_mean = self.moving_mean\n      moving_variance = self.moving_variance\n\n      mean = tf_utils.smart_cond(\n          training, lambda: mean, lambda: ops.convert_to_tensor(moving_mean))\n      variance = tf_utils.smart_cond(\n          training,\n          lambda: variance,\n          lambda: ops.convert_to_tensor(moving_variance))\n\n      new_mean, new_variance = mean, variance\n\n      if self.mean_quantizer:\n        quantized_mean = self.mean_quantizer_internal(mean)\n      else:\n        quantized_mean = mean\n\n      if self.variance_quantizer:\n        quantized_variance = self.variance_quantizer_internal(variance)\n      else:\n        quantized_variance = variance\n\n      if self._support_zero_size_input():\n        inputs_size = array_ops.size(inputs)\n      else:\n        inputs_size = None\n\n      def _do_update(var, value):\n        \"\"\"Compute the updates for mean and variance.\"\"\"\n        return self._assign_moving_average(var, value, self.momentum,\n                                           inputs_size)\n\n      def mean_update():\n        true_branch = lambda: 
_do_update(self.moving_mean, new_mean)\n        false_branch = lambda: self.moving_mean\n        return tf_utils.smart_cond(training, true_branch, false_branch)\n\n      def variance_update():\n        \"\"\"Update the moving variance.\"\"\"\n        true_branch = lambda: _do_update(self.moving_variance, new_variance)\n        false_branch = lambda: self.moving_variance\n        return tf_utils.smart_cond(training, true_branch, false_branch)\n\n      self.add_update(mean_update)\n      self.add_update(variance_update)\n\n    quantized_mean = _broadcast(math_ops.cast(quantized_mean, inputs.dtype))\n    quantized_variance = _broadcast(\n        math_ops.cast(quantized_variance, inputs.dtype))\n    if offset is not None:\n      offset = math_ops.cast(offset, inputs.dtype)\n    if scale is not None:\n      scale = math_ops.cast(scale, inputs.dtype)\n\n    # Calculate and quantize the inverse\n    inv = math_ops.rsqrt(quantized_variance + self.epsilon)\n    if scale is not None:\n      inv *= scale\n    if self.inverse_quantizer_internal is not None:\n      inv = self.inverse_quantizer_internal(inv)\n\n    # Calculate the forward pass of the BN\n    outputs = inputs * math_ops.cast(inv, inputs.dtype) + math_ops.cast(\n        offset - quantized_mean * inv\n        if offset is not None else -quantized_mean * inv, inputs.dtype)\n\n    # If some components of the shape got lost due to adjustments, fix that.\n    outputs.set_shape(input_shape)\n\n    return outputs\n\n  def get_config(self):\n    config = {\n        'axis': self.axis,\n        'momentum': self.momentum,\n        'epsilon': self.epsilon,\n        'center': self.center,\n        'scale': self.scale,\n        'beta_quantizer': constraints.serialize(\n            self.beta_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'gamma_quantizer': constraints.serialize(\n            self.gamma_quantizer_internal# Google internal code, commented out by copybara\n        ),\n      
  'mean_quantizer': constraints.serialize(\n            self.mean_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'variance_quantizer': constraints.serialize(\n            self.variance_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'beta_initializer': initializers.serialize(\n            self.beta_initializer# Google internal code, commented out by copybara\n        ),\n        'gamma_initializer': initializers.serialize(\n            self.gamma_initializer# Google internal code, commented out by copybara\n        ),\n        'moving_mean_initializer': initializers.serialize(\n            self.moving_mean_initializer# Google internal code, commented out by copybara\n        ),\n        'moving_variance_initializer': initializers.serialize(\n            self.moving_variance_initializer# Google internal code, commented out by copybara\n        ),\n        'inverse_quantizer': initializers.serialize(\n            self.inverse_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'beta_regularizer': regularizers.serialize(\n            self.beta_regularizer# Google internal code, commented out by copybara\n        ),\n        'gamma_regularizer': regularizers.serialize(\n            self.gamma_regularizer# Google internal code, commented out by copybara\n        ),\n        'beta_constraint': constraints.serialize(\n            self.beta_constraint# Google internal code, commented out by copybara\n        ),\n        'gamma_constraint': constraints.serialize(\n            self.gamma_constraint# Google internal code, commented out by copybara\n        ),\n        'beta_range': self.beta_range,\n        'gamma_range': self.gamma_range,\n    }\n    base_config = super().get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def compute_output_shape(self, input_shape):\n    return input_shape\n\n  def get_quantizers(self):\n   
 return self.quantizers\n\n  def get_prunable_weights(self):\n    return []\n"
  },
  {
    "path": "qkeras/qoctave.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Octave Convolution.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport re\n\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Add\nfrom tensorflow.keras.layers import AveragePooling2D\nfrom tensorflow.keras.layers import Conv2D\nfrom tensorflow.keras.layers import SeparableConv2D\nfrom tensorflow.keras.layers import UpSampling2D\nfrom .qlayers import QActivation\nfrom .qconvolutional import QConv2D\nfrom .qconvolutional import QSeparableConv2D\nfrom .qpooling import QAveragePooling2D\n\n\ndef GetActivationSuffix(activation):\n  \"\"\"Returns suffix for layer name to facilitate debugging.\"\"\"\n  if not activation:\n    return \"linear\"\n\n  if \"po2\" in activation:\n    return \"q2\"\n  elif \"quantized_relu\" in activation:\n    suffix = \"qr\"\n  elif \"quantized_tanh\" in activation:\n    suffix = \"qt\"\n  else:\n    suffix = \"qb\"\n\n  numbers = re.findall(r\"[0-9]+\", activation)\n\n  numbers = [n + \"_\" if len(n) > 1 else n for n in numbers]\n\n  return suffix + \"\".join(numbers)\n\n\ndef QOctaveConv2D(\n    filters,\n    kernel_size,\n    alpha,\n    strides=(1, 1),\n    padding=\"valid\",\n    kernel_initializer=\"he_normal\",\n    
bias_initializer=\"zeros\",\n    # NOTE: kernel_regularizer not used with separable convolution\n    kernel_regularizer=None,\n    bias_regularizer=None,\n    kernel_constraint=None,\n    bias_constraint=None,\n    use_separable=True,\n    name=\"\",\n    **kwargs):\n  \"\"\"Implements quantized QOctaveConv2D.\"\"\"\n\n  def _QOctaveConv2DInternal(x):\n    \"\"\"Computes QOctaveConv2D on a tensor.\"\"\"\n\n    x_h, x_l = x\n\n    bias_quantizer = kwargs.get(\"bias_quantizer\", None)\n    kernel_quantizer = kwargs.get(\"kernel_quantizer\", None)\n    depthwise_quantizer = kwargs.get(\"depthwise_quantizer\", None)\n    pointwise_quantizer = kwargs.get(\"pointwise_quantizer\", None)\n    acc_quantizer = kwargs.get(\"acc_quantizer\", None)\n    pooling_quantizer = kwargs.get(\"pooling_quantizer\", None)\n    depthwise_activation = kwargs.get(\"depthwise_activation\", None)\n    activation = kwargs.get(\"activation\", None)\n\n    bias_range = kwargs.get(\"bias_range\", 1.0)\n    kernel_range = kwargs.get(\"kernel_range\", 1.0)\n    depthwise_range = kwargs.get(\"depthwise_range\", 1.0)\n    pointwise_range = kwargs.get(\"pointwise_range\", 1.0)\n\n    if activation:\n      act_suffix = \"_\" + GetActivationSuffix(activation)\n    acc_suffix = \"_\" + GetActivationSuffix(acc_quantizer)\n\n    if alpha == -1.0:\n      if use_separable:\n        x_h = QSeparableConv2D(\n            filters, kernel_size, strides=strides, padding=padding,\n            depthwise_regularizer=kernel_regularizer,\n            depthwise_constraint=kernel_constraint,\n            depthwise_initializer=kernel_initializer,\n            pointwise_regularizer=kernel_regularizer,\n            pointwise_constraint=kernel_constraint,\n            pointwise_initializer=kernel_initializer,\n            bias_regularizer=bias_regularizer,\n            bias_constraint=bias_constraint,\n            bias_initializer=bias_initializer,\n            depthwise_quantizer=depthwise_quantizer,\n            
pointwise_quantizer=pointwise_quantizer,\n            bias_quantizer=bias_quantizer,\n            depthwise_activation=depthwise_activation,\n            pointwise_range=pointwise_range,\n            depthwise_range=depthwise_range,\n            bias_range=bias_range,\n            name=name + \"_c_h_to_h\")(x_h)\n      else:\n        x_h = QConv2D(\n            filters, kernel_size, strides=strides, padding=padding,\n            kernel_regularizer=kernel_regularizer,\n            kernel_constraint=kernel_constraint,\n            kernel_initializer=kernel_initializer,\n            bias_regularizer=bias_regularizer,\n            bias_constraint=bias_constraint,\n            bias_initializer=bias_initializer,\n            kernel_quantizer=kernel_quantizer,\n            bias_quantizer=bias_quantizer,\n            kernel_range=kernel_range,\n            bias_range=bias_range,\n            name=name + \"_c_h_to_h\")(x_h)\n\n      if activation:\n        x_h = QActivation(\n            activation, name=name + \"_c_h_to_h_act\" + act_suffix)(\n                x_h)\n\n      return [x_h, None]\n\n    co_h = int(filters * (1 - alpha))\n    co_l = filters - co_h\n\n    x_h_to_h = None\n    x_h_to_l = None\n    x_l_to_l = None\n    x_l_to_h = None\n\n    if co_h > 0:\n      if x_h is not None:\n        if use_separable:\n          x_h_to_h = QSeparableConv2D(\n              co_h, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              depthwise_quantizer=depthwise_quantizer,\n              
pointwise_quantizer=pointwise_quantizer,\n              bias_quantizer=bias_quantizer,\n              depthwise_activation=depthwise_activation,\n              pointwise_range=pointwise_range,\n              depthwise_range=depthwise_range,\n              bias_range=bias_range,\n              name=name + \"_c_h_to_h\")(x_h)\n        else:\n          x_h_to_h = QConv2D(\n              co_h, kernel_size, strides=strides, padding=padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              kernel_quantizer=kernel_quantizer,\n              bias_quantizer=bias_quantizer,\n              kernel_range=kernel_range,\n              bias_range=bias_range,\n              name=name + \"_c_h_to_h\")(x_h)\n\n        if acc_quantizer:\n          x_h_to_h = QActivation(\n              acc_quantizer,\n              name=name + \"_c_h_to_h_act\" + acc_suffix)(x_h_to_h)\n\n    if co_l > 0:\n      if x_h is not None:\n        x_h_to_l = QAveragePooling2D(\n            pool_size=2, strides=2,\n            quantizer=pooling_quantizer,\n            name=name + \"_avg_h_to_l\")(x_h)\n\n        if use_separable:\n          x_h_to_l = QSeparableConv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              
depthwise_quantizer=depthwise_quantizer,\n              pointwise_quantizer=pointwise_quantizer,\n              bias_quantizer=bias_quantizer,\n              depthwise_activation=depthwise_activation,\n              pointwise_range=pointwise_range,\n              depthwise_range=depthwise_range,\n              bias_range=bias_range,\n              name=name + \"_c_h_to_l\")(x_h_to_l)\n        else:\n          x_h_to_l = QConv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              kernel_quantizer=kernel_quantizer,\n              bias_quantizer=bias_quantizer,\n              kernel_range=kernel_range,\n              bias_range=bias_range,\n              name=name + \"_c_h_to_l\")(x_h_to_l)\n\n        if acc_quantizer:\n          x_h_to_l = QActivation(\n              acc_quantizer,\n              name=name + \"_c_h_to_l_act\" + acc_suffix)(x_h_to_l)\n\n    if co_h > 0:\n      if x_l is not None:\n        _, height, width, _ = x_l.shape.as_list()\n        if height == 1 and width == 1:\n          local_kernel = 1\n          local_strides = 1\n          local_padding = \"same\"\n          upsampling = False\n        else:\n          local_kernel = kernel_size\n          local_strides = strides\n          local_padding = padding\n          upsampling = True\n\n        if use_separable and upsampling:\n          x_l_to_h = QSeparableConv2D(\n              co_h, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n        
      pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              depthwise_quantizer=depthwise_quantizer,\n              pointwise_quantizer=pointwise_quantizer,\n              bias_quantizer=bias_quantizer,\n              depthwise_activation=depthwise_activation,\n              pointwise_range=pointwise_range,\n              depthwise_range=depthwise_range,\n              bias_range=bias_range,\n              name=name + \"_c_l_to_h\")(x_l)\n        else:\n          x_l_to_h = QConv2D(\n              co_h, local_kernel, strides=local_strides, padding=local_padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              kernel_quantizer=kernel_quantizer,\n              bias_quantizer=bias_quantizer,\n              kernel_range=kernel_range,\n              bias_range=bias_range,\n              name=name + \"_c_l_to_h\")(x_l)\n\n        if acc_quantizer:\n          x_l_to_h = QActivation(\n              acc_quantizer,\n              name=name + \"_c_l_to_h_act\" + acc_suffix)(x_l_to_h)\n\n        if upsampling:\n          x_l_to_h = UpSampling2D(\n              size=(2, 2), name=name + \"_u_l_to_h\")(x_l_to_h)\n\n    if co_l > 0:\n      if x_l is not None:\n        if use_separable:\n          x_l_to_l = QSeparableConv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              
pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              depthwise_quantizer=depthwise_quantizer,\n              pointwise_quantizer=pointwise_quantizer,\n              bias_quantizer=bias_quantizer,\n              depthwise_activation=depthwise_activation,\n              pointwise_range=pointwise_range,\n              depthwise_range=depthwise_range,\n              bias_range=bias_range,\n              name=name + \"_c_l_to_l\")(x_l)\n        else:\n          x_l_to_l = QConv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              kernel_quantizer=kernel_quantizer,\n              bias_quantizer=bias_quantizer,\n              kernel_range=kernel_range,\n              bias_range=bias_range,\n              name=name + \"_c_l_to_l\")(x_l)\n\n        if acc_quantizer:\n          x_l_to_l = QActivation(\n              acc_quantizer, name=name + \"_c_l_to_l_act\" + acc_suffix)(\n                  x_l_to_l)\n\n    if x_h_to_h is not None and x_l_to_h is not None:\n      x_h = Add(name=name + \"_a_h\")([x_h_to_h, x_l_to_h])\n    elif x_h_to_h is not None:\n      x_h = x_h_to_h\n    elif x_l_to_h is not None:\n      x_h = x_l_to_h\n    else:\n      x_h = None\n\n    if x_l_to_l is not None and x_h_to_l is not None:\n      x_l = Add(name=name + \"_a_l\")([x_l_to_l, x_h_to_l])\n    elif x_l_to_l is not None:\n      x_l = x_l_to_l\n    elif x_h_to_l is not None:\n      x_l = x_h_to_l\n    
else:\n      x_l = None\n\n    if x_h is not None and activation is not None:\n      x_h = QActivation(activation,\n                        name=name + \"_h_act\" + act_suffix)(x_h)\n\n    if x_l is not None and activation is not None:\n      x_l = QActivation(activation,\n                        name=name + \"_l_act\" + act_suffix)(x_l)\n\n    return [x_h, x_l]\n\n  return _QOctaveConv2DInternal\n\n\ndef OctaveConv2D(\n    filters, kernel_size, alpha,\n    strides=(1, 1), padding=\"valid\",\n    kernel_initializer=\"he_normal\",\n    bias_initializer=\"zeros\",\n    kernel_regularizer=None,\n    bias_regularizer=None,\n    kernel_constraint=None,\n    bias_constraint=None,\n    activation=None,\n    use_separable=True,\n    name=\"\",\n    **kwargs):\n\n  \"\"\"Implements OctaveConv2D.\"\"\"\n\n  def _OctaveConv2DInternal(x):\n\n    \"\"\"Computes octave on tensor.\"\"\"\n\n    acc_quantizer = kwargs.get(\"acc_quantizer\", None)\n\n    x_h, x_l = x\n\n    if alpha == -1.0:\n      if use_separable:\n        x_h = SeparableConv2D(\n            filters, kernel_size, strides=strides, padding=padding,\n            depthwise_regularizer=kernel_regularizer,\n            depthwise_constraint=kernel_constraint,\n            depthwise_initializer=kernel_initializer,\n            pointwise_regularizer=kernel_regularizer,\n            pointwise_constraint=kernel_constraint,\n            pointwise_initializer=kernel_initializer,\n            bias_regularizer=bias_regularizer,\n            bias_constraint=bias_constraint,\n            bias_initializer=bias_initializer,\n            name=name + \"_c_h_to_h\")(x_h)\n      else:\n        x_h = Conv2D(\n            filters, kernel_size, strides=strides, padding=padding,\n            kernel_regularizer=kernel_regularizer,\n            kernel_constraint=kernel_constraint,\n            kernel_initializer=kernel_initializer,\n            bias_regularizer=bias_regularizer,\n            bias_constraint=bias_constraint,\n            
bias_initializer=bias_initializer,\n            name=name+\"_c_h_to_h\")(x_h)\n\n      if activation:\n        x_h = Activation(activation, name=name + \"_c_h_to_h_act\")(x_h)\n\n      return [x_h, None]\n\n    co_h = int(filters * (1 - alpha))\n    co_l = filters - co_h\n\n    x_h_to_h = None\n    x_h_to_l = None\n    x_l_to_l = None\n    x_l_to_h = None\n\n    if co_h > 0:\n      if x_h is not None:\n        if use_separable:\n          x_h_to_h = SeparableConv2D(\n              co_h, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_h_to_h\")(x_h)\n        else:\n          x_h_to_h = Conv2D(\n              co_h, kernel_size, strides=strides, padding=padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_h_to_h\")(x_h)\n\n        if acc_quantizer:\n          x_h_to_h = Activation(\n              acc_quantizer, name=name + \"_c_h_to_h_act\")(x_h_to_h)\n\n    if co_l > 0:\n      if x_h is not None:\n        x_h_to_l = AveragePooling2D(pool_size=2, strides=2,\n                                    name=name + \"_p_h_to_l\")(x_h)\n\n        if use_separable:\n          x_h_to_l = SeparableConv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              
depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_h_to_l\")(x_h_to_l)\n        else:\n          x_h_to_l = Conv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_h_to_l\")(x_h_to_l)\n\n        if acc_quantizer:\n          x_h_to_l = Activation(\n              acc_quantizer, name=name + \"_c_h_to_l_act\")(x_h_to_l)\n\n    if co_h > 0:\n      if x_l is not None:\n        _, height, width, _ = x_l.shape.as_list()\n        if height == 1 and width == 1:\n          local_kernel = 1\n          local_strides = 1\n          local_padding = \"same\"\n          upsampling = False\n        else:\n          local_kernel = kernel_size\n          local_strides = strides\n          local_padding = padding\n          upsampling = True\n\n        if use_separable and upsampling:\n          x_l_to_h = SeparableConv2D(\n              co_h, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              
pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_l_to_h\")(x_l)\n        else:\n          x_l_to_h = Conv2D(\n              co_h, local_kernel, strides=local_strides, padding=local_padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_l_to_h\")(x_l)\n\n        if acc_quantizer:\n          x_l_to_h = Activation(\n              acc_quantizer, name=name + \"_c_l_to_h_act\")(x_l_to_h)\n\n        if upsampling:\n          x_l_to_h = UpSampling2D(\n              size=(2, 2), name=name + \"_u_l_to_h\")(x_l_to_h)\n\n    if co_l > 0:\n      if x_l is not None:\n        if use_separable:\n          x_l_to_l = SeparableConv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              depthwise_regularizer=kernel_regularizer,\n              depthwise_constraint=kernel_constraint,\n              depthwise_initializer=kernel_initializer,\n              pointwise_regularizer=kernel_regularizer,\n              pointwise_constraint=kernel_constraint,\n              pointwise_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_l_to_l\")(x_l)\n        else:\n          x_l_to_l = Conv2D(\n              co_l, kernel_size, strides=strides, padding=padding,\n              kernel_regularizer=kernel_regularizer,\n              kernel_constraint=kernel_constraint,\n              kernel_initializer=kernel_initializer,\n              bias_regularizer=bias_regularizer,\n              bias_constraint=bias_constraint,\n              bias_initializer=bias_initializer,\n              name=name + \"_c_l_to_l\")(x_l)\n\n        if 
acc_quantizer:\n          x_l_to_l = Activation(\n              acc_quantizer, name=name + \"_c_l_to_l_act\")(x_l_to_l)\n\n    if x_h_to_h is not None and x_l_to_h is not None:\n      x_h = Add(name=name + \"_a_h\")([x_h_to_h, x_l_to_h])\n    elif x_h_to_h is not None:\n      x_h = x_h_to_h\n    elif x_l_to_h is not None:\n      x_h = x_l_to_h\n    else:\n      x_h = None\n\n    if x_l_to_l is not None and x_h_to_l is not None:\n      x_l = Add(name=name + \"_a_l\")([x_l_to_l, x_h_to_l])\n    elif x_l_to_l is not None:\n      x_l = x_l_to_l\n    elif x_h_to_l is not None:\n      x_l = x_h_to_l\n    else:\n      x_l = None\n\n    if x_h is not None:\n      x_h = Activation(activation, name=name + \"_h_act\")(x_h)\n\n    if x_l is not None:\n      x_l = Activation(activation, name=name + \"_l_act\")(x_l)\n\n    return (x_h, x_l)\n\n  return _OctaveConv2DInternal\n
  },
  {
    "path": "qkeras/qpooling.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nimport numpy as np\nfrom tensorflow.keras import constraints\n\nimport tensorflow as tf\nimport tensorflow.keras.backend as K\n\nfrom tensorflow.keras.layers import AveragePooling2D\nfrom tensorflow.keras.layers import GlobalAveragePooling2D\nfrom .qlayers import QActivation\nfrom .quantizers import get_quantizer\n\n\nclass QAveragePooling2D(AveragePooling2D):\n  \"\"\"Computes the quantized version of AveragePooling2D.\"\"\"\n\n  def __init__(self, pool_size=(2, 2),\n               strides=None,\n               padding=\"valid\",\n               data_format=None,\n               average_quantizer=None,\n               activation=None,\n               **kwargs):\n\n    self.average_quantizer = average_quantizer\n    self.average_quantizer_internal = get_quantizer(self.average_quantizer)\n    self.quantizers = [self.average_quantizer_internal]\n\n    if activation is not None:\n      self.activation = get_quantizer(activation)\n    else:\n      self.activation = activation\n\n    super().__init__(\n        pool_size=pool_size,\n        strides=strides,\n        padding=padding,\n        data_format=data_format,\n        **kwargs\n    )\n\n  def call(self, inputs):\n    \"\"\"Performs quantized AveragePooling followed by QActivation.\n\n    Since there is no specific parameter for 
averaging op, we couldn't apply\n    averaging quantizer to the averaging op. We have two options:\n    1. we perform our own average as sum first then multiply with the\n       inversion\n       of the division factor: sum(x) * quantize(1/pool_area)\n    2. first, we call keras version of averaging first: y1 = keras_average(x)\n       then multiply it with pool_size^2: y2 = y1 * pool_area\n       Last, y3 = y2 * quantize(1/ pool_area)\n    3. Improved based on #2, but multiply x with pool_area before averaging\n       so that we don't lose precision during averaging. The order now becomes:\n       first, multiply x with pool_area: y1 = x * pool_area\n       then we call keras version of averaging: y2 = keras_average(y1)\n       Last, y3 = y2 * quantize(1/ pool_area)\n    4. Since there is sum_pooling operation, another solution is to use\n       depthwise_conv2d with kernel weights = 1 to get the pooling sum. In this\n       case we don't lose precision due to averaging. However, this solution\n       will introduce extra weights to the layer, which might break our code\n       elsewhere.\n\n    Since we need to match software and hardware inference numerics, we are now\n    using #3 in the implementation.\n    \"\"\"\n\n    if self.average_quantizer:\n      # Calculates the pool area\n      if isinstance(self.pool_size, int):\n        pool_area = self.pool_size * self.pool_size\n      else:\n        pool_area = np.prod(self.pool_size)\n\n      # Calculates the pooling average of x*pool_area\n      x = super(QAveragePooling2D, self).call(inputs*pool_area)\n\n      # Quantizes the multiplication factor.\n      mult_factor = 1.0 / pool_area\n      q_mult_factor = self.average_quantizer_internal(mult_factor)\n      q_mult_factor = K.cast_to_floatx(q_mult_factor)\n\n      # Computes pooling average.\n      x = x * q_mult_factor\n\n    else:\n      # Since no quantizer is available, we directly call the keras layer\n      x = super(QAveragePooling2D, 
self).call(inputs)\n\n    if self.activation is not None:\n      return self.activation(x)\n    return x\n\n  def get_config(self):\n    config = {\n        \"average_quantizer\": constraints.serialize(\n            self.average_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        \"activation\": constraints.serialize(\n            self.activation# Google internal code, commented out by copybara\n        ),\n    }\n    base_config = super(QAveragePooling2D, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    return {\n        \"average_quantizer\":\n            str(self.average_quantizer_internal),\n        \"activation\":\n            str(self.activation)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n\n\nclass QGlobalAveragePooling2D(GlobalAveragePooling2D):\n  \"\"\"Computes the quantized version of GlobalAveragePooling2D.\"\"\"\n\n  def __init__(self, data_format=None,\n               average_quantizer=None,\n               activation=None,\n               **kwargs):\n\n    self.average_quantizer = average_quantizer\n    self.average_quantizer_internal = get_quantizer(self.average_quantizer)\n    self.quantizers = [self.average_quantizer_internal]\n\n    if activation is not None:\n      self.activation = get_quantizer(activation)\n    else:\n      self.activation = activation\n\n    super().__init__(data_format=data_format, **kwargs)\n\n  def compute_pooling_area(self, input_shape):\n    if not isinstance(input_shape, tuple):\n      input_shape = input_shape.as_list()\n    if self.data_format == \"channels_last\":\n      return input_shape[1] * input_shape[2]\n    else:\n      return input_shape[2] * input_shape[3]\n\n  def call(self, inputs):\n    \"\"\"Performs quantized GlobalAveragePooling followed by QActivation.\n\n    Since there is no specific parameter for averaging op, we couldn't apply\n    averaging quantizer to the 
averaging op. We have two options:\n    1. we perform our own average as sum first then multiply with the\n       inversion\n       of the division factor: sum(x) * quantize(1/pool_area)\n    2. first, we call keras version of averaging first:\n       y1 = keras_global_average(x)\n       then multiply it with the denominator(pool_area) used by averaging:\n       y2 = y1 * pool_area\n       Last, y3 = y2 * quantize(1/ pool_area)\n    3. we perform pooling sum, and then multiply the sum with the quantized\n       inverse multiplication factor to get the average value.\n\n    Our previous implementation uses option #2. Yet we observed minor numerical\n    mismatch between software and hardware inference. Therefore we use #3 as\n    the current implementation.\n    \"\"\"\n\n    if self.average_quantizer:\n      # Calculates pooling sum.\n      if self.data_format == \"channels_last\":\n        x = K.sum(inputs, axis=[1, 2], keepdims=self.keepdims)\n      else:\n        x = K.sum(inputs, axis=[2, 3], keepdims=self.keepdims)\n\n      # Calculates the pooling area\n      pool_area = self.compute_pooling_area(input_shape=inputs.shape)\n\n      # Quantizes the inverse multiplication factor\n      mult_factor = 1.0 / pool_area\n      q_mult_factor = self.average_quantizer_internal(mult_factor)\n\n      # Derives average pooling value from pooling sum.\n      x = x * q_mult_factor\n\n    else:\n      # If quantizer is not available, calls the keras layer.\n      x = super(QGlobalAveragePooling2D, self).call(inputs)\n\n    if self.activation is not None:\n      return self.activation(x)\n    return x\n\n  def get_config(self):\n    config = {\n        \"average_quantizer\": constraints.serialize(\n            self.average_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        \"activation\": constraints.serialize(\n            self.activation# Google internal code, commented out by copybara\n        ),\n    }\n    base_config = 
super(QGlobalAveragePooling2D, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    return {\n        \"average_quantizer\":\n            str(self.average_quantizer_internal),\n        \"activation\":\n            str(self.activation)\n    }\n\n  def get_quantizers(self):\n    return self.quantizers\n"
  },
  {
    "path": "qkeras/qrecurrent.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\" Quantized Recurrent layers. \"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport warnings\nimport tensorflow as tf\nfrom tensorflow.keras import activations\nfrom tensorflow.keras import constraints\nfrom tensorflow.keras import initializers\nfrom tensorflow.keras import regularizers\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import SimpleRNNCell\nfrom tensorflow.keras.layers import LSTMCell\nfrom tensorflow.keras.layers import GRUCell\nfrom tensorflow.keras.layers import RNN\nfrom tensorflow.keras.layers import Bidirectional\nfrom tensorflow.python.util import nest\nfrom tensorflow.python.ops import array_ops\n# from tensorflow.python.ops import array_ops\nfrom tensorflow.python.framework import ops\nfrom tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer\n\nimport tensorflow.keras.backend as K\nfrom .qlayers import get_auto_range_constraint_initializer\nfrom .qlayers import QActivation\nfrom .quantizers import get_quantized_initializer\nfrom .quantizers import get_quantizer\n\n\nclass QSimpleRNNCell(SimpleRNNCell):\n  \"\"\"\n  Cell class for the QSimpleRNNCell layer.\n\n  Most of these parameters follow the implementation of 
SimpleRNNCell in\n  Keras, with the exception of kernel_quantizer, recurrent_quantizer,\n  bias_quantizer, and state_quantizer.\n\n  kernel_quantizer: quantizer function/class for kernel\n  recurrent_quantizer: quantizer function/class for recurrent kernel\n  bias_quantizer: quantizer function/class for bias\n  state_quantizer: quantizer function/class for states\n\n  We refer the reader to the documentation of SimpleRNNCell in Keras for the\n  other parameters.\n\n  \"\"\"\n  def __init__(self,\n               units,\n               activation='quantized_tanh',\n               use_bias=True,\n               kernel_initializer='glorot_uniform',\n               recurrent_initializer='orthogonal',\n               bias_initializer='zeros',\n               kernel_regularizer=None,\n               recurrent_regularizer=None,\n               bias_regularizer=None,\n               kernel_constraint=None,\n               recurrent_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               recurrent_quantizer=None,\n               bias_quantizer=None,\n               state_quantizer=None,\n               dropout=0.,\n               recurrent_dropout=0.,\n               **kwargs):\n\n    self.kernel_quantizer = kernel_quantizer\n    self.recurrent_quantizer = recurrent_quantizer\n    self.bias_quantizer = bias_quantizer\n    self.state_quantizer = state_quantizer\n\n    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)\n    self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer)\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n    self.state_quantizer_internal = get_quantizer(self.state_quantizer)\n\n    self.quantizers = [\n        self.kernel_quantizer_internal, self.recurrent_quantizer_internal,\n        self.bias_quantizer_internal, self.state_quantizer_internal\n    ]\n\n    if hasattr(self.kernel_quantizer_internal, \"_set_trainable_parameter\"):\n      
self.kernel_quantizer_internal._set_trainable_parameter()\n\n    if hasattr(self.recurrent_quantizer_internal, \"_set_trainable_parameter\"):\n      self.recurrent_quantizer_internal._set_trainable_parameter()\n\n    kernel_constraint, kernel_initializer = (\n        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,\n                                              kernel_constraint,\n                                              kernel_initializer))\n\n    recurrent_constraint, recurrent_initializer = (\n        get_auto_range_constraint_initializer(self.recurrent_quantizer_internal,\n                                              recurrent_constraint,\n                                              recurrent_initializer))\n\n    if use_bias:\n      bias_constraint, bias_initializer = (\n          get_auto_range_constraint_initializer(self.bias_quantizer_internal,\n                                                bias_constraint,\n                                                bias_initializer))\n\n    if activation is not None:\n      activation = get_quantizer(activation)\n\n    super().__init__(\n        units=units,\n        activation=activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        recurrent_initializer=recurrent_initializer,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        recurrent_regularizer=recurrent_regularizer,\n        bias_regularizer=bias_regularizer,\n        kernel_constraint=kernel_constraint,\n        recurrent_constraint=recurrent_constraint,\n        bias_constraint=bias_constraint,\n        dropout=dropout,\n        recurrent_dropout=recurrent_dropout,\n        **kwargs\n    )\n\n  def call(self, inputs, states, training=None):\n    prev_output = states[0] if nest.is_nested(states) else states\n\n    dp_mask = self.get_dropout_mask_for_cell(inputs, training)\n    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(\n        
prev_output, training)\n\n    if self.state_quantizer:\n      quantized_prev_output = self.state_quantizer_internal(prev_output)\n    else:\n      quantized_prev_output = prev_output\n\n    if self.kernel_quantizer:\n      quantized_kernel = self.kernel_quantizer_internal(self.kernel)\n    else:\n      quantized_kernel = self.kernel\n\n    if dp_mask is not None:\n      h = K.dot(inputs * dp_mask, quantized_kernel)\n    else:\n      h = K.dot(inputs, quantized_kernel)\n\n    if self.bias is not None:\n      if self.bias_quantizer:\n        quantized_bias = self.bias_quantizer_internal(self.bias)\n      else:\n        quantized_bias = self.bias\n\n      h = K.bias_add(h, quantized_bias)\n\n    if rec_dp_mask is not None:\n      quantized_prev_output = quantized_prev_output * rec_dp_mask\n\n    if self.recurrent_quantizer:\n      quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel)\n    else:\n      quantized_recurrent = self.recurrent_kernel\n\n    output = h + K.dot(quantized_prev_output, quantized_recurrent)\n\n    if self.activation is not None:\n      output = self.activation(output)\n    return output, [output]\n\n  def get_config(self):\n    config = {\n        'kernel_quantizer': constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'recurrent_quantizer': constraints.serialize(\n            self.recurrent_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'bias_quantizer': constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'state_quantizer': constraints.serialize(\n            self.state_quantizer_internal# Google internal code, commented out by copybara\n        ),\n    }\n    base_config = super(QSimpleRNNCell, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n\nclass QSimpleRNN(RNN, 
PrunableLayer):\n  \"\"\"\n  Class for the QSimpleRNN layer.\n\n  Most of these parameters follow the implementation of SimpleRNN in\n  Keras, with the exception of kernel_quantizer, recurrent_quantizer,\n  bias_quantizer and state_quantizer.\n\n\n  kernel_quantizer: quantizer function/class for kernel\n  recurrent_quantizer: quantizer function/class for recurrent kernel\n  bias_quantizer: quantizer function/class for bias\n  state_quantizer: quantizer function/class for states\n\n\n  We refer the reader to the documentation of SimpleRNN in Keras for the\n  other parameters.\n\n  \"\"\"\n\n  def __init__(self,\n               units,\n               activation='quantized_tanh',\n               use_bias=True,\n               kernel_initializer='glorot_uniform',\n               recurrent_initializer='orthogonal',\n               bias_initializer='zeros',\n               kernel_regularizer=None,\n               recurrent_regularizer=None,\n               bias_regularizer=None,\n               activity_regularizer=None,\n               kernel_constraint=None,\n               recurrent_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               recurrent_quantizer=None,\n               bias_quantizer=None,\n               state_quantizer=None,\n               dropout=0.,\n               recurrent_dropout=0.,\n               return_sequences=False,\n               return_state=False,\n               go_backwards=False,\n               stateful=False,\n               unroll=False,\n               **kwargs):\n\n    if 'enable_caching_device' in kwargs:\n      cell_kwargs = {'enable_caching_device':\n                     kwargs.pop('enable_caching_device')}\n    else:\n      cell_kwargs = {}\n\n    cell = QSimpleRNNCell(\n        units,\n        activation=activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        recurrent_initializer=recurrent_initializer,\n        
bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        recurrent_regularizer=recurrent_regularizer,\n        bias_regularizer=bias_regularizer,\n        kernel_constraint=kernel_constraint,\n        recurrent_constraint=recurrent_constraint,\n        bias_constraint=bias_constraint,\n        kernel_quantizer=kernel_quantizer,\n        recurrent_quantizer=recurrent_quantizer,\n        bias_quantizer=bias_quantizer,\n        state_quantizer=state_quantizer,\n        dropout=dropout,\n        recurrent_dropout=recurrent_dropout,\n        dtype=kwargs.get('dtype'),\n        trainable=kwargs.get('trainable', True),\n        **cell_kwargs)\n\n    super().__init__(\n        cell,\n        return_sequences=return_sequences,\n        return_state=return_state,\n        go_backwards=go_backwards,\n        stateful=stateful,\n        unroll=unroll,\n        **kwargs\n    )\n    self.activity_regularizer = regularizers.get(activity_regularizer)\n    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]\n\n  def call(self, inputs, mask=None, training=None, initial_state=None):\n    self._maybe_reset_cell_dropout_mask(self.cell)\n    return super(QSimpleRNN, self).call(\n        inputs, mask=mask, training=training, initial_state=initial_state)\n\n  def get_quantizers(self):\n    return self.cell.quantizers\n\n  def get_prunable_weights(self):\n    return [self.cell.kernel, self.cell.recurrent_kernel]\n\n  @property\n  def units(self):\n    return self.cell.units\n\n  @property\n  def activation(self):\n    return self.cell.activation\n\n  @property\n  def use_bias(self):\n    return self.cell.use_bias\n\n  @property\n  def kernel_initializer(self):\n    return self.cell.kernel_initializer\n\n  @property\n  def recurrent_initializer(self):\n    return self.cell.recurrent_initializer\n\n  @property\n  def bias_initializer(self):\n    return self.cell.bias_initializer\n\n  @property\n  def kernel_regularizer(self):\n    return 
self.cell.kernel_regularizer\n\n  @property\n  def recurrent_regularizer(self):\n    return self.cell.recurrent_regularizer\n\n  @property\n  def bias_regularizer(self):\n    return self.cell.bias_regularizer\n\n  @property\n  def kernel_constraint(self):\n    return self.cell.kernel_constraint\n\n  @property\n  def recurrent_constraint(self):\n    return self.cell.recurrent_constraint\n\n  @property\n  def bias_constraint(self):\n    return self.cell.bias_constraint\n\n  @property\n  def kernel_quantizer_internal(self):\n    return self.cell.kernel_quantizer_internal\n\n  @property\n  def recurrent_quantizer_internal(self):\n    return self.cell.recurrent_quantizer_internal\n\n  @property\n  def bias_quantizer_internal(self):\n    return self.cell.bias_quantizer_internal\n\n  @property\n  def state_quantizer_internal(self):\n    return self.cell.state_quantizer_internal\n\n  @property\n  def kernel_quantizer(self):\n    return self.cell.kernel_quantizer\n\n  @property\n  def recurrent_quantizer(self):\n    return self.cell.recurrent_quantizer\n\n  @property\n  def bias_quantizer(self):\n    return self.cell.bias_quantizer\n\n  @property\n  def state_quantizer(self):\n    return self.cell.state_quantizer\n\n  @property\n  def dropout(self):\n    return self.cell.dropout\n\n  @property\n  def recurrent_dropout(self):\n    return self.cell.recurrent_dropout\n\n  def get_config(self):\n    config = {\n        'units': self.units,\n        'activation': activations.serialize(\n            self.activation# Google internal code, commented out by copybara\n        ),\n        'use_bias': self.use_bias,\n        'kernel_initializer': initializers.serialize(\n            self.kernel_initializer# Google internal code, commented out by copybara\n        ),\n        'recurrent_initializer': initializers.serialize(\n            self.recurrent_initializer# Google internal code, commented out by copybara\n        ),\n        'bias_initializer': initializers.serialize(\n           
 self.bias_initializer# Google internal code, commented out by copybara\n        ),\n        'kernel_regularizer': regularizers.serialize(\n            self.kernel_regularizer# Google internal code, commented out by copybara\n        ),\n        'recurrent_regularizer': regularizers.serialize(\n            self.recurrent_regularizer# Google internal code, commented out by copybara\n        ),\n        'bias_regularizer': regularizers.serialize(\n            self.bias_regularizer# Google internal code, commented out by copybara\n        ),\n        'activity_regularizer': regularizers.serialize(\n            self.activity_regularizer# Google internal code, commented out by copybara\n        ),\n        'kernel_constraint': constraints.serialize(\n            self.kernel_constraint# Google internal code, commented out by copybara\n        ),\n        'recurrent_constraint': constraints.serialize(\n            self.recurrent_constraint# Google internal code, commented out by copybara\n        ),\n        'bias_constraint': constraints.serialize(\n            self.bias_constraint# Google internal code, commented out by copybara\n        ),\n        'kernel_quantizer': constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'recurrent_quantizer': constraints.serialize(\n            self.recurrent_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'bias_quantizer': constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'state_quantizer': constraints.serialize(\n            self.state_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'dropout': self.dropout,\n        'recurrent_dropout': self.recurrent_dropout,\n    }\n    base_config = super(QSimpleRNN, self).get_config()\n    del base_config['cell']\n    return 
dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    return {\n        \"kernel_quantizer\":\n            str(self.kernel_quantizer_internal),\n        \"recurrent_quantizer\":\n            str(self.recurrent_quantizer_internal),\n        \"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"state_quantizer\":\n            str(self.state_quantizer_internal),\n        \"activation\":\n            str(self.activation)\n    }\n\n  @classmethod\n  def from_config(cls, config):\n    if 'implementation' in config:\n      config.pop('implementation')\n    return cls(**config)\n\n\nclass QLSTMCell(LSTMCell):\n  \"\"\"\n  Cell class for the QLSTMCell layer.\n\n  Most of these parameters follow the implementation of LSTMCell in\n  Keras, with the exception of kernel_quantizer, recurrent_quantizer,\n  bias_quantizer, state_quantizer.\n\n\n  kernel_quantizer: quantizer function/class for kernel\n  recurrent_quantizer: quantizer function/class for recurrent kernel\n  bias_quantizer: quantizer function/class for bias\n  state_quantizer: quantizer function/class for states\n\n  We refer the reader to the documentation of LSTMCell in Keras for the\n  other parameters.\n\n  \"\"\"\n\n  def __init__(self,\n               units,\n               activation='quantized_tanh',\n               recurrent_activation='hard_sigmoid',\n               use_bias=True,\n               kernel_initializer='glorot_uniform',\n               recurrent_initializer='orthogonal',\n               bias_initializer='zeros',\n               unit_forget_bias=True,\n               kernel_regularizer=None,\n               recurrent_regularizer=None,\n               bias_regularizer=None,\n               kernel_constraint=None,\n               recurrent_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               recurrent_quantizer=None,\n               bias_quantizer=None,\n               
state_quantizer=None,\n               dropout=0.,\n               recurrent_dropout=0.,\n               implementation=1,\n               **kwargs):\n    self.kernel_quantizer = kernel_quantizer\n    self.recurrent_quantizer = recurrent_quantizer\n    self.bias_quantizer = bias_quantizer\n    self.state_quantizer = state_quantizer\n\n    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)\n    self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer)\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n    self.state_quantizer_internal = get_quantizer(self.state_quantizer)\n\n    self.quantizers = [\n      self.kernel_quantizer_internal,\n      self.recurrent_quantizer_internal,\n      self.bias_quantizer_internal,\n      self.state_quantizer_internal,\n    ]\n\n    if hasattr(self.kernel_quantizer_internal, \"_set_trainable_parameter\"):\n      self.kernel_quantizer_internal._set_trainable_parameter()\n\n    if hasattr(self.recurrent_quantizer_internal, \"_set_trainable_parameter\"):\n      self.recurrent_quantizer_internal._set_trainable_parameter()\n\n    kernel_constraint, kernel_initializer = (\n        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,\n                                              kernel_constraint,\n                                              kernel_initializer))\n\n    recurrent_constraint, recurrent_initializer = (\n        get_auto_range_constraint_initializer(self.recurrent_quantizer_internal,\n                                              recurrent_constraint,\n                                              recurrent_initializer))\n\n    if use_bias:\n      bias_constraint, bias_initializer = (\n          get_auto_range_constraint_initializer(self.bias_quantizer_internal,\n                                                bias_constraint,\n                                                bias_initializer))\n\n    if activation is not None:\n      activation = 
 get_quantizer(activation)\n\n    if recurrent_activation is not None:\n      recurrent_activation = get_quantizer(recurrent_activation)\n\n    super().__init__(\n        units=units,\n        activation=activation,\n        use_bias=use_bias,\n        recurrent_activation=recurrent_activation,\n        kernel_initializer=kernel_initializer,\n        recurrent_initializer=recurrent_initializer,\n        bias_initializer=bias_initializer,\n        unit_forget_bias=unit_forget_bias,\n        kernel_regularizer=kernel_regularizer,\n        recurrent_regularizer=recurrent_regularizer,\n        bias_regularizer=bias_regularizer,\n        kernel_constraint=kernel_constraint,\n        recurrent_constraint=recurrent_constraint,\n        bias_constraint=bias_constraint,\n        dropout=dropout,\n        recurrent_dropout=recurrent_dropout,\n        implementation=implementation,\n        **kwargs\n    )\n\n  def _compute_carry_and_output(self, x, h_tm1, c_tm1, quantized_recurrent):\n    \"\"\"Computes carry and output using split kernels.\"\"\"\n    x_i, x_f, x_c, x_o = x\n    h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1\n    i = self.recurrent_activation(\n        x_i + K.dot(h_tm1_i, quantized_recurrent[:, :self.units]))\n    f = self.recurrent_activation(x_f + K.dot(\n        h_tm1_f, quantized_recurrent[:, self.units:self.units * 2]))\n    c = f * c_tm1 + i * self.activation(x_c + K.dot(\n        h_tm1_c, quantized_recurrent[:, self.units * 2:self.units * 3]))\n    o = self.recurrent_activation(\n        x_o + K.dot(h_tm1_o, quantized_recurrent[:, self.units * 3:]))\n    return c, o\n\n  def _compute_carry_and_output_fused(self, z, c_tm1):\n    \"\"\"Computes carry and output using fused kernels.\"\"\"\n    z0, z1, z2, z3 = z\n    i = self.recurrent_activation(z0)\n    f = self.recurrent_activation(z1)\n    c = f * c_tm1 + i * self.activation(z2)\n    o = self.recurrent_activation(z3)\n    return c, o\n\n  def call(self, inputs, states, training=None):\n    h_tm1_tmp = states[0]  # 
previous memory state\n    c_tm1_tmp = states[1]  # previous carry state\n\n    if self.state_quantizer:\n      c_tm1 = self.state_quantizer_internal(c_tm1_tmp)\n      h_tm1 = self.state_quantizer_internal(h_tm1_tmp)\n    else:\n      c_tm1 = c_tm1_tmp\n      h_tm1 = h_tm1_tmp\n\n    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)\n    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(\n        h_tm1, training, count=4)\n\n    if self.kernel_quantizer:\n      quantized_kernel = self.kernel_quantizer_internal(self.kernel)\n    else:\n      quantized_kernel = self.kernel\n    if self.recurrent_quantizer:\n      quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel)\n    else:\n      quantized_recurrent = self.recurrent_kernel\n    if self.bias_quantizer:\n      quantized_bias = self.bias_quantizer_internal(self.bias)\n    else:\n      quantized_bias = self.bias\n\n    if self.implementation == 1:\n      if 0 < self.dropout < 1.:\n        inputs_i = inputs * dp_mask[0]\n        inputs_f = inputs * dp_mask[1]\n        inputs_c = inputs * dp_mask[2]\n        inputs_o = inputs * dp_mask[3]\n      else:\n        inputs_i = inputs\n        inputs_f = inputs\n        inputs_c = inputs\n        inputs_o = inputs\n      k_i, k_f, k_c, k_o = array_ops.split(\n          quantized_kernel, num_or_size_splits=4, axis=1)\n      x_i = K.dot(inputs_i, k_i)\n      x_f = K.dot(inputs_f, k_f)\n      x_c = K.dot(inputs_c, k_c)\n      x_o = K.dot(inputs_o, k_o)\n      if self.use_bias:\n        b_i, b_f, b_c, b_o = array_ops.split(\n            quantized_bias, num_or_size_splits=4, axis=0)\n        x_i = K.bias_add(x_i, b_i)\n        x_f = K.bias_add(x_f, b_f)\n        x_c = K.bias_add(x_c, b_c)\n        x_o = K.bias_add(x_o, b_o)\n\n      if 0 < self.recurrent_dropout < 1.:\n        h_tm1_i = h_tm1 * rec_dp_mask[0]\n        h_tm1_f = h_tm1 * rec_dp_mask[1]\n        h_tm1_c = h_tm1 * rec_dp_mask[2]\n        h_tm1_o = h_tm1 * 
rec_dp_mask[3]\n      else:\n        h_tm1_i = h_tm1\n        h_tm1_f = h_tm1\n        h_tm1_c = h_tm1\n        h_tm1_o = h_tm1\n      x = (x_i, x_f, x_c, x_o)\n      h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o)\n      c, o = self._compute_carry_and_output(x, h_tm1, c_tm1, quantized_recurrent)\n    else:\n      if 0. < self.dropout < 1.:\n        inputs = inputs * dp_mask[0]\n      z = K.dot(inputs, quantized_kernel)\n      z += K.dot(h_tm1, quantized_recurrent)\n      if self.use_bias:\n        z = K.bias_add(z, quantized_bias)\n\n      z = array_ops.split(z, num_or_size_splits=4, axis=1)\n      c, o = self._compute_carry_and_output_fused(z, c_tm1)\n\n    h = o * self.activation(c)\n    return h, [h, c]\n\n  def get_config(self):\n    config = {\n        'kernel_quantizer': constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'recurrent_quantizer': constraints.serialize(\n            self.recurrent_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'bias_quantizer': constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'state_quantizer': constraints.serialize(\n            self.state_quantizer_internal# Google internal code, commented out by copybara\n        ),\n    }\n    base_config = super(QLSTMCell, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n\nclass QLSTM(RNN, PrunableLayer):\n  \"\"\"\n  Class for the QLSTM layer.\n\n  Most of these parameters follow the implementation of LSTM in\n  Keras, with the exception of kernel_quantizer, recurrent_quantizer,\n  bias_quantizer, state_quantizer.\n\n\n  kernel_quantizer: quantizer function/class for kernel\n  recurrent_quantizer: quantizer function/class for recurrent kernel\n  bias_quantizer: quantizer function/class for bias\n  state_quantizer: quantizer function/class for 
states\n\n  We refer the reader to the documentation of LSTM in Keras for the\n  other parameters.\n\n  \"\"\"\n\n  def __init__(self,\n               units,\n               activation='quantized_tanh',\n               recurrent_activation='hard_sigmoid',\n               use_bias=True,\n               kernel_initializer='glorot_uniform',\n               recurrent_initializer='orthogonal',\n               bias_initializer='zeros',\n               unit_forget_bias=True,\n               kernel_regularizer=None,\n               recurrent_regularizer=None,\n               bias_regularizer=None,\n               activity_regularizer=None,\n               kernel_constraint=None,\n               recurrent_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               recurrent_quantizer=None,\n               bias_quantizer=None,\n               state_quantizer=None,\n               dropout=0.,\n               recurrent_dropout=0.,\n               implementation=1,\n               return_sequences=False,\n               return_state=False,\n               go_backwards=False,\n               stateful=False,\n               unroll=False,\n               **kwargs):\n    if implementation == 0:\n      print('`implementation=0` has been deprecated, '\n              'and now defaults to `implementation=1`.'\n              'Please update your layer call.')\n\n    if 'enable_caching_device' in kwargs:\n      cell_kwargs = {'enable_caching_device':\n                     kwargs.pop('enable_caching_device')}\n    else:\n      cell_kwargs = {}\n\n    cell = QLSTMCell(\n        units,\n        activation=activation,\n        recurrent_activation=recurrent_activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        recurrent_initializer=recurrent_initializer,\n        unit_forget_bias=unit_forget_bias,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        
recurrent_regularizer=recurrent_regularizer,\n        bias_regularizer=bias_regularizer,\n        kernel_constraint=kernel_constraint,\n        recurrent_constraint=recurrent_constraint,\n        bias_constraint=bias_constraint,\n        kernel_quantizer=kernel_quantizer,\n        recurrent_quantizer=recurrent_quantizer,\n        bias_quantizer=bias_quantizer,\n        state_quantizer=state_quantizer,\n        dropout=dropout,\n        recurrent_dropout=recurrent_dropout,\n        implementation=implementation,\n        dtype=kwargs.get('dtype'),\n        trainable=kwargs.get('trainable', True),\n        **cell_kwargs)\n\n    super().__init__(\n        cell,\n        return_sequences=return_sequences,\n        return_state=return_state,\n        go_backwards=go_backwards,\n        stateful=stateful,\n        unroll=unroll,\n        **kwargs\n    )\n    self.activity_regularizer = regularizers.get(activity_regularizer)\n    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]\n\n  def call(self, inputs, mask=None, training=None, initial_state=None):\n    self._maybe_reset_cell_dropout_mask(self.cell)\n    return super(QLSTM, self).call(\n        inputs, mask=mask, training=training, initial_state=initial_state)\n\n  def get_quantizers(self):\n    return self.cell.quantizers\n\n  def get_prunable_weights(self):\n    return [self.cell.kernel, self.cell.recurrent_kernel]\n\n  @property\n  def units(self):\n    return self.cell.units\n\n  @property\n  def activation(self):\n    return self.cell.activation\n\n  @property\n  def recurrent_activation(self):\n    return self.cell.recurrent_activation\n\n  @property\n  def use_bias(self):\n    return self.cell.use_bias\n\n  @property\n  def kernel_initializer(self):\n    return self.cell.kernel_initializer\n\n  @property\n  def recurrent_initializer(self):\n    return self.cell.recurrent_initializer\n\n  @property\n  def bias_initializer(self):\n    return self.cell.bias_initializer\n\n  @property\n  def 
unit_forget_bias(self):\n    return self.cell.unit_forget_bias\n\n  @property\n  def kernel_regularizer(self):\n    return self.cell.kernel_regularizer\n\n  @property\n  def recurrent_regularizer(self):\n    return self.cell.recurrent_regularizer\n\n  @property\n  def bias_regularizer(self):\n    return self.cell.bias_regularizer\n\n  @property\n  def kernel_constraint(self):\n    return self.cell.kernel_constraint\n\n  @property\n  def recurrent_constraint(self):\n    return self.cell.recurrent_constraint\n\n  @property\n  def bias_constraint(self):\n    return self.cell.bias_constraint\n\n  @property\n  def kernel_quantizer_internal(self):\n    return self.cell.kernel_quantizer_internal\n\n  @property\n  def recurrent_quantizer_internal(self):\n    return self.cell.recurrent_quantizer_internal\n\n  @property\n  def bias_quantizer_internal(self):\n    return self.cell.bias_quantizer_internal\n\n  @property\n  def state_quantizer_internal(self):\n    return self.cell.state_quantizer_internal\n\n  @property\n  def kernel_quantizer(self):\n    return self.cell.kernel_quantizer\n\n  @property\n  def recurrent_quantizer(self):\n    return self.cell.recurrent_quantizer\n\n  @property\n  def bias_quantizer(self):\n    return self.cell.bias_quantizer\n\n  @property\n  def state_quantizer(self):\n    return self.cell.state_quantizer\n\n  @property\n  def dropout(self):\n    return self.cell.dropout\n\n  @property\n  def recurrent_dropout(self):\n    return self.cell.recurrent_dropout\n\n  @property\n  def implementation(self):\n    return self.cell.implementation\n\n  def get_config(self):\n    config = {\n        'units': self.units,\n        'activation': activations.serialize(\n            self.activation# Google internal code, commented out by copybara\n        ),\n        'recurrent_activation': activations.serialize(\n            self.recurrent_activation# Google internal code, commented out by copybara\n        ),\n        'use_bias': self.use_bias,\n        
'kernel_initializer': initializers.serialize(\n            self.kernel_initializer# Google internal code, commented out by copybara\n        ),\n        'recurrent_initializer': initializers.serialize(\n            self.recurrent_initializer# Google internal code, commented out by copybara\n        ),\n        'bias_initializer': initializers.serialize(\n            self.bias_initializer# Google internal code, commented out by copybara\n        ),\n        'unit_forget_bias': self.unit_forget_bias,\n        'kernel_regularizer': regularizers.serialize(\n            self.kernel_regularizer# Google internal code, commented out by copybara\n        ),\n        'recurrent_regularizer': regularizers.serialize(\n            self.recurrent_regularizer# Google internal code, commented out by copybara\n        ),\n        'bias_regularizer': regularizers.serialize(\n            self.bias_regularizer# Google internal code, commented out by copybara\n        ),\n        'activity_regularizer': regularizers.serialize(\n            self.activity_regularizer# Google internal code, commented out by copybara\n        ),\n        'kernel_constraint': constraints.serialize(\n            self.kernel_constraint# Google internal code, commented out by copybara\n        ),\n        'recurrent_constraint': constraints.serialize(\n            self.recurrent_constraint# Google internal code, commented out by copybara\n        ),\n        'bias_constraint': constraints.serialize(\n            self.bias_constraint# Google internal code, commented out by copybara\n        ),\n        'kernel_quantizer': constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'recurrent_quantizer': constraints.serialize(\n            self.recurrent_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'bias_quantizer': constraints.serialize(\n            self.bias_quantizer_internal# Google 
internal code, commented out by copybara\n        ),\n        'state_quantizer': constraints.serialize(\n            self.state_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'dropout': self.dropout,\n        'recurrent_dropout': self.recurrent_dropout,\n        'implementation': self.implementation,\n    }\n    base_config = super(QLSTM, self).get_config()\n    del base_config['cell']\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    return {\n        \"kernel_quantizer\":\n            str(self.kernel_quantizer_internal),\n        \"recurrent_quantizer\":\n            str(self.recurrent_quantizer_internal),\n        \"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"state_quantizer\":\n            str(self.state_quantizer_internal),\n        \"activation\":\n            str(self.activation),\n        \"recurrent_activation\":\n            str(self.recurrent_activation),\n    }\n\n  @classmethod\n  def from_config(cls, config):\n    if 'implementation' in config and config['implementation'] == 0:\n      config['implementation'] = 1\n    return cls(**config)\n\n\nclass QGRUCell(GRUCell):\n  \"\"\"\n  Cell class for the QGRUCell layer.\n\n  Most of these parameters follow the implementation of GRUCell in\n  Keras, with the exception of kernel_quantizer, recurrent_quantizer,\n  bias_quantizer and state_quantizer.\n\n\n  kernel_quantizer: quantizer function/class for kernel\n  recurrent_quantizer: quantizer function/class for recurrent kernel\n  bias_quantizer: quantizer function/class for bias\n  state_quantizer: quantizer function/class for states\n\n\n  We refer the reader to the documentation of GRUCell in Keras for the\n  other parameters.\n\n  \"\"\"\n  def __init__(self,\n               units,\n               activation='quantized_tanh',\n               recurrent_activation='hard_sigmoid',\n               use_bias=True,\n            
   kernel_initializer='glorot_uniform',\n               recurrent_initializer='orthogonal',\n               bias_initializer='zeros',\n               kernel_regularizer=None,\n               recurrent_regularizer=None,\n               bias_regularizer=None,\n               kernel_constraint=None,\n               recurrent_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               recurrent_quantizer=None,\n               bias_quantizer=None,\n               state_quantizer=None,\n               dropout=0.,\n               recurrent_dropout=0.,\n               implementation=1,\n               reset_after=False,\n               **kwargs):\n\n    self.kernel_quantizer = kernel_quantizer\n    self.recurrent_quantizer = recurrent_quantizer\n    self.bias_quantizer = bias_quantizer\n    self.state_quantizer = state_quantizer\n\n    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)\n    self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer)\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n    self.state_quantizer_internal = get_quantizer(self.state_quantizer)\n\n    self.quantizers = [\n      self.kernel_quantizer_internal,\n      self.recurrent_quantizer_internal,\n      self.bias_quantizer_internal,\n      self.state_quantizer_internal\n    ]\n\n    if hasattr(self.kernel_quantizer_internal, \"_set_trainable_parameter\"):\n      self.kernel_quantizer_internal._set_trainable_parameter()\n\n    if hasattr(self.recurrent_quantizer_internal, \"_set_trainable_parameter\"):\n      self.recurrent_quantizer_internal._set_trainable_parameter()\n\n    kernel_constraint, kernel_initializer = (\n        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,\n                                              kernel_constraint,\n                                              kernel_initializer))\n\n    recurrent_constraint, recurrent_initializer = (\n        
get_auto_range_constraint_initializer(self.recurrent_quantizer_internal,\n                                              recurrent_constraint,\n                                              recurrent_initializer))\n\n    if use_bias:\n      bias_constraint, bias_initializer = (\n          get_auto_range_constraint_initializer(self.bias_quantizer_internal,\n                                                bias_constraint,\n                                                bias_initializer))\n\n    if activation is not None:\n      activation = get_quantizer(activation)\n\n    if recurrent_activation is not None:\n      recurrent_activation = get_quantizer(recurrent_activation)\n\n    super().__init__(\n        units=units,\n        activation=activation,\n        recurrent_activation=recurrent_activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        recurrent_initializer=recurrent_initializer,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        recurrent_regularizer=recurrent_regularizer,\n        bias_regularizer=bias_regularizer,\n        kernel_constraint=kernel_constraint,\n        recurrent_constraint=recurrent_constraint,\n        bias_constraint=bias_constraint,\n        dropout=dropout,\n        recurrent_dropout=recurrent_dropout,\n        implementation=implementation,\n        reset_after=reset_after,\n        **kwargs\n    )\n\n  def call(self, inputs, states, training=None):\n    # previous memory\n    h_tm1_tmp = states[0] if nest.is_nested(states) else states\n\n    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3)\n    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(\n        h_tm1_tmp, training, count=3)\n\n    if self.state_quantizer:\n      h_tm1 = self.state_quantizer_internal(h_tm1_tmp)\n    else:\n      h_tm1 = h_tm1_tmp\n\n    if self.kernel_quantizer:\n      quantized_kernel = self.kernel_quantizer_internal(self.kernel)\n    else:\n   
   quantized_kernel = self.kernel\n    if self.recurrent_quantizer:\n      quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel)\n    else:\n      quantized_recurrent = self.recurrent_kernel\n\n    if self.use_bias:\n      if self.bias_quantizer:\n        quantized_bias = self.bias_quantizer_internal(self.bias)\n      else:\n        quantized_bias = self.bias\n\n      if not self.reset_after:\n        input_bias, recurrent_bias = quantized_bias, None\n      else:\n        input_bias, recurrent_bias = array_ops.unstack(quantized_bias)\n\n    if self.implementation == 1:\n      if 0. < self.dropout < 1.:\n        inputs_z = inputs * dp_mask[0]\n        inputs_r = inputs * dp_mask[1]\n        inputs_h = inputs * dp_mask[2]\n      else:\n        inputs_z = inputs\n        inputs_r = inputs\n        inputs_h = inputs\n\n      x_z = K.dot(inputs_z, quantized_kernel[:, :self.units])\n      x_r = K.dot(inputs_r, quantized_kernel[:, self.units:self.units * 2])\n      x_h = K.dot(inputs_h, quantized_kernel[:, self.units * 2:])\n\n      if self.use_bias:\n        x_z = K.bias_add(x_z, input_bias[:self.units])\n        x_r = K.bias_add(x_r, input_bias[self.units: self.units * 2])\n        x_h = K.bias_add(x_h, input_bias[self.units * 2:])\n\n      if 0. 
< self.recurrent_dropout < 1.:\n        h_tm1_z = h_tm1 * rec_dp_mask[0]\n        h_tm1_r = h_tm1 * rec_dp_mask[1]\n        h_tm1_h = h_tm1 * rec_dp_mask[2]\n      else:\n        h_tm1_z = h_tm1\n        h_tm1_r = h_tm1\n        h_tm1_h = h_tm1\n\n      recurrent_z = K.dot(h_tm1_z, quantized_recurrent[:, :self.units])\n      recurrent_r = K.dot(h_tm1_r,\n                          quantized_recurrent[:, self.units:self.units * 2])\n      if self.reset_after and self.use_bias:\n        recurrent_z = K.bias_add(recurrent_z, recurrent_bias[:self.units])\n        recurrent_r = K.bias_add(recurrent_r,\n                                 recurrent_bias[self.units:self.units * 2])\n\n      z = self.recurrent_activation(x_z + recurrent_z)\n      r = self.recurrent_activation(x_r + recurrent_r)\n\n      # reset gate applied after/before matrix multiplication\n      if self.reset_after:\n        recurrent_h = K.dot(h_tm1_h, quantized_recurrent[:, self.units * 2:])\n        if self.use_bias:\n          recurrent_h = K.bias_add(recurrent_h, recurrent_bias[self.units * 2:])\n        recurrent_h = r * recurrent_h\n      else:\n        recurrent_h = K.dot(r * h_tm1_h,\n                            quantized_recurrent[:, self.units * 2:])\n\n      hh = self.activation(x_h + recurrent_h)\n    else:\n      if 0. 
< self.dropout < 1.:\n        inputs = inputs * dp_mask[0]\n\n      # inputs projected by all gate matrices at once\n      matrix_x = K.dot(inputs, quantized_kernel)\n      if self.use_bias:\n        # biases: bias_z_i, bias_r_i, bias_h_i\n        matrix_x = K.bias_add(matrix_x, input_bias)\n\n      x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=-1)\n\n      if self.reset_after:\n        # hidden state projected by all gate matrices at once\n        matrix_inner = K.dot(h_tm1, quantized_recurrent)\n        if self.use_bias:\n          matrix_inner = K.bias_add(matrix_inner, recurrent_bias)\n      else:\n        # hidden state projected separately for update/reset and new\n        matrix_inner = K.dot(h_tm1, quantized_recurrent[:, :2 * self.units])\n\n      recurrent_z, recurrent_r, recurrent_h = array_ops.split(\n          matrix_inner, [self.units, self.units, -1], axis=-1)\n\n      z = self.recurrent_activation(x_z + recurrent_z)\n      r = self.recurrent_activation(x_r + recurrent_r)\n\n      if self.reset_after:\n        recurrent_h = r * recurrent_h\n      else:\n        recurrent_h = K.dot(r * h_tm1,\n                            quantized_recurrent[:, 2 * self.units:])\n\n      hh = self.activation(x_h + recurrent_h)\n    # previous and candidate state mixed by update gate\n    h = z * h_tm1 + (1 - z) * hh\n    return h, [h]\n\n  def get_config(self):\n    config = {\n        'kernel_quantizer': constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'recurrent_quantizer': constraints.serialize(\n            self.recurrent_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'bias_quantizer': constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'state_quantizer': constraints.serialize(\n            self.state_quantizer_internal# Google internal code, commented 
out by copybara\n        ),\n    }\n    base_config = super(QGRUCell, self).get_config()\n    return dict(list(base_config.items()) + list(config.items()))\n\n\nclass QGRU(RNN, PrunableLayer):\n  \"\"\"\n  Class for the QGRU layer.\n\n  Most of these parameters follow the implementation of GRU in\n  Keras, with the exception of kernel_quantizer, recurrent_quantizer,\n  bias_quantizer and state_quantizer.\n\n\n  kernel_quantizer: quantizer function/class for kernel\n  recurrent_quantizer: quantizer function/class for recurrent kernel\n  bias_quantizer: quantizer function/class for bias\n  state_quantizer: quantizer function/class for states\n\n\n  We refer the reader to the documentation of GRU in Keras for the\n  other parameters.\n\n  \"\"\"\n\n  def __init__(self,\n               units,\n               activation='quantized_tanh',\n               recurrent_activation='hard_sigmoid',\n               use_bias=True,\n               kernel_initializer='glorot_uniform',\n               recurrent_initializer='orthogonal',\n               bias_initializer='zeros',\n               kernel_regularizer=None,\n               recurrent_regularizer=None,\n               bias_regularizer=None,\n               activity_regularizer=None,\n               kernel_constraint=None,\n               recurrent_constraint=None,\n               bias_constraint=None,\n               kernel_quantizer=None,\n               recurrent_quantizer=None,\n               bias_quantizer=None,\n               state_quantizer=None,\n               dropout=0.,\n               recurrent_dropout=0.,\n               implementation=1,\n               return_sequences=False,\n               return_state=False,\n               go_backwards=False,\n               stateful=False,\n               unroll=False,\n               reset_after=False,\n               **kwargs):\n    if implementation == 0:\n      print('`implementation=0` has been deprecated, '\n              'and now defaults to `implementation=1`.'\n 
             'Please update your layer call.')\n\n    if 'enable_caching_device' in kwargs:\n      cell_kwargs = {'enable_caching_device':\n                     kwargs.pop('enable_caching_device')}\n    else:\n      cell_kwargs = {}\n\n    cell = QGRUCell(\n        units,\n        activation=activation,\n        recurrent_activation=recurrent_activation,\n        use_bias=use_bias,\n        kernel_initializer=kernel_initializer,\n        recurrent_initializer=recurrent_initializer,\n        bias_initializer=bias_initializer,\n        kernel_regularizer=kernel_regularizer,\n        recurrent_regularizer=recurrent_regularizer,\n        bias_regularizer=bias_regularizer,\n        kernel_constraint=kernel_constraint,\n        recurrent_constraint=recurrent_constraint,\n        bias_constraint=bias_constraint,\n        kernel_quantizer=kernel_quantizer,\n        recurrent_quantizer=recurrent_quantizer,\n        bias_quantizer=bias_quantizer,\n        state_quantizer=state_quantizer,\n        dropout=dropout,\n        recurrent_dropout=recurrent_dropout,\n        implementation=implementation,\n        reset_after=reset_after,\n        dtype=kwargs.get('dtype'),\n        trainable=kwargs.get('trainable', True),\n        **cell_kwargs)\n\n    super().__init__(\n        cell,\n        return_sequences=return_sequences,\n        return_state=return_state,\n        go_backwards=go_backwards,\n        stateful=stateful,\n        unroll=unroll,\n        **kwargs\n    )\n    self.activity_regularizer = regularizers.get(activity_regularizer)\n    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]\n\n  def call(self, inputs, mask=None, training=None, initial_state=None):\n    self._maybe_reset_cell_dropout_mask(self.cell)\n    return super(QGRU, self).call(\n        inputs, mask=mask, training=training, initial_state=initial_state)\n\n  def get_quantizers(self):\n    return self.cell.quantizers\n\n  def get_prunable_weights(self):\n    return [self.cell.kernel, 
self.cell.recurrent_kernel]\n\n  @property\n  def units(self):\n    return self.cell.units\n\n  @property\n  def activation(self):\n    return self.cell.activation\n\n  @property\n  def recurrent_activation(self):\n    return self.cell.recurrent_activation\n\n  @property\n  def use_bias(self):\n    return self.cell.use_bias\n\n  @property\n  def kernel_initializer(self):\n    return self.cell.kernel_initializer\n\n  @property\n  def recurrent_initializer(self):\n    return self.cell.recurrent_initializer\n\n  @property\n  def bias_initializer(self):\n    return self.cell.bias_initializer\n\n  @property\n  def kernel_regularizer(self):\n    return self.cell.kernel_regularizer\n\n  @property\n  def recurrent_regularizer(self):\n    return self.cell.recurrent_regularizer\n\n  @property\n  def bias_regularizer(self):\n    return self.cell.bias_regularizer\n\n  @property\n  def kernel_constraint(self):\n    return self.cell.kernel_constraint\n\n  @property\n  def recurrent_constraint(self):\n    return self.cell.recurrent_constraint\n\n  @property\n  def bias_constraint(self):\n    return self.cell.bias_constraint\n\n  @property\n  def kernel_quantizer_internal(self):\n    return self.cell.kernel_quantizer_internal\n\n  @property\n  def recurrent_quantizer_internal(self):\n    return self.cell.recurrent_quantizer_internal\n\n  @property\n  def bias_quantizer_internal(self):\n    return self.cell.bias_quantizer_internal\n\n  @property\n  def state_quantizer_internal(self):\n    return self.cell.state_quantizer_internal\n\n  @property\n  def kernel_quantizer(self):\n    return self.cell.kernel_quantizer\n\n  @property\n  def recurrent_quantizer(self):\n    return self.cell.recurrent_quantizer\n\n  @property\n  def bias_quantizer(self):\n    return self.cell.bias_quantizer\n\n  @property\n  def state_quantizer(self):\n    return self.cell.state_quantizer\n\n  @property\n  def dropout(self):\n    return self.cell.dropout\n\n  @property\n  def recurrent_dropout(self):\n    
return self.cell.recurrent_dropout\n\n  @property\n  def implementation(self):\n    return self.cell.implementation\n\n  @property\n  def reset_after(self):\n    return self.cell.reset_after\n\n  def get_config(self):\n    config = {\n        'units': self.units,\n        'activation': activations.serialize(\n            self.activation# Google internal code, commented out by copybara\n        ),\n        'recurrent_activation': activations.serialize(\n            self.recurrent_activation# Google internal code, commented out by copybara\n        ),\n        'use_bias': self.use_bias,\n        'kernel_initializer': initializers.serialize(\n            self.kernel_initializer# Google internal code, commented out by copybara\n        ),\n        'recurrent_initializer': initializers.serialize(\n            self.recurrent_initializer# Google internal code, commented out by copybara\n        ),\n        'bias_initializer': initializers.serialize(\n            self.bias_initializer# Google internal code, commented out by copybara\n        ),\n        'kernel_regularizer': regularizers.serialize(\n            self.kernel_regularizer# Google internal code, commented out by copybara\n        ),\n        'recurrent_regularizer': regularizers.serialize(\n            self.recurrent_regularizer# Google internal code, commented out by copybara\n        ),\n        'bias_regularizer': regularizers.serialize(\n            self.bias_regularizer# Google internal code, commented out by copybara\n        ),\n        'activity_regularizer': regularizers.serialize(\n            self.activity_regularizer# Google internal code, commented out by copybara\n        ),\n        'kernel_constraint': constraints.serialize(\n            self.kernel_constraint# Google internal code, commented out by copybara\n        ),\n        'recurrent_constraint': constraints.serialize(\n            self.recurrent_constraint# Google internal code, commented out by copybara\n        ),\n        
'bias_constraint': constraints.serialize(\n            self.bias_constraint# Google internal code, commented out by copybara\n        ),\n        'kernel_quantizer': constraints.serialize(\n            self.kernel_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'recurrent_quantizer': constraints.serialize(\n            self.recurrent_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'bias_quantizer': constraints.serialize(\n            self.bias_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'state_quantizer': constraints.serialize(\n            self.state_quantizer_internal# Google internal code, commented out by copybara\n        ),\n        'dropout': self.dropout,\n        'recurrent_dropout': self.recurrent_dropout,\n        'implementation': self.implementation,\n        'reset_after': self.reset_after,\n    }\n    base_config = super(QGRU, self).get_config()\n    del base_config['cell']\n    return dict(list(base_config.items()) + list(config.items()))\n\n  def get_quantization_config(self):\n    return {\n        \"kernel_quantizer\":\n            str(self.kernel_quantizer_internal),\n        \"recurrent_quantizer\":\n            str(self.recurrent_quantizer_internal),\n        \"bias_quantizer\":\n            str(self.bias_quantizer_internal),\n        \"state_quantizer\":\n            str(self.state_quantizer_internal),\n        \"activation\":\n            str(self.activation),\n        \"recurrent_activation\":\n            str(self.recurrent_activation),\n    }\n\n  @classmethod\n  def from_config(cls, config):\n    if 'implementation' in config and config['implementation'] == 0:\n      config['implementation'] = 1\n    return cls(**config)\n\n\nclass QBidirectional(Bidirectional):\n  \"\"\"\n  Class for the QBidirecitonal wrapper.\n\n  Most of these parameters follow the implementation of Bidirectional in\n  Keras.\n\n  We refer 
the reader to the documentation of Bidirectional in Keras for the\n  other parameters.\n\n  \"\"\"\n  def get_quantizers(self):\n    \"\"\"\n    Returns quantizers in the order they were created.\n    \"\"\"\n    return self.forward_layer.get_quantizers() + self.backward_layer.get_quantizers()\n\n  @property\n  def activation(self):\n    return self.layer.activation\n\n  def get_quantization_config(self):\n    return {\n      \"layer\" : self.layer.get_quantization_config(),\n      \"backward_layer\" : self.backward_layer.get_quantization_config()\n    }\n"
  },
  {
    "path": "qkeras/qseparable_conv2d_transpose.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\nimport tensorflow as tf\nfrom tensorflow.keras.layers import Conv2DTranspose\nfrom tensorflow.keras.layers import InputSpec\n\nfrom .qconvolutional import deconv_output_length\nfrom .quantizers import get_quantizer\nfrom tensorflow.python.eager import context\nfrom tensorflow.python.keras import constraints\nfrom tensorflow.python.ops import array_ops\nfrom tensorflow.python.ops import array_ops\n\n\nclass QSeparableConv2DTranspose(Conv2DTranspose):\n  \"\"\"Quantized Separable Conv2DTranspose layer.\"\"\"\n\n  # Most of these parameters follow the implementation of Conv2DTranspose\n  # in Keras, with the exception of following parameters.\n  #\n  # depthwise_activation: activation quantizer for depthwise convolution\n  # pointwise_activation: activation quantizer for pointwise convolution\n  # depthwise_kernel_quantizer: quantizer function/class for depthwise kernel\n  # pointwise_kernel_quantizers: quantizer function/class for pointwise kernel\n  # bias_quantizer: quantizer function/class for bias\n  #\n  # we refer the reader to the documentation of Conv2DTranspose in Keras for\n  # the other parameters.\n\n  def __init__(self,\n               filters,\n               kernel_size,\n               strides=(1, 1),\n               padding=\"valid\",\n               
output_padding=None,\n               depth_multiplier=1,\n               depthwise_activation=None,\n               pointwise_activation=None,\n               use_bias=True,\n               depthwise_kernel_quantizer=None,\n               pointwise_kernel_quantizer=None,\n               bias_quantizer=None,\n               **kwargs):\n\n    self.filters = filters\n    self.kernel_size = kernel_size\n    self.strides = strides\n    self.padding = padding\n    self.output_padding = output_padding\n    self.depth_multiplier = depth_multiplier\n    self.depthwise_activation = depthwise_activation\n    self.pointwise_activation = pointwise_activation\n    self.use_bias = use_bias\n\n    self.depthwise_kernel_quantizer = depthwise_kernel_quantizer\n    self.pointwise_kernel_quantizer = pointwise_kernel_quantizer\n    self.bias_quantizer = bias_quantizer\n\n    self.depthwise_kernel_quantizer_internal = get_quantizer(\n        self.depthwise_kernel_quantizer)\n    self.pointwise_kernel_quantizer_internal = get_quantizer(\n        self.pointwise_kernel_quantizer)\n    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)\n\n    # optimize parameter set to \"auto\" scaling mode if possible\n    for q in [self.depthwise_kernel_quantizer_internal,\n              self.pointwise_kernel_quantizer_internal]:\n      if hasattr(q, \"_set_trainable_parameter\"):\n        q._set_trainable_parameter()\n\n    if depthwise_activation is not None:\n      self.depthwise_activation = get_quantizer(depthwise_activation)\n\n    if pointwise_activation is not None:\n      self.pointwise_activation = get_quantizer(pointwise_activation)\n\n    super().__init__(\n        filters=filters,\n        kernel_size=kernel_size,\n        strides=strides,\n        padding=padding,\n        use_bias=use_bias,\n        **kwargs)\n\n  def _get_input_axis(self):\n    if self.data_format == \"channels_first\":\n      b_axis, c_axis, h_axis, w_axis = 0, 1, 2, 3\n    else:\n      b_axis, c_axis, 
h_axis, w_axis = 0, 3, 1, 2\n\n    return b_axis, c_axis, h_axis, w_axis\n\n  def _get_input_dims(self, input_shape):\n    b_axis, c_axis, h_axis, w_axis = self._get_input_axis()\n\n    return (\n        input_shape[b_axis], input_shape[c_axis],\n        input_shape[h_axis], input_shape[w_axis])\n\n  def _get_output_size(self, inputs, output_padding, padding, strides,\n                       dilation_rate, kernel_weights):\n    input_shape = array_ops.shape(inputs)\n    batch_size, _, height, width = self._get_input_dims(input_shape)\n    kernel_h, kernel_w = kernel_weights.shape[:2]\n    stride_h, stride_w = strides\n\n    dilation_h, dilation_w = dilation_rate[0], dilation_rate[1]\n\n    if output_padding is None:\n      out_pad_h = out_pad_w = None\n    else:\n      out_pad_h, out_pad_w = output_padding\n\n    # Infer the dynamic output shape:\n    out_height = deconv_output_length(\n        height,\n        kernel_h,\n        padding=padding,\n        output_padding=out_pad_h,\n        stride=stride_h,\n        dilation=dilation_h,\n    )\n\n    out_width = deconv_output_length(\n        width,\n        kernel_w,\n        padding=padding,\n        output_padding=out_pad_w,\n        stride=stride_w,\n        dilation=dilation_w,\n    )\n\n    return (batch_size, out_height, out_width, kernel_h, kernel_w)\n\n  def build(self, input_shape):\n    self._input_shape = input_shape\n\n    _, input_channel, _, _ = self._get_input_dims(input_shape)\n    channel_axis = self._get_input_axis()[1]\n\n    self.input_spec = InputSpec(\n        min_ndim=self.rank + 2, axes={channel_axis: input_channel}\n    )\n    # By enforcing the kernel shape, we can control how convolution is\n    # done in depthwise or pointwise.\n    # When setting kernel shape=(kw, kh, 1, input_channel), it does depthwise\n    # convolution.\n    depthwise_kernel_shape = self.kernel_size + (1, input_channel)\n\n    self.depthwise_kernel = self.add_weight(\n        name=\"depthwise_kernel\",\n        
shape=depthwise_kernel_shape,\n        initializer=self.kernel_initializer,\n        regularizer=self.kernel_regularizer,\n        constraint=self.kernel_constraint,\n        trainable=True,\n        dtype=self.dtype,\n    )\n\n    # When setting kernel shape=(1, 1, output_channel, input_channel), it does\n    # pointwise convolution.\n    pointwise_kernel_shape = (1, 1, self.filters, input_channel)\n    self.pointwise_kernel = self.add_weight(\n        name=\"pointwise_kernel\",\n        shape=pointwise_kernel_shape,\n        initializer=self.kernel_initializer,\n        regularizer=self.kernel_regularizer,\n        constraint=self.kernel_constraint,\n        trainable=True,\n        dtype=self.dtype,\n    )\n\n    if self.use_bias:\n      # This bias term is usally add at the end of the pointwise convolution.\n      self.bias = self.add_weight(\n          name=\"bias\",\n          shape=(self.filters,),\n          initializer=self.bias_initializer,\n          regularizer=self.bias_regularizer,\n          constraint=self.bias_constraint,\n          trainable=True,\n          dtype=self.dtype,\n      )\n    else:\n      self.bias = None\n\n    self.built = True\n\n  def compute_final_output_shape(\n      self, input_shape, kernel_size, strides, is_depthwise=True):\n    input_shape = tf.TensorShape(input_shape).as_list()\n    # By using list(), output_shape is a copy of input_shape, instead of a\n    # reference to input_shape.\n    output_shape = list(input_shape)\n    _, c_axis, h_axis, w_axis = self._get_input_axis()\n\n    kernel_h, kernel_w = kernel_size\n    stride_h, stride_w = strides\n\n    if self.output_padding is None:\n      out_pad_h = out_pad_w = None\n    else:\n      out_pad_h, out_pad_w = self.output_padding\n\n    if is_depthwise:\n      # Convolution is performed separately on each spatial domain.\n      output_shape[c_axis] = input_shape[c_axis]\n    else:\n      # Pointwise convolution maps input channels to output filters.\n      
output_shape[c_axis] = self.filters\n\n    output_shape[h_axis] = deconv_output_length(\n        output_shape[h_axis],\n        kernel_h,\n        padding=self.padding,\n        output_padding=out_pad_h,\n        stride=stride_h,\n        dilation=self.dilation_rate[0],\n    )\n    output_shape[w_axis] = deconv_output_length(\n        output_shape[w_axis],\n        kernel_w,\n        padding=self.padding,\n        output_padding=out_pad_w,\n        stride=stride_w,\n        dilation=self.dilation_rate[1],\n    )\n    return tf.TensorShape(output_shape)\n\n  def conv_transpose_op(self, inputs, filters, strides, padding,\n                        output_padding, dilation_rate,\n                        kernel_quantizer, kernel_weights, use_bias,\n                        bias_quantizer, bias, activation, is_depthwise):\n    \"\"\"Transpose convolution op that shared by both depthwise and pointwise.\"\"\"\n\n    batch_size, out_height, out_width, kernel_h, kernel_w = (\n        self._get_output_size(inputs, output_padding, padding, strides,\n                              dilation_rate, kernel_weights))\n\n    if kernel_quantizer:\n      quantized_kernel = kernel_quantizer(kernel_weights)\n    else:\n      quantized_kernel = kernel_weights\n\n    output_filters = 1 if is_depthwise else filters\n\n    if self.data_format == \"channels_first\":\n      output_shape = (batch_size, output_filters, out_height, out_width)\n    else:\n      output_shape = (batch_size, out_height, out_width, output_filters)\n\n    output_shape_tensor = array_ops.stack(output_shape)\n\n    # Split the input channels into groups.\n    x = tf.split(inputs, self._input_shape[-1], axis=-1)\n\n    if is_depthwise:\n      # For depthwise convolution, since CPU doesn't support grouped\n      # convolution, we run convolution on each slice of inputs and concat\n      # the results.\n      outputs = [\n          tf.keras.backend.conv2d_transpose(\n              x=x[i],\n              
kernel=quantized_kernel[:, :, :, i : i + 1],\n              output_shape=output_shape_tensor,\n              strides=strides,\n              padding=padding,\n              data_format=self.data_format,\n              dilation_rate=dilation_rate,\n          )\n          for i in range(len(x))\n      ]\n\n      # Concat the channels.\n      outputs = tf.concat(outputs, axis=-1)\n\n    else:\n      outputs = tf.keras.backend.conv2d_transpose(\n          inputs,\n          quantized_kernel,\n          output_shape_tensor,\n          strides=strides,\n          padding=padding,\n          data_format=self.data_format,\n          dilation_rate=dilation_rate,\n      )\n\n    if not context.executing_eagerly():\n      # Infer the static output shape:\n      out_shape = self.compute_final_output_shape(\n          input_shape=inputs.shape,\n          kernel_size=(kernel_h, kernel_w),\n          strides=strides,\n          is_depthwise=is_depthwise)\n      outputs.set_shape(out_shape)\n\n    if use_bias:\n      quantized_bias = bias_quantizer(bias) if bias_quantizer else bias\n      outputs = tf.keras.backend.bias_add(\n          outputs,\n          quantized_bias,\n          data_format=self.data_format)\n\n    if activation is not None:\n      return activation(outputs)\n\n    return outputs\n\n  def call(self, inputs):\n    input_shape = array_ops.shape(inputs)\n    _, input_channel, _, _ = self._get_input_dims(input_shape)\n\n    # First apply depthwise transposed convolution.\n    x = self.conv_transpose_op(\n        inputs=inputs,\n        # Depthwise convolution doesn't operate across channels. 
Thereofore its\n        # output channels is the same as input channels.\n        filters=input_channel,\n        strides=self.strides,\n        padding=self.padding,\n        output_padding=self.output_padding,\n        dilation_rate=self.dilation_rate,\n        kernel_quantizer=self.depthwise_kernel_quantizer_internal,\n        kernel_weights=self.depthwise_kernel,\n        use_bias=False,  # Usually set bias=False for depthwise conv.\n        bias_quantizer=None,\n        bias=None,\n        activation=self.depthwise_activation,\n        is_depthwise=True)\n\n    # Then apply pointwise transposed convolution\n    x = self.conv_transpose_op(\n        inputs=x,\n        # Pointwise convolution maps input channels to output filters.\n        filters=self.filters,\n        strides=(1, 1),   # strides is set to (1, 1) for pointwise conv.\n        # Though it will not applied in pointwise conv, we need to set\n        # padding here to pass value checking in keras utility functions.\n        padding=self.padding,\n        output_padding=None,  # Prevent output_padding from adding twice.\n        dilation_rate=self.dilation_rate,\n        kernel_quantizer=self.pointwise_kernel_quantizer_internal,\n        kernel_weights=self.pointwise_kernel,\n        use_bias=self.use_bias,\n        bias_quantizer=self.bias_quantizer_internal,\n        bias=self.bias,\n        activation=self.pointwise_activation,\n        is_depthwise=False)\n\n    return x\n\n  def get_config(self):\n    config = super().get_config()\n    config.update({\n        \"filters\": self.filters,\n        \"kernel_size\": self.kernel_size,\n        \"strides\": self.strides,\n        \"padding\": self.padding,\n        \"output_padding\": self.output_padding,\n        \"dilation_rate\": self.dilation_rate,\n        \"data_format\": self.data_format,\n        \"depth_multiplier\": self.depth_multiplier,\n        \"activation\": self.activation,\n        \"use_bias\": self.use_bias,\n        
\"depthwise_kernel_quantizer\": constraints.serialize(\n            self.depthwise_kernel_quantizer_internal),\n        \"pointwise_kernel_quantizer\": constraints.serialize(\n            self.pointwise_kernel_quantizer_internal),\n        \"bias_quantizer\": constraints.serialize(\n            self.bias_quantizer_internal,\n            ),\n    })\n    return config\n\n  def get_quantizers(self):\n    return [\n        self.depthwise_kernel_quantizer_internal,\n        self.pointwise_kernel_quantizer_internal,\n        self.bias_quantizer_internal,\n        self.depthwise_activation,\n        self.pointwise_activation,\n    ]\n\n  def get_prunable_weights(self):\n    w = [self.depthwise_kernel, self.pointwise_kernel]\n    if self.use_bias:\n      w.append(self.bias)\n\n    return w\n"
  },
  {
    "path": "qkeras/qtools/DnC/divide_and_conquer.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"divide_and_conquer hardware cost profiling.\n\nGiven a target throughput and a ML model, this implementation determines\nthe key HW design parameters (bitwidth, unroll factors) for ML area\noptimization in a pipelined architecture.\n\nIt generates recommended design parameters to assist downstream HW synthesis\ndesign. 
With this, it provides accurate HW cost modeling for ML training\nand ML complexity evaluation such as AV2/ROOF_ML.\n\"\"\"\n\nimport enum\nimport logging\nfrom typing import Any, List, Union\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom qkeras import base_quantizer\nfrom qkeras import quantizers\nfrom qkeras.qtools import generate_layer_data_type_map\nfrom qkeras.qtools import qgraph\nfrom qkeras.qtools import qtools_util\nfrom qkeras.qtools.DnC import dnc_layer_cost_ace\n\n\nclass CostMode(enum.Enum):\n  ACE = 1  # cost is computed from theoretical equations.\n  PE_AREA = 2  # cost is computed from compute area only.\n  PE_BW_AREA = 3  # cost is computed from both compute and memory bandwidth.\n\n\n# pylint: disable=invalid-name\nclass DivideConquerGraph:\n  \"\"\"This class creates model graph structure and methods to access layers.\"\"\"\n\n  def __init__(\n      self,\n      model: tf.keras.Model,\n      source_quantizers: base_quantizer.BaseQuantizer = None,\n  ):\n    self._model = model\n    self._source_quantizer_list = source_quantizers or [\n        quantizers.quantized_bits(8, 0, 1)]\n\n    (self._graph, self._source_quantizer_list) = qgraph.CreateGraph(\n        model, source_quantizers, \"quantized_bits(8, 0, 1)\")\n\n    # Propagate output quantizer info into the graph edges.\n    qgraph.GraphPropagateActivationsToEdges(self._graph)\n\n    self._layer_map = generate_layer_data_type_map.generate_layer_data_type_map(\n        self._graph, self._source_quantizer_list, is_inference=False,\n        keras_accumulator=None, for_reference=False)[\"layer_data_type_map\"]\n\n    # Create layer-to-index mapping dict.\n    self._layer_to_idx_dict = {}\n    for idx in self._graph._node.keys():\n      self._layer_to_idx_dict[self.idx_to_layer(idx)] = idx\n\n  def idx_to_layer(self, idx: int):\n    # Map layer index to the layer object.\n    return self._graph._node[idx][\"layer\"][0]\n\n  def layer_to_idx(self, layer: tf.keras.layers.Layer):\n    # Map a 
layer object to index.\n    return self._layer_to_idx_dict.get(layer, None)\n\n  def get_first_node(self):\n    # Get the source node of the graph.\n    return qgraph.SOURCE\n\n  def is_first_node(self, node: Union[int, tf.keras.layers.Layer]):\n    # Find whether a given node is the first node of the graph.\n    # Node could be either index value or layer object.\n    idx = node if isinstance(node, int) else self.layer_to_idx(node)\n    return idx == qgraph.SOURCE\n\n  def get_last_node(self):\n    # Find the last node of the graph.\n    return qgraph.SINK\n\n  def is_last_node(self, node: Union[int, tf.keras.layers.Layer]):\n    # Find whether a given node is the last node of the graph.\n    # Node could be either index value or layer object.\n    idx = node if isinstance(node, int) else self.layer_to_idx(node)\n    return idx == qgraph.SINK\n\n  def get_prev_nodes(self, node: Union[int, tf.keras.layers.Layer]):\n    # Find the predecessor nodes in the graph of the given node.\n    # Node could be either index value or layer object.\n    idx = node if isinstance(node, int) else self.layer_to_idx(node)\n    return list(self._graph.predecessors(idx))\n\n  def get_next_nodes(self, node: Union[int, tf.keras.layers.Layer]):\n    # Find the successor nodes in the graph of the given node.\n    # node could be either index value or layer object.\n    idx = node if isinstance(node, int) else self.layer_to_idx(node)\n    return list(self._graph.successors(idx))\n\n  def get_layer_quantizer_bitwidth(\n      self, node: Union[int, tf.keras.layers.Layer]):\n    \"\"\"Find various quantizer bitwidth of the current layer.\"\"\"\n    layer = self.idx_to_layer(node) if isinstance(node, int) else node\n\n    if layer:\n      layer_item = self._layer_map[layer]\n      weight_quantizer = qtools_util.get_val(layer_item, \"weight_quantizer\")\n      mac_quantizer = qtools_util.get_val(layer_item, \"multiplier\")\n      acc_quantizer = qtools_util.get_val(layer_item, \"accumulator\")\n 
     input_quantizer_list = qtools_util.get_val(\n          layer_item, \"input_quantizer_list\")\n      output_quantizer = qtools_util.get_val(layer_item, \"output_quantizer\")\n\n      return  {\n          # TODO(lishanok@): Handle multiple input quantizers\n          # in non-sequential models.\n          \"input_bits\": input_quantizer_list[0].bits,\n          # When the current layer has no concept of weight, there won't\n          # be any weight quantizer.\n          \"weight_bits\": weight_quantizer.bits if weight_quantizer else 0,\n          # If mac bits don't exist, that means we don't have x * w type of\n          # operations. In this case, pass input_bits through.\n          \"mac_bits\": (\n              mac_quantizer.output.bits if mac_quantizer else\n              input_quantizer_list[0].bits),\n          \"acc_bits\": (\n              acc_quantizer.output.bits if acc_quantizer else\n              input_quantizer_list[0].bits),\n          \"output_bits\": output_quantizer.bits}\n    else:\n      # For the \"dummy\" head and tail nodes in the graph that we inserted at\n      # the begining and ending of the model graph, we run this branch.\n      return {\n          \"input_bits\": 0,\n          \"weight_bits\": 0,\n          \"mac_bits\": 0,\n          \"acc_bits\": 0,\n          \"output_bits\": 0\n      }\n\n  def get_layer_mac_count(self, node: Union[int, tf.keras.layers.Layer]):\n    \"\"\"Find the number of multiplier ops in the current layer.\"\"\"\n    layer = self.idx_to_layer(node) if isinstance(node, int) else node\n\n    return (\n        qtools_util.get_val(self._layer_map[layer], \"operation_count\", 0)\n        if layer else 0)\n\n  def get_layer_shapes(self, node: Union[int, tf.keras.layers.Layer]):\n    layer = self.idx_to_layer(node) if isinstance(node, int) else node\n\n    # Multiple inputs with merge layers.\n    input_shape_list = layer.input_shape if layer else 0\n    if not isinstance(input_shape_list, list):\n      
input_shape_list = [input_shape_list]\n\n    return {\n        \"weight_shape\": (\n            qtools_util.get_val(self._layer_map[layer], \"w_shapes\", 0)\n            if layer else 0),\n        \"output_shape\": (\n            qtools_util.get_val(self._layer_map[layer], \"output_shapes\", 0)\n            if layer else 0),\n        \"input_shape_list\": (input_shape_list)}\n\n\nclass Choice:\n  \"\"\"This class stores a combination of HW design param values.\"\"\"\n\n  def __init__(self, l: float = 0, k: float = 0, cin_unroll: int = 0,\n               cout_unroll: int = 0, kh_unroll: int = 0, kw_unroll: int = 0):\n    \"\"\"Intializer for a combination of hardware design parameters.\n\n    Args:\n      l: Ratio between OutElementPerClk and ComputeOutElementPerClk\n      k: Ratio between InElementPerClk and ComputeInElementPerClk\n      cin_unroll: Unroll factors for input channel\n      cout_unroll: Unroll factors for output channel\n      kh_unroll: Unroll factors for kernel height\n      kw_unroll: Unroll factors for kernel width\n    \"\"\"\n\n    self.k = k\n    self.l = l\n    self.cin_unroll = cin_unroll\n    self.cout_unroll = cout_unroll\n    self.kh_unroll = kh_unroll\n    self.kw_unroll = kw_unroll\n\n  def __str__(self):\n    return (f\"Choice(k={self.k}, l={self.l}, cin_unroll={self.cin_unroll}, \"\n            f\"cout_unroll={self.cout_unroll} kh_unroll={self.kh_unroll}, \"\n            f\"kw_unroll={self.kw_unroll})\")\n\n\ndef get_valid_unrolls(layer: tf.keras.layers.Layer, cout_unroll: int,\n                      target_pe_throughput: float):\n  \"\"\"Get valid unroll values where resulting throughput>=Target throughput.\"\"\"\n\n  input_channel = qtools_util.get_layer_info(layer, \"input_channel\")\n  output_channel = qtools_util.get_layer_info(layer, \"output_channel\")\n  kernel_height = qtools_util.get_layer_info(layer, \"kernel_height\")\n  kernel_width = qtools_util.get_layer_info(layer, \"kernel_width\")\n  layer_type = 
qtools_util.get_layer_info(layer, \"layer_type\")\n\n  if layer_type in [\"QDepthwiseConv2D\", \"QAveragePooling2D\", \"MaxPooling2D\",\n                    \"QGlobalAveragePooling2D\", \"GlobalMaxPooling2D\"]:\n    # Since ops are done in each channel without cross-channel ops,\n    # cin_unroll == cout_unroll in hardware.\n    cin_unroll_list = [cout_unroll]\n  else:\n    # Cin_unroll needs to be a divisor of layer.input_channel\n    cin_unroll_list = qtools_util.find_divisors(input_channel)\n\n  # kw_unroll needs to be a divisor of layer.kernel_width\n  kw_unroll_list = qtools_util.find_divisors(kernel_width)\n  # kh_unroll needs to be a divisor of layer.kernel_height\n  kh_unroll_list = qtools_util.find_divisors(kernel_height)\n\n  valid_unrolls = []\n  for cin_unroll in cin_unroll_list:\n    for kw_unroll in kw_unroll_list:\n      for kh_unroll in kh_unroll_list:\n        logging.debug(\"............cin_unroll: %d kh_unroll: %d kw_unroll: %d\",\n                      cin_unroll, kh_unroll, kw_unroll)\n        # Caculate computation throughput.\n        pe_throughput = get_pe_throughput(\n            layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll,\n            input_channel, output_channel, kernel_height, kernel_width)\n        logging.debug(\"............pe_throughput: %.2f\", pe_throughput)\n        if pe_throughput >= target_pe_throughput:\n          # Save the valid combination of unroll factors to valid_unrolls.\n          valid_unrolls.append((cin_unroll, kh_unroll, kw_unroll))\n\n  return valid_unrolls\n\n\ndef get_per_layer_cost(layer_quantizer_bitwidth, layer_mac_count, layer_shapes,\n                       cin_unroll, cout_unroll, kh_unroll, kw_unroll,\n                       InElementPerClk, OutElementPerClk, mode):\n  \"\"\"Area per layer, including both PE and memory Bandwidth.\"\"\"\n\n  # TODO(lishanok@): needs to add modes that support data-driven cost modeling.\n  assert mode == CostMode.ACE, \"Only CostMode.ACE is supported for 
now.\"\n\n  # Compute memory is calculated according to ACE metric, translated to gates.\n  mac_gates = dnc_layer_cost_ace.get_ace_mac_gates(\n      xbit=layer_quantizer_bitwidth[\"input_bits\"],\n      wbit=layer_quantizer_bitwidth[\"weight_bits\"],\n      abit=layer_quantizer_bitwidth[\"acc_bits\"],\n      regen_params=False)\n\n  # pe_area is not dependent on total num of MACs in the layer.\n  pe_area = (mac_gates * cin_unroll * cout_unroll * kh_unroll * kw_unroll)\n\n  # Memory includes input, output and weight memory, translated to gates.\n  # TODO(lishanok@): weights could be stored in either SRAM or ROM, dependent\n  # on user specification.\n  memory_area = (\n      InElementPerClk * layer_quantizer_bitwidth[\"input_bits\"] *\n      dnc_layer_cost_ace.MemoryGatesPerBit[\"Register\"] +\n      OutElementPerClk * layer_quantizer_bitwidth[\"output_bits\"] *\n      dnc_layer_cost_ace.MemoryGatesPerBit[\"Register\"] +\n      np.prod(layer_shapes[\"weight_shape\"]) *\n      layer_quantizer_bitwidth[\"weight_bits\"] *\n      dnc_layer_cost_ace.MemoryGatesPerBit[\"ROM\"])\n\n  return (pe_area + memory_area)\n\n\ndef get_valid_candidates(input_value, output_to_input_ratio_max):\n  candidate_list = qtools_util.find_divisors(input_value)\n  # Add the other scenario where ComputeElementPerClk is multiple\n  # of ElementPerClk.\n  if output_to_input_ratio_max >= 2:\n    candidate_list += [input_value * x for x in list(\n        range(2, output_to_input_ratio_max+1))]\n\n  return candidate_list\n\n\ndef get_InBufferThru(InElementPerClk, input_channel):\n  return InElementPerClk / input_channel\n\n\ndef get_OutBufferThru(OutElementPerClk, output_channel, kernel_height,\n                      kernel_width, layer_type):\n  if layer_type in [\"UpSampling2D\"]:\n    return OutElementPerClk / (\n        output_channel * kernel_height * kernel_width)\n  else:\n    return OutElementPerClk / output_channel\n\n\ndef is_bufferThru_greater_than_targetThru(\n    layer_type: str, 
InElementPerClk: int, OutElementPerClk: int,\n    input_channel: int, output_channel: int, kernel_height: int,\n    kernel_width: int, target_out_throughput: float,\n    target_in_throughput: float):\n  \"\"\"Verify whether the resulting buffer throughput > target throughput.\"\"\"\n\n  # Calculate throughput of input buffer.\n  InBuf_throughput = get_InBufferThru(InElementPerClk, input_channel)\n  # Calculate throughput of output buffer.\n  OutBuf_throughput = get_OutBufferThru(\n      layer_type=layer_type,\n      OutElementPerClk=OutElementPerClk, output_channel=output_channel,\n      kernel_height=kernel_height, kernel_width=kernel_width)\n\n  logging.debug(\n      \"...............InBuf_throughput: %.2f OutBuf_throughput: %.2f\",\n      InBuf_throughput, OutBuf_throughput)\n\n  # Valid unroll values must meet buffer throughput requirements.\n  return (InBuf_throughput >= target_out_throughput and\n          OutBuf_throughput >= target_in_throughput)\n\n\ndef set_best_global_cost_in_paths(\n    OutElementPerClk_list, paths, layer_idx, cur_layer_idx,\n    layer_quantizer_bitwidth, layer_mac_count, layer_shapes, mode):\n  \"\"\"Find the best global cost of the entire model and update the paths dict.\n\n  Args:\n    OutElementPerClk_list: list of OutElementPerClk for the current layer.\n    paths: Dict that contains the choices that each layer has.\n    layer_idx: Int. The index value of the current layer's predecessor.\n    cur_layer_idx: current layer's index value.\n    layer_quantizer_bitwidth: Dict that contains layer-related quantizer\n      bitwidth, including acc_bits, mac_bits, input_bits and output_bits.\n    layer_mac_count: Int. Use the number of multiplication as the operation\n      count. To include the number of accumulations, we should multiply the\n      value by 2, assuming accumulation count ~= multiplication count.\n    layer_shapes: Dict with keys: weight_shape, input_shape_list and\n      output_shape.\n    mode: CostMode. 
The mode to calculate per layer cost.\n\n  Returns:\n    None.\n  \"\"\"\n\n  def calculate_cost(OutElementPerClk):\n    cur_layer_cost = get_per_layer_cost(\n        layer_quantizer_bitwidth, layer_mac_count, layer_shapes, 0, 0, 0, 0, 0,\n        OutElementPerClk, mode)\n    accumulative_cost = cur_layer_cost + paths[layer_idx][\n        OutElementPerClk][\"acc_cost\"]\n    return (cur_layer_cost, accumulative_cost, OutElementPerClk)\n\n  cost_and_values = list(map(calculate_cost, OutElementPerClk_list))\n\n  layer_cost, min_accumulative_cost, best_OutElementPerClk = (\n      min(cost_and_values, key=lambda x: x[1]))\n\n  # For the initial node, we find the best path which contains a sentinel\n  # choice, cost with that path, and the chosen OutElementPerClk\n  # that will point to the corresponding choice of the following layer.\n  paths[cur_layer_idx] = {\n      best_OutElementPerClk: {\n          \"choice\": Choice().__str__(),\n          \"cur_cost\": layer_cost,\n          \"acc_cost\": min_accumulative_cost,\n          \"OutElementPerClk\": best_OutElementPerClk\n      }}\n\n\ndef backtrack(graph, paths):\n  \"\"\"Backtracking of the best path from the first layer to the last.\"\"\"\n  best_path = {}\n  # Get the second node from the graph as the first node is a sentinel node.\n  layer_idx = graph.get_first_node()\n\n  logging.debug(\"=======================\")\n  logging.debug(\"Trimmed Paths:\")\n  logging.debug(\"paths: %s\", paths)\n  logging.debug(\"=======================\")\n\n  # Find the best choice of the first layer.\n  # TODO(lishanok@): extend code to non-sequential model where there are\n  # multiple input layers\n  best_OutElementPerClk = list(paths[layer_idx].keys())[0]\n  best_entry = paths[layer_idx][best_OutElementPerClk]\n  # Add layer name to improve readability.\n  layer = graph.idx_to_layer(layer_idx)\n  best_entry[\"layer_name\"] = layer.name if layer else \"None\"\n  best_path[layer_idx] = best_entry\n  best_OutElementPerClk = 
best_entry[\"OutElementPerClk\"]\n  best_accumlative_cost = best_entry[\"acc_cost\"]\n\n  layer_idx = graph.get_next_nodes(layer_idx)[0]\n  # Given the best choice of 1st layer, find the best choice for all following\n  # layers by backtracking.\n  while not graph.is_last_node(layer_idx):\n    # Find current layer's best choice from the ptr (ie. best_OutElementPerClk)\n    # stored in the best choice of the previous layer.\n    best_entry = paths[layer_idx][best_OutElementPerClk]\n    layer = graph.idx_to_layer(layer_idx)\n    best_entry[\"layer_name\"] = layer.name if layer else \"None\"\n    best_path[layer_idx] = best_entry\n    # Update the ptr to the next layer.\n    best_OutElementPerClk = best_entry[\"OutElementPerClk\"]\n\n    # get the next node from the graph\n    # TODO(lishanok@): extend the code to non-sequential model where there are\n    # multiple next layers.\n    layer_idx = graph.get_next_nodes(layer_idx)[0]\n\n  # best_path stores the best hw param combination and cost for each layer.\n  return best_path, best_accumlative_cost\n\n\ndef update_cur_best_choices(\n    cur_best_choices: List[Any], OutElementPerClk: int,\n    prev_OutElementPerClk: int, cur_layer_cost: float,\n    accumulative_cost: float, choice: Choice):\n  \"\"\"Update the cur_best_choices dict.\n\n  At each layer, different choices of unroll factors will generate a\n  prev_OutElementPerClk value. Some of the choices might generate the same\n  prev_OutElementPerClk. So for each pre_OutElementPerClk, we only store\n  the best choice which has the min cost.\n  \"\"\"\n\n  entry = cur_best_choices.get(prev_OutElementPerClk, None)\n  existing_accumulative_cost = entry[\"acc_cost\"] if entry else np.inf\n  logging.debug(\"...............cost of cur_best_choices [%d]: %.2f\",\n                prev_OutElementPerClk, existing_accumulative_cost)\n  if accumulative_cost < existing_accumulative_cost:\n    # Stores the best choice and its cost for the given\n    # prev_OutElementPerClk. 
We also store the ptr to next layer's\n    # OutElementPerClk for future backtracking purpose.\n    cur_best_choices[prev_OutElementPerClk] = {\n        \"choice\": choice.__str__(),\n        \"cur_cost\": cur_layer_cost,\n        \"acc_cost\": accumulative_cost,\n        \"OutElementPerClk\": OutElementPerClk}\n    logging.debug(\n        \"...............Find better cost! Update cur_best_choices[%d]: %s\",\n        prev_OutElementPerClk, cur_best_choices[prev_OutElementPerClk])\n\n\ndef get_ComputeInElementPerClk(layer_type, cin_unroll,\n                               cout_unroll, kh_unroll, kw_unroll):\n  if layer_type in [\"QConv2D\", \"QDense\"]:\n    return cin_unroll * kh_unroll * kw_unroll\n  elif layer_type in [\"QDepthwiseConv2D\", \"QAveragePooling2D\", \"MaxPooling2D\"]:\n    return cout_unroll * kh_unroll * kw_unroll\n  elif layer_type in [\"QGlobalAveragePooling2D\", \"GlobalMaxPooling2D\",\n                      \"UpSampling2D\"]:\n    return cout_unroll\n  elif layer_type in [\"Concatenate\"]:\n    return cin_unroll\n\n\ndef get_InElementPerClk_base(ComputInElementPerClk, kh_unroll, kw_unroll):\n  return int(ComputInElementPerClk / (kh_unroll * kw_unroll))\n\n\ndef get_pe_throughput(layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll,\n                      input_channel, output_channel, kernel_height,\n                      kernel_width):\n  \"\"\"Calculate compute throughput for the given unroll factors.\"\"\"\n  if layer_type in [\"QConv2D\", \"QDense\"]:\n    return 1.0 * cin_unroll * cout_unroll * kh_unroll * kw_unroll / (\n        input_channel * output_channel * kernel_height * kernel_width)\n  elif layer_type in [\"QDepthwiseConv2D\", \"QAveragePooling2D\", \"MaxPooling2D\",\n                      \"UpSampling2D\"]:\n    return 1.0 * cout_unroll * kh_unroll * kw_unroll / (\n        output_channel * kernel_height * kernel_width)\n  elif layer_type in [\"QGlobalAveragePooling2D\", \"GlobalMaxPooling2D\",\n                      
\"Concatenate\"]:\n    return 1.0 * cout_unroll / output_channel\n  else:\n    raise ValueError(f\"Unspported layer type: {layer_type}\")\n\n\ndef get_target_throughputs(layer, target_out_throughput):\n  \"\"\"Update throughput for a given layer.\"\"\"\n\n  # For layer that do not change the number of inference pixels,\n  # throughput remains the same. For layers that decrease or increase the\n  # number of inference pixels, the target throughput needs to update\n  # accordingly.\n\n  def multiply_elements_except_none(my_tuple):\n    # Convert None values to np.nan and then use np.nanprod to calculate\n    # the product\n    return np.nanprod([x if x is not None else np.nan for x in my_tuple])\n\n  if layer:\n    input_size = multiply_elements_except_none(layer.input_shape[:-1])\n    output_size = multiply_elements_except_none(layer.output_shape[:-1])\n    target_in_throughput = target_out_throughput * input_size / output_size\n  else:\n    target_in_throughput = target_out_throughput\n\n  # Per new design, target_pe_throughput equals to target_out_throughput.\n  target_pe_throughput = target_out_throughput\n  return target_in_throughput, target_pe_throughput\n\n\ndef calc_hw_params(graph, target_OutElementPerClk, target_out_throughput,\n                   input_quantizer_bits,\n                   compute_to_memory_max_ratio=4,\n                   memory_to_unroll_max_ratio=4,\n                   mode=CostMode.ACE):\n  \"\"\"Calculate HW params that minimizes total cost.\n\n  Args:\n    graph: DivideConquerGraph Object. Model graph.\n    target_OutElementPerClk: Int. Target number of elements per clock\n      cycle that the hardware needs to output.\n    target_out_throughput: Float. Target number of inferences per clock\n      cycle that the hardware needs to make.\n    input_quantizer_bits: Int. Model's input quantizer bits.\n    compute_to_memory_max_ratio: Int. 
Max allowed ratio between\n      ComputeOutElement and OutElement\n    memory_to_unroll_max_ratio: Int. Max allowed ratio between\n      InElementPerClk and CinUnroll\n    mode: CostMode. The mode to calculate per layer cost. Default is ACE.\n\n  Returns:\n    best_path: Dict. Stores the best hw param value at each layer and their\n      irrespective cost.\n    best_cost: Float. The best global cost of the entire model.\n  \"\"\"\n\n  # Paths stores the best choices for every layer.\n  # For the layer_idx, for each OutElementPerClk, we can calculate the best hw\n  # param choice. We store all these best choices, each choice will\n  # correspond to one OutElementPerClk key. Path therefore has the format:\n  # {layer: {OutElementPerClk: (choice, cost, downstream_OutElementPerClk)}}\n  paths = {}\n\n  # We start the computation from the last node.\n  layer_idx = graph.get_last_node()\n\n  # Store the hw choices for the last node (a dummy node) for the sake\n  # of completion.\n  paths[layer_idx] = {\n      target_OutElementPerClk: {\n          \"choice\": Choice().__str__(),\n          \"cur_cost\": 0,\n          \"acc_cost\": 0,\n          \"OutElementPerClk\": -1}}\n\n  logging.debug(\"====== Extracting HW params combinations per layer =====\")\n\n  # The following code calculates cost backward, from last layer to the first.\n  while  graph.get_prev_nodes(layer_idx):\n    # Find precessor of the layer.\n    # TODO(lishanok@): extend this code to multiple prev layers.\n    cur_layer_idx = graph.get_prev_nodes(layer_idx)[0]\n    cur_layer = graph.idx_to_layer(cur_layer_idx)\n    logging.debug(\"processing layer_idx:%d name:%s type:%s ***\",\n                  cur_layer_idx, getattr(cur_layer, \"name\", None),\n                  cur_layer.__class__.__name__)\n\n    target_in_throughput, target_pe_throughput = get_target_throughputs(\n        cur_layer, target_out_throughput)\n\n    # Previous layer will generate a list of candidates for OutElementPerClk\n    # values 
for the current layer.\n    OutElementPerClk_list = list(paths[layer_idx].keys())\n    logging.debug(\"OutElementPerClk_list:%s\", OutElementPerClk_list)\n\n    layer_quantizer_bitwidth = graph.get_layer_quantizer_bitwidth(cur_layer)\n    layer_mac_count = graph.get_layer_mac_count(cur_layer)\n    layer_shapes = graph.get_layer_shapes(cur_layer)\n\n    # TODO(lishanok@): need to extend to multiple input layers, i.e., more\n    # than 1 layer will reach graph's first node. We should only exit if all\n    # input layers are processed.\n    if graph.is_first_node(cur_layer_idx):\n      # Computation reaches the 1st node of the graph. We can now find the best\n      # path of all OutElementPerClk choices at the first layer.\n      set_best_global_cost_in_paths(\n          OutElementPerClk_list, paths, layer_idx, cur_layer_idx,\n          layer_quantizer_bitwidth, layer_mac_count, layer_shapes, mode)\n      break\n\n    # Get layer-related information\n    input_channel = qtools_util.get_layer_info(cur_layer, \"input_channel\")\n    output_channel = qtools_util.get_layer_info(cur_layer, \"output_channel\")\n    kernel_height = qtools_util.get_layer_info(cur_layer, \"kernel_height\")\n    kernel_width = qtools_util.get_layer_info(cur_layer, \"kernel_width\")\n    layer_type = qtools_util.get_layer_info(cur_layer, \"layer_type\")\n    output_channel_divisors = qtools_util.find_divisors(output_channel)\n\n    logging.debug(\"input_channel: %d, output_channel: %d, kernel_height: %d, \"\n                  \"kernel_width: %d, weight_quantizer_bits: %d\",\n                  input_channel, output_channel, kernel_height, kernel_width,\n                  layer_quantizer_bitwidth[\"weight_bits\"])\n\n    cur_best_choices = {}\n    for OutElementPerClk in OutElementPerClk_list:\n      logging.debug(\"...OutElementPerClk: %d\", OutElementPerClk)\n\n      # Pass through OutElementPerClk and cost for non-essential layers.\n      if layer_type in [\"QBatchNormalization\", 
\"QActivation\", \"Dropout\",\n                        \"Reshape\", \"Activation\", \"ZeroPadding2D\"]:\n        logging.debug(\"...... Passing through layer_type: %s with 0 cost\",\n                      layer_type)\n\n        # Update the best choices dict with only 1 key-value pair. By\n        # considering current light-computation layer in the graph\n        # as a pass-through node, we set layer cost=0, and set the predecessor\n        # node's OutElementPerClk the same as current node's OutElementPerClk.\n        update_cur_best_choices(\n            cur_best_choices, OutElementPerClk=OutElementPerClk,\n            prev_OutElementPerClk=OutElementPerClk, cur_layer_cost=0,\n            accumulative_cost=paths[layer_idx][OutElementPerClk][\"acc_cost\"],\n            choice=Choice())\n\n        # Exit current iteration since there is no design param to explore\n        # for these layer types.\n        continue\n\n      # For each of the possible OutElementPerClk values provided by the next\n      # layer, we derive possible HW params choices of the current layer.\n      for ComputeOutElementPerClk in get_valid_candidates(\n          OutElementPerClk, compute_to_memory_max_ratio):\n        logging.debug(\"......ComputeOutElementPerClk: %d\",\n                      ComputeOutElementPerClk)\n\n        l = OutElementPerClk / ComputeOutElementPerClk\n        cout_unroll = ComputeOutElementPerClk\n\n        # cout_unroll needs to be a divisor of output_channels\n        if cout_unroll not in output_channel_divisors:\n          continue\n\n        logging.debug(\n            \".........OutElementPerClk / ComputeOutElementPerClk = %.2f,\"\n            \"cout_unroll=%.2f\", l, cout_unroll)\n        # Find valid unroll values that meet pe throughput requirement.\n        valid_unrolls = get_valid_unrolls(cur_layer, cout_unroll,\n                                          target_pe_throughput)\n        if not valid_unrolls:\n          # Skip if no valid unroll values are 
found.\n          logging.debug(\".........No valid unroll values found!\")\n          continue\n\n        for (cin_unroll, kh_unroll, kw_unroll) in valid_unrolls:\n          # Check throughput requirement of each combination of unroll values.\n          logging.debug(\".........cin_unroll: %d, kh_unroll: %d, kw_unroll: %d\",\n                        cin_unroll, kh_unroll, kw_unroll)\n          ComputInElementPerClk = get_ComputeInElementPerClk(\n              layer_type, cin_unroll=cin_unroll, cout_unroll=cout_unroll,\n              kh_unroll=kh_unroll, kw_unroll=kw_unroll)\n\n          # InElementPerClk = k*ComputeInElementPerClk/(kh_unroll * kw_unroll)\n          # TODO(lishanok@): Confirm if it works for Concatenate layer.\n          InElementPerClk_base = get_InElementPerClk_base(\n              ComputInElementPerClk=ComputInElementPerClk, kh_unroll=kh_unroll,\n              kw_unroll=kw_unroll)\n          for InElementPerClk in get_valid_candidates(\n              InElementPerClk_base, memory_to_unroll_max_ratio):\n            # With given cin_unroll, check throughput requirement of each\n            # possible candidate of InElementPerClk.\n            logging.debug(\"............InElementPerClk: %d\", InElementPerClk)\n            k = cin_unroll / InElementPerClk\n            # prev_OutElementPerClk is the predecessor node's OutElementPerClk\n            prev_OutElementPerClk = InElementPerClk\n\n            if is_bufferThru_greater_than_targetThru(\n                layer_type=layer_type, InElementPerClk=InElementPerClk,\n                OutElementPerClk=OutElementPerClk, input_channel=input_channel,\n                output_channel=output_channel, kernel_height=kernel_height,\n                kernel_width=kernel_width,\n                target_out_throughput=target_out_throughput,\n                target_in_throughput=target_in_throughput):\n              # If valid unroll values meet buffer throughput requirements,\n              # comput cost.\n            
  # cost = current layer's cost + total of downstream layers' cost.\n              # Since we derive cost iteratively starting from the last layer,\n              # paths already store the total cost of the downstream layers.\n              cur_layer_cost = get_per_layer_cost(\n                  layer_quantizer_bitwidth, layer_mac_count, layer_shapes,\n                  cin_unroll, cout_unroll, kh_unroll, kw_unroll,\n                  InElementPerClk, OutElementPerClk, mode)\n              accumulative_cost = (\n                  cur_layer_cost + paths[layer_idx][OutElementPerClk][\n                      \"acc_cost\"])\n\n              logging.debug(\"...............Buf throughput is good! \"\n                            \"Accumulative_cost: %.2f\", accumulative_cost)\n\n              # Each choice is a hw param combination.\n              choice = Choice(l, k, cin_unroll, cout_unroll, kh_unroll,\n                              kw_unroll)\n\n              update_cur_best_choices(cur_best_choices, OutElementPerClk,\n                                      prev_OutElementPerClk, cur_layer_cost,\n                                      accumulative_cost, choice)\n\n    if not cur_best_choices:\n      logging.error(\"Cannot find any valid HW choice for layer %s! Exit!\",\n                    cur_layer.name)\n      return {}, None\n\n    logging.debug(\"=======================\")\n\n    # Store the best choices of hw params for the current layer. 
Proceed to\n    # the previous layer.\n    paths[cur_layer_idx] = cur_best_choices\n    layer_idx = cur_layer_idx\n    # Predicessor node's OutBuf throughput is sucessor node's InBuf throughput.\n    target_out_throughput = target_in_throughput\n\n  return backtrack(graph, paths)\n\n\ndef estimate_model_cost(\n    model: tf.keras.Model,\n    input_quantizer_bits: int = 8,\n    target_OutElementPerClk: int = 10,\n    target_out_throughput: float = 1.0,\n    compute_to_memory_max_ratio: int = 4,\n    memory_to_unroll_max_ratio: int = 4,\n    mode: CostMode = CostMode.ACE):\n  \"\"\"Main function to divide and conquer cost modeling.\n\n  Args:\n    model: QKeras model.\n    input_quantizer_bits: Model's input quantizer bits.\n    target_OutElementPerClk: Target number of elements per clock\n      cycle that the hardware needs to output.\n    target_out_throughput: Target number of inferences per clock\n      cycle that the hardware needs to make.\n    compute_to_memory_max_ratio: Max allowed ratio between\n      ComputeOutElement and OutElement\n    memory_to_unroll_max_ratio: Max allowed ratio between\n      InElementPerClk and CinUnroll\n    mode: The mode to calculate per layer cost.\n\n  Returns:\n    best_path: Dict. Stores the best hw param value at each layer and their\n      irrespective cost.\n    best_cost: Float. 
The best global cost of the entire model.\n  \"\"\"\n\n  logging.info(\"Estimating model design params and cost...\")\n  # Generate graph\n  graph = DivideConquerGraph(model)\n  # Call the main function to generate optimal HW configs for all layers\n  best_path, best_cost = calc_hw_params(\n      graph=graph, target_OutElementPerClk=target_OutElementPerClk,\n      target_out_throughput=target_out_throughput,\n      input_quantizer_bits=input_quantizer_bits,\n      compute_to_memory_max_ratio=(\n          compute_to_memory_max_ratio),\n      memory_to_unroll_max_ratio=(\n          memory_to_unroll_max_ratio),\n      mode=mode\n  )\n\n  logging.info(\"best_design_params: %s\", best_path)\n\n  return (best_path, best_cost)\n"
  },
  {
    "path": "qkeras/qtools/DnC/dnc_layer_cost_ace.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"divide_and_conquer per layer cost modeling using ACE and data fitting.\n\nFor a given layer with its hardware design params, predict its cost\nin actual ASIC implementation using ACE metric and actual MAC gates data points.\n\"\"\"\n\nimport io\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy.optimize import curve_fit\n\n\n# Rule-of-thumb mapping between bits and gates in memory area estimate.\nMemoryGatesPerBit = {\n    'Register': 10.0,\n    'SRAM': 1.0,\n    'ROM': 0.1,\n}\n\n\n# Previously calculated 3D polynomial coefficients with relative MAE<5%.\nMAC_POLY3D_PARAMS = np.array([7.70469119, 13.76199652, -92.15756665])\n\n\n# MAC area data points generated from go/mac_vs_area.\nMAC24 = pd.read_csv(io.StringIO('''\n283,280,286,313,325,336,356,,\n274,290,325,372,401,428,485,,\n285,325,388,510,568,614,713,,\n308,372,509,750,865,1002,1167,,\n336,427,617,1003,1151,1309,,,\n356,480,722,1165,,,,,\n'''), header=None)\n\nMAC32 = pd.read_csv(io.StringIO('''\n391,365,377,410,453,433,458,507,\n364,382,418,466,497,521,578,685,\n378,418,485,594,659,721,832,1035,\n408,466,596,843,1029,1151,1321,1642,\n432,521,724,1153,1363,1512,1797,,\n457,578,830,1330,1551,1782,2273,,\n'''), header=None)\n\nMAC40 = 
pd.read_csv(io.StringIO('''\n458,457,470,500,522,527,551,605,664\n457,475,513,561,597,616,670,782,888\n470,513,579,699,766,816,928,1150,1358\n499,561,699,996,1161,1273,1499,1850,2189\n527,612,818,1275,1545,1691,2054,2516,\n549,670,927,1496,1798,2035,2490,3294,\n'''), header=None)\n\nMAC48 = pd.read_csv(io.StringIO('''\n595,550,566,594,659,624,642,694,745\n551,566,607,654,727,707,763,881,984\n566,607,679,794,871,921,1017,1270,1489\n594,655,793,1097,1285,1401,1668,2101,2378\n624,711,921,1397,1816,1950,2277,2763,3301\n642,762,1015,1669,1974,2264,2718,3631,4415\n'''), header=None)\n\n\ndef mac_gates_polynomial_3d(xyz, a, b, c):\n  \"\"\"Using a 3d polynomial function to model MAC area.\n\n  This function models the MAC area to be the sum of multipler, accumulator\n  and a constant shift. Particularly, multiplier area is modeled to be linear\n  # to input_bits * weight_bits, per ACE rule.\n\n  Args:\n    xyz: tuple includes input, weight and accumulator bits.\n    a: polynomial coefficient 0.\n    b: polynomial coefficient 1.\n    c: polynomial coefficient 2.\n\n  Returns:\n    MAC area predicted by the function.\n  \"\"\"\n  x, y, z = xyz\n  return a * x * y + b * z + c\n\n\ndef gen_mac_gate_model(do_plot=False):\n  \"\"\"Generate the polynomial cost model coefficients using given data.\n\n  Args:\n    do_plot: Bool indicates whether plot the raw data and the fitted curve.\n\n  Returns:\n    params: The esitimated params of the polynomical function.\n    mae_predict: Calculate the mean absolute error of the predictions.\n    parameter_std_deviation: one standard deviation errors on the parameters,\n      indicating the uncertainties of the params.\n  \"\"\"\n  # acc bits, 1st index\n  abit = np.array([24, 32, 40, 48])\n  abit = np.repeat(abit, 54)\n\n  # weight bits, 2nd index\n  wbit = np.array([1, 2, 4, 8, 12, 16])\n  wbit = np.tile(np.repeat(wbit, 9), 4)\n\n  # input bits, 3rd index\n  xbit = np.array([1, 2, 4, 8, 10, 12, 16, 24, 32])\n  xbit = np.tile(xbit, 24)\n\n 
 # Record all mac area data points associated with each accumulator bitwidth\n  mac_arrs = []\n  # Record the start and end index of the mac area data points\n  # associated with each accumulator bitwidth\n  mac_arrs_index = {}\n  # Record index of all valid data points\n  valid_index = []\n  start_pos = 0\n\n  for (mac_acc, acc_bits) in zip(\n      [MAC24, MAC32, MAC40, MAC48], [24, 32, 40, 48]):\n    cur_mac = mac_acc.to_numpy().reshape(-1)\n    # Filter out nan data points\n    cur_valid_index = ~np.isnan(cur_mac)\n    cur_valid_mac = cur_mac[cur_valid_index]\n    # Record the data length for each accumulator bits\n    end_pos = start_pos + len(cur_valid_mac)\n    mac_arrs_index[acc_bits] = (start_pos, end_pos)\n    # Append mac areas of each accumulator bits to a list\n    mac_arrs += list(cur_valid_mac)\n    start_pos = end_pos\n    valid_index += list(cur_valid_index)\n\n  # Filter out invalid data\n  xbit = xbit[valid_index]\n  wbit = wbit[valid_index]\n  abit = abit[valid_index]\n\n  # curve fitting for all data points\n  params, covariance = curve_fit(\n      mac_gates_polynomial_3d, (xbit, wbit, abit), mac_arrs)\n\n  # Compute one standard deviation errors on the parameters.\n  parameter_std_deviation = np.sqrt(np.diag(covariance))\n\n  # Calculate the mean absolute error between prediction and given data.\n  mac_predict = mac_gates_polynomial_3d((xbit, wbit, abit), *params)\n  mae = np.mean(np.abs(mac_predict - mac_arrs))\n  mae_predict = mae / np.mean(mac_arrs)\n\n  if do_plot:\n    # Plot all raw data points\n    fig = plt.figure(figsize=(10, 10))\n    ax = fig.add_subplot(111, projection='3d')\n\n    ax.scatter(xbit, wbit, mac_arrs, label='Data')\n\n    ax.set_xlabel('X_bits')\n    ax.set_ylabel('W_bits')\n    ax.set_zlabel('MAC')\n\n    plt.title('MAC area data points')\n    plt.show()\n\n    # Generate a mesh grid for plotting.\n    x_fit = np.linspace(min(xbit), max(xbit), 50)\n    w_fit = np.linspace(min(wbit), max(wbit), 50)\n    xmesh, wmesh = 
np.meshgrid(x_fit, w_fit)\n\n    fig = plt.figure(figsize=(16, 16))\n    index = 1\n\n    # Plotting 3D fitting curve for each accumulator bitwidth\n    for acc_bits in [24, 32, 40, 48]:\n      ax = fig.add_subplot(2, 2, index, projection='3d')\n\n      start_pos = mac_arrs_index[acc_bits][0]\n      end_pos = mac_arrs_index[acc_bits][1]\n      ax.scatter(xbit[start_pos:end_pos], wbit[start_pos:end_pos],\n                 mac_arrs[start_pos:end_pos], label='Data')\n\n      amesh = np.full(shape=(50, 50), fill_value=acc_bits)\n      poly_fit = mac_gates_polynomial_3d((xmesh, wmesh, amesh), *params)\n\n      ax.plot_surface(\n          xmesh, wmesh, poly_fit, cmap='viridis', alpha=0.8,\n          label=f'Fitted Surface | acc_bits={acc_bits}')\n\n      ax.set_xlabel('X')\n      ax.set_ylabel('W')\n      ax.set_zlabel('MAC')\n      ax.set_title(f'accumulator bitwidth: {acc_bits}')\n      index += 1\n\n    plt.show()\n\n  return params, mae_predict, parameter_std_deviation\n\n\ndef get_ace_mac_gates(xbit, wbit, abit, regen_params=False):\n  \"\"\"Function to estimate MAC area, including 1 multipler and 1 accumulator.\n\n  Args:\n    xbit: int. input bits.\n    wbit: int. weight bits.\n    abit: int. accumulator bits.\n    regen_params: Bool. If True, regenerate the MAC cost model coefficients.\n      If False, reuse the previously generated model coefficients.\n\n  Returns:\n    Estimated MAC gates.\n  \"\"\"\n  if regen_params:\n    mac_params, _, _ = gen_mac_gate_model(do_plot=True)\n  else:\n    mac_params = MAC_POLY3D_PARAMS\n\n  return mac_gates_polynomial_3d((xbit, wbit, abit), *mac_params)\n"
  },
  {
    "path": "qkeras/qtools/__init__.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Export qtools package.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom .run_qtools import QTools\nfrom .settings import cfg as qtools_cfg\n"
  },
  {
    "path": "qkeras/qtools/config_public.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"configuration file for external usage.\"\"\"\n\nconfig_settings = {\n    \"default_source_quantizer\": \"quantized_bits(8, 0, 1)\",\n    \"default_interm_quantizer\": \"quantized_bits(8, 0, 1)\",\n\n    \"horowitz\": {\n        \"fpm_add\": [0.003125, 0],\n        \"fpm_mul\": [0.002994791667, 0.001041666667, 0],\n        \"fp16_add\": [0.4],\n        \"fp16_mul\": [1.1],\n        \"fp32_add\": [0.9],\n        \"fp32_mul\": [3.7],\n        \"sram_rd\": [9.02427321e-04, -2.68847858e-02, 2.08900804e-01, 0.0],\n        \"dram_rd\": [20.3125, 0]\n    },\n\n    \"include_energy\": {\n        \"QActivation\": [\"outputs\"],\n        \"QAdaptiveActivation\": [\"outputs\"],\n        \"Activation\": [\"outputs\"],\n        \"QBatchNormalization\": [\"parameters\"],\n        \"BatchNormalization\": [\"parameters\"],\n        \"Add\": [\"op_cost\"],\n        \"Subtract\": [\"op_cost\"],\n        \"MaxPooling2D\": [\"op_cost\"],\n        \"default\": [\"inputs\", \"parameters\", \"op_cost\"]\n    }\n}\n"
  },
  {
    "path": "qkeras/qtools/examples/example_generate_json.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Example code to generate weight and MAC sizes in a json file.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow.keras as keras\n\nfrom qkeras import QActivation\nfrom qkeras import QDense\nfrom qkeras import quantizers\nfrom qkeras.qtools import run_qtools\n\n\ndef hybrid_model():\n  \"\"\"hybrid model that mixes qkeras and keras layers.\"\"\"\n\n  x = x_in = keras.layers.Input((784,), name=\"input\")\n  x = keras.layers.Dense(300, name=\"d0\")(x)\n  x = keras.layers.Activation(\"relu\", name=\"d0_act\")(x)\n  x = QDense(100, kernel_quantizer=quantizers.quantized_po2(4),\n             bias_quantizer=quantizers.quantized_po2(4),\n             name=\"d1\")(x)\n  x = QActivation(\"quantized_relu(4,0)\", name=\"d1_qr4\")(x)\n  x = QDense(\n      10, kernel_quantizer=quantizers.quantized_po2(4),\n      bias_quantizer=quantizers.quantized_po2(4),\n      name=\"d2\")(x)\n  x = keras.layers.Activation(\"softmax\", name=\"softmax\")(x)\n\n  return keras.Model(inputs=[x_in], outputs=[x])\n\n\ndef generate_json(in_model):\n  \"\"\"example to generate data type map for a given model.\n\n  Args:\n    in_model: qkeras model object\n\n  Usage:\n    input_quantizer_list:\n      A list of input quantizers 
for the model. It could be in the form of:\n        1. a list of quantizers, each quantizer for each one of the model inputs\n        2. one single quantizer, which will be used for all of the model inputs\n        3. None. Default input quantizer defined in config_xxx.py will be used\n        for all of the model inputs\n\n    for_reference: get energy for a reference model/trial model\n      1. True: get baseline energy for a given model. Use keras_quantizer/keras_\n        accumulator (or default_interm_quantizer in config_xxx.py if keras_\n        quantizer/keras_accumulator not given) to quantizer all layers in a\n        model in order to calculate its energy. It servers the purpose of\n        setting up a baseline energy for a given model architecture.\n      2. False: get \"real\" energy for a given model use user-specified\n        quantizers. For layers that are not quantized (keras layer) or have no\n        user-specified quantizers (qkeras layers without quantizers specified),\n        keras_quantizer and keras_accumulator(or default_interm_quantizer in\n        config_xxx.py if keras_quantizer/keras_accumulator not given)\n        will be used as their quantizers.\n\n     process: technology process to use in configuration (horowitz, ...)\n\n     weights_path: absolute path to the model weights\n\n     is_inference: whether model has been trained already, which is needed to\n         compute tighter bounds for QBatchNormalization Power estimation\n\n     Other parameters (defined in config_xxx.py):\n       1. \"default_source_quantizer\" is used as default input quantizer\n          if user do not specify any input quantizers,\n       2. \"default_interm_quantizer\": is used as default quantizer for any\n          intermediate variables such as multiplier, accumulator, weight/bias\n          in a qkeras layer if user do not secifiy the corresponding variable\n       3. 
process_name: energy calculation parameters for different processes.\n          \"horowitz\" is the process we use by default.\n       4. \"include_energy\": what energy to include at each layer\n          when calculation the total energy of the entire model.\n          \"parameters\": memory access energy for loading model parameters.\n          \"inputs\": memory access energy to reading inputs\n          \"outputs\": memory access energy for writing outputs\n          \"op_cost\": operation energy for multiplication and accumulation\n  \"\"\"\n\n  input_quantizer_list = [quantizers.quantized_bits(8, 0, 1)]\n  reference_internal = \"int8\"\n  reference_accumulator = \"int32\"\n\n  # generate QTools object which contains model data type map in json format\n  q = run_qtools.QTools(\n      in_model,\n      # energy calculation using a given process\n      process=\"horowitz\",\n      # quantizers for model inputs\n      source_quantizers=input_quantizer_list,\n      # training or inference with a pre-trained model\n      is_inference=False,\n      # path to pre-trained model weights\n      weights_path=None,\n      # keras_quantizer to quantize weight/bias in non-quantized keras layers\n      keras_quantizer=reference_internal,\n      # keras_accumulator to quantize MAC in un-quantized keras layers\n      keras_accumulator=reference_accumulator,\n      # calculating baseline energy or not\n      for_reference=False)\n\n  # print data type map\n  q.qtools_stats_print()\n\n  # dump the layer data map to a json file\n  # json_name = \"output.json\"\n  # q.qtools_stats_to_json(json_name)\n\n\nif __name__ == \"__main__\":\n  model = hybrid_model()\n  model.summary()\n\n  generate_json(model)\n"
  },
  {
    "path": "qkeras/qtools/examples/example_get_energy.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Example code to generate weight and MAC sizes in a json file.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow.keras as keras\n\nfrom qkeras import QActivation\nfrom qkeras import QDense\nfrom qkeras import quantizers\nfrom qkeras.qtools import run_qtools\nfrom qkeras.qtools import settings as qtools_settings\n\n\ndef hybrid_model():\n  \"\"\"hybrid model that mixes qkeras and keras layers.\"\"\"\n\n  x = x_in = keras.layers.Input((784,), name=\"input\")\n  x = keras.layers.Dense(300, name=\"d0\")(x)\n  x = keras.layers.Activation(\"relu\", name=\"d0_act\")(x)\n  x = QDense(100, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n             bias_quantizer=quantizers.quantized_bits(4, 0, 1),\n             name=\"d1\")(x)\n  x = QActivation(\"quantized_relu(4,0)\", name=\"d1_qr4\")(x)\n  x = QDense(\n      10, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(4, 0, 1),\n      name=\"d2\")(x)\n  x = keras.layers.Activation(\"softmax\", name=\"softmax\")(x)\n\n  return keras.Model(inputs=[x_in], outputs=[x])\n\n\nif __name__ == \"__main__\":\n  # input parameters:\n  # process: technology process to use in configuration (horowitz, ...)\n  # 
weights_on_memory: whether to store parameters in dram, sram, or fixed\n  # activations_on_memory: store activations in dram or sram\n  # rd_wr_on_io: whether load data from dram to sram (consider sram as a cache\n  #   for dram. If false, we will assume data will be already in SRAM\n  # source_quantizers: quantizers for model input\n  # is_inference: whether model has been trained already, which is\n  #   needed to compute tighter bounds for QBatchNormalization Power estimation.\n  # reference_internal: size to use for weight/bias/activation in\n  #   get_reference energy calculation (int8, fp16, fp32)\n  # reference_accumulator: accumulator and multiplier type in get_reference\n  #   energy calculation\n  model = hybrid_model()\n  model.summary()\n\n  reference_internal = \"int8\"\n  reference_accumulator = \"int32\"\n\n  # By setting for_reference=True, we create QTools object which uses\n  # keras_quantizer to quantize weights/bias and\n  # keras_accumulator to quantize MAC variables for all layers. Obviously, this\n  # overwrites any quantizers that user specified in the qkeras layers. 
The\n  # purpose of doing so is to enable user to calculate a baseline energy number\n  # for a given model architecture and compare it against quantized models.\n  q = run_qtools.QTools(\n      model,\n      # energy calculation using a given process\n      process=\"horowitz\",\n      # quantizers for model input\n      source_quantizers=[quantizers.quantized_bits(8, 0, 1)],\n      is_inference=False,\n      # absolute path (including filename) of the model weights\n      weights_path=None,\n      # keras_quantizer to quantize weight/bias in un-quantized keras layers\n      keras_quantizer=reference_internal,\n      # keras_quantizer to quantize MAC in un-quantized keras layers\n      keras_accumulator=reference_accumulator,\n      # whether calculate baseline energy\n      for_reference=True)\n\n  # caculate energy of the derived data type map.\n  ref_energy_dict = q.pe(\n      # whether to store parameters in dram, sram, or fixed\n      weights_on_memory=\"sram\",\n      # store activations in dram or sram\n      activations_on_memory=\"sram\",\n      # minimum sram size in number of bits\n      min_sram_size=8*16*1024*1024,\n      # whether load data from dram to sram (consider sram as a cache\n      # for dram. If false, we will assume data will be already in SRAM\n      rd_wr_on_io=False)\n\n  # get stats of energy distribution in each layer\n  reference_energy_profile = q.extract_energy_profile(\n      qtools_settings.cfg.include_energy, ref_energy_dict)\n  # extract sum of energy of each layer according to the rule specified in\n  # qtools_settings.cfg.include_energy\n  total_reference_energy = q.extract_energy_sum(\n      qtools_settings.cfg.include_energy, ref_energy_dict)\n  print(\"Baseline energy profile:\", reference_energy_profile)\n  print(\"Total baseline energy:\", total_reference_energy)\n\n  # By setting for_reference=False, we quantize the model using quantizers\n  # specified by users in qkeras layers. 
For hybrid models where there are\n  # mixture of unquantized keras layers and quantized qkeras layers, we use\n  # keras_quantizer to quantize weights/bias and keras_accumulator to quantize\n  # MAC variables for all keras layers.\n  q = run_qtools.QTools(\n      model, process=\"horowitz\",\n      source_quantizers=[quantizers.quantized_bits(8, 0, 1)],\n      is_inference=False, weights_path=None,\n      keras_quantizer=reference_internal,\n      keras_accumulator=reference_accumulator,\n      for_reference=False)\n  trial_energy_dict = q.pe(\n      weights_on_memory=\"sram\",\n      activations_on_memory=\"sram\",\n      min_sram_size=8*16*1024*1024,\n      rd_wr_on_io=False)\n  trial_energy_profile = q.extract_energy_profile(\n      qtools_settings.cfg.include_energy, trial_energy_dict)\n  total_trial_energy = q.extract_energy_sum(\n      qtools_settings.cfg.include_energy, trial_energy_dict)\n  print(\"energy profile:\", trial_energy_profile)\n  print(\"Total energy:\", total_trial_energy)\n"
  },
  {
    "path": "qkeras/qtools/generate_layer_data_type_map.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Generates MAC, input and output datatype for a qkeras model.\"\"\"\nimport collections\nimport copy\nimport numpy as np\nimport sys\n\nimport networkx as nx\nfrom qkeras.qtools import qgraph\nfrom qkeras.qtools import qtools_util\nfrom qkeras.qtools import quantized_operators\nfrom qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module\nfrom qkeras.qtools.settings import cfg\nfrom qkeras.qtools.quantized_operators import adder_factory\nfrom qkeras.qtools.quantized_operators.fused_bn_factory import FusedBNFactory\n\nclass TagMissingError(ValueError):\n  pass\n\n\nLayerDataType = collections.namedtuple(\n    \"LayerDataType\",\n    [\n        \"input_quantizer_list\",\n        \"multiplier\",\n        \"accumulator\",\n\n        \"weight_quantizer\",\n        \"w_shapes\",\n\n        \"bias_quantizer\",\n        \"b_shapes\",\n\n        \"output_quantizer\",\n        \"output_shapes\",\n\n        \"operation_count\",\n    ],\n)\n\nQKERAS_LAYERS = [\n    \"QDense\",\n    \"QConv1D\",\n    \"QConv2D\",\n    \"QDepthwiseConv2D\",\n    \"QConv2D\",\n    \"QConv2DTranspose\",\n]\n\nKERAS_LAYERS = [\n    \"Dense\",\n    \"Conv1D\",\n    \"Conv2D\",\n    \"DepthwiseConv2D\",\n    \"Conv2DTranspose\",\n]\n\n\ndef get_bn_quantizers(layer, quantizer_factory, 
cfg, keras_quantizer,\n                      input_quantizer, is_inference, for_reference,\n                      model_weights_already_quantized):\n  \"\"\"Extract quantizers from a given batchnorm layer.\"\"\"\n\n  # QKeras layers might be mixed with keras layers.\n  if for_reference or not hasattr(layer, \"get_quantizers\"):\n    # Keras BatchNorm layer mixed with quantized model\n    # -> no reference mode\n    gamma_quantizer = quantizer_factory.make_default_quantizer(\n        mode=cfg.default_interm_quantizer)\n    beta_quantizer = quantizer_factory.make_default_quantizer(\n        mode=cfg.default_interm_quantizer)\n    mean_quantizer = quantizer_factory.make_default_quantizer(\n        mode=cfg.default_interm_quantizer)\n    variance_quantizer = quantizer_factory.make_default_quantizer(\n        mode=cfg.default_interm_quantizer)\n    inverse_quantizer = quantizer_factory.make_default_quantizer(\n        mode=cfg.default_interm_quantizer)\n\n    if keras_quantizer:\n      gamma_quantizer = quantizer_factory.make_default_quantizer(\n          mode=keras_quantizer)\n      beta_quantizer = quantizer_factory.make_default_quantizer(\n          mode=keras_quantizer)\n      mean_quantizer = quantizer_factory.make_default_quantizer(\n          mode=keras_quantizer)\n      variance_quantizer = quantizer_factory.make_default_quantizer(\n          mode=keras_quantizer)\n      inverse_quantizer = quantizer_factory.make_default_quantizer(\n          mode=keras_quantizer)\n  else:\n    (qkeras_gamma_quantizer, qkeras_beta_quantizer,\n     qkeras_mean_quantizer, qkeras_variance_quantizer,\n     qkeras_inverse_quantizer) = layer.get_quantizers()\n\n    if not qkeras_beta_quantizer:\n      beta_quantizer = quantizer_factory.clone_quantizer(input_quantizer)\n    else:\n      beta_quantizer = quantizer_factory.make_quantizer(\n          qkeras_beta_quantizer)\n\n    if not qkeras_mean_quantizer:\n      mean_quantizer = quantizer_factory.clone_quantizer(input_quantizer)\n    
else:\n      mean_quantizer = quantizer_factory.make_quantizer(\n          qkeras_mean_quantizer)\n\n    if not qkeras_variance_quantizer:\n      variance_quantizer = quantizer_factory.make_default_quantizer(\n          mode=cfg.default_interm_quantizer)\n    else:\n      # If variance is float, convert to input_quantizer.\n      variance_quantizer = quantizer_factory.make_quantizer(\n          qkeras_variance_quantizer)\n\n    if not qkeras_gamma_quantizer:\n      gamma_quantizer = quantizer_factory.make_default_quantizer(\n          mode=cfg.default_interm_quantizer)\n    else:\n      gamma_quantizer = quantizer_factory.make_quantizer(\n          qkeras_gamma_quantizer)\n\n    if not qkeras_inverse_quantizer:\n      inverse_quantizer = quantizer_factory.make_default_quantizer(\n          mode=cfg.default_interm_quantizer)\n    else:\n      inverse_quantizer = quantizer_factory.make_quantizer(\n          qkeras_inverse_quantizer)\n\n  # During inference, gamma, beta and variance are constants\n  # if they are po2 quantizers, we need to modify their bits\n  # with actual values and also update graph with the\n  # corresponding output_quantizer on the edge.\n  if is_inference:\n    weights = qtools_util.get_weights(\n        layer, model_weights_already_quantized)\n    # If no scale(gamma), num_weights --\n    # If no center(beta_quantizer) num_weights --\n    num_weights = 4\n    if not layer.scale:\n      num_weights -= 1\n    if not layer.center:\n      num_weights -= 1\n\n    if (layer.scale and gamma_quantizer is not None and gamma_quantizer.is_po2):\n      gamma_quantizer.update_inference_values(weights[0])\n    if (variance_quantizer is not None and variance_quantizer.is_po2):\n      variance_quantizer.update_inference_values(\n          weights[num_weights-1])\n\n  return (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer,\n          inverse_quantizer)\n\n\ndef update_output_quantizer_in_graph(graph, node_id, quantizer_factory,\n            
                         new_quantizer, for_reference):\n  \"\"\"update the edge with output quantizer type.\"\"\"\n\n  node = graph.nodes[node_id]\n  qkeras_output_quantizer = node[\"out_quantizer\"]\n\n  # If existing graph doesn't have a valid output quantizer\n  # update graph with the new quantizer\n  if (for_reference or not qkeras_output_quantizer or\n      not quantizer_factory.is_quantizer_supported(qkeras_output_quantizer)):\n    qkeras_output_quantizer = new_quantizer\n    qgraph.GraphUpdateEdge(graph, node_id, qkeras_output_quantizer)\n\n  # If activation specified, convert activation quantizer to qtools quantizer\n  # If activation not secified, convert the new quantizer to qtools quantizer\n  output_quantizer = quantizer_factory.make_quantizer(qkeras_output_quantizer)\n\n  # Output_quantizer is used for updating dictionary in json\n  return output_quantizer\n\n\ndef generate_layer_data_type_map(\n    graph, source_quantizer_list, is_inference,\n    keras_quantizer=None, keras_accumulator=None,\n    for_reference=False, debug=False,\n    model_weights_already_quantized=True,\n    hw_weight_dict=None):\n  \"\"\"main funciton to generate datatype for each layer.\n\n  For each type of layer, this function calculates the sizes and minimum\n  number of bits required to represent the parameters and variables (e.g.,\n  weights, bias, multiplier and accumulator - MAC, etc.) embedded in\n  these layers.\n\n  Args:\n    graph: input graph that traverses the model\n    source_quantizer_list: a list of quantizers for model inputs\n    is_inference: whether model is pre-trained with weights available\n    keras_quantizer: default quantizer used to quantize weights and bias\n    keras_accumulator: default MAC quantizer to quantize multiplier,\n      accumulator and output\n    for_reference: whether to generate a map for a baseline model\n    debug: whether to print debug messages\n    model_weights_already_quantized: bool. 
If model weights are already\n      quantized, no need to apply quantizer to weights here in this function.\n    hw_weight_dict: weight dictonary for hardware inference. For example, fused\n      bn op inference in hardware will need additional fused weights, which\n      can be extracted from this dictionary. This dictionary is the output from\n      utils.py/model_save_quantized_weights function.\n\n  Returns:\n    a result containing the following fields:\n    source_quantizer_list similar as input\n    output_layers: names of the layers that are output layers\n    input_layers: names of the layers that are input_layers,\n    layer_data_type_map: data type map of each layer\n  \"\"\"\n\n  quantizer_factory = quantizer_factory_module.QuantizerFactory()\n  layer_data_type_map = collections.OrderedDict()\n\n  # get the output layers\n\n  output_layers = []\n  input_layers = []\n  predecessors = list(graph.predecessors(qgraph.SINK))\n  successors = list(graph.successors(qgraph.SOURCE))\n\n  for u in predecessors:\n    if u == qgraph.SOURCE or u == qgraph.SINK:\n      continue\n    output_layers.append(graph.nodes[u][\"layer\"][0])\n\n  for u in successors:\n    if u == qgraph.SOURCE or u == qgraph.SINK:\n      continue\n    input_layers.append(graph.nodes[u][\"layer\"][0])\n\n  for node_id in nx.topological_sort(graph):\n    node = graph.nodes[node_id]\n    node_type = node[\"type\"][-1]\n    layer = node[\"layer\"][0]\n    is_input_layer = layer in input_layers\n\n    w_shapes = None\n    b_shapes = None\n    output_shapes = None\n    qkeras_weight_quantizer = None\n\n    if hasattr(layer, \"output_shape\"):\n      output_shapes = layer.output_shape\n\n    if hasattr(layer, \"get_weights\"):\n      weights = layer.get_weights()\n      if len(weights) != 0:\n        w_shapes = layer.get_weights()[0].shape\n        b_shapes = weights[0].shape[-1]\n\n    if debug:\n      print(\"########\")\n      if layer is not None:\n        print(layer.name)\n      else:\n        
print(\"None\")\n\n    # Deals with keras layer or lack of input quantizer in qkeras layer.\n    input_qe_list = qtools_util.get_input_quantizers_advanced(\n        graph, node_id, is_input_layer, quantizer_factory, cfg)\n\n    if input_qe_list and node_id != qgraph.SINK:\n      input_quantizer_list = []\n      for node in input_qe_list:\n        input_quantizer_list.append(node[0])\n\n      # Calculates number of operations (multiplication/accumulation).\n      # Previously Merge layers's inputs all have the same shape, however, in\n      # MobilenetV3 we found that there is shape broadcast in the keras\n      # Multiply layer. Therefore we use the shape with max size as the\n      # input shape\n      if len(input_qe_list) > 0:\n        maxsize = -1\n        max_id = 0\n        for (idx, item) in enumerate(input_qe_list):\n          shape = item[1][\"shape\"]\n          size = np.prod(shape[1:])\n          if size > maxsize:\n            maxsize = size\n            max_id = idx\n        input_shape = input_qe_list[max_id][1][\"shape\"]\n      else:\n        (_, edge_0) = input_qe_list[0]\n        input_shape = edge_0[\"shape\"]\n\n      operation_count = qtools_util.get_operation_count(\n          layer, input_shape)\n\n    # Merges layers with multiple inputs.\n    if qtools_util.is_merge_layers(layer):\n\n      # merge_factory.make_quantizer automatically calculates the merge output\n      # quantizer bitwidth according to input quantizer type.\n      merge_factory = quantized_operators.MergeFactory()\n      merge_quantizer = merge_factory.make_quantizer(\n          input_qe_list, layer.__class__.__name__)\n\n      if for_reference:\n        # The for_reference option overwrites the auto-calculated merge output\n        # quantizer\n        if keras_accumulator:\n          # gate_factor and gate_bits remain the same as previously\n          # calculated; only change output quantizer as the keras_accumulator\n          merge_quantizer.output = 
quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n        else:\n          merge_quantizer.output = quantizer_factory.make_default_quantizer(\n              mode=cfg.default_interm_quantizer)\n\n      output_quantizer = update_output_quantizer_in_graph(\n          graph, node_id, quantizer_factory, merge_quantizer.output,\n          for_reference)\n\n      layer_data_type_map[layer] = LayerDataType(\n          input_quantizer_list,\n          merge_quantizer,\n          None,\n\n          None,\n          None,\n\n          None,\n          None,\n\n          output_quantizer,\n          output_shapes,\n\n          operation_count\n      )\n\n    # MaxPooling/reshape/flatten/UpSampling1D/2D/3D\n    elif (qtools_util.is_shape_alternation_layers(layer) or\n          \"UpSampling\" in layer.__class__.__name__):\n      input_quantizer = input_quantizer_list[0]\n\n      # Output quantizer\n      output_quantizer = update_output_quantizer_in_graph(\n          graph, node_id, quantizer_factory, input_quantizer, for_reference)\n\n      layer_data_type_map[layer] = LayerDataType(\n          input_quantizer_list,\n          None,\n          None,\n\n          None,\n          None,\n\n          None,\n          None,\n\n          output_quantizer,\n          output_shapes,\n\n          operation_count\n      )\n\n    # AveragePooling and GlobalAveragePooling\n    elif layer.__class__.__name__ in [\n        \"AveragePooling2D\", \"AvgPool2D\", \"GlobalAvgPool2D\",\n        \"GlobalAveragePooling2D\", \"QAveragePooling2D\",\n        \"QGlobalAveragePooling2D\"]:\n      (input_quantizer, _) = input_qe_list[0]\n      qtools_average_quantizer = None\n      # This is a hack. We don't want to implement a new accumulator class\n      # just for averagpooling. 
So we re-use accumulator type in conv/dense\n      # layers which need multiplier and kernel as input parameters.\n      # In order to do so, we fake a multiplier which treat the pool_size as\n      # the kernel. since kernel needs 4 dimension, k_h, k_w, C_in, C_out,\n      # we set the last two dimension as [1, 1]\n      if layer.__class__.__name__ in [\"AveragePooling2D\", \"AvgPool2D\",\n                                      \"QAveragePooling2D\"]:\n        pool_size = tuple(list(layer.pool_size) + [1, 1])\n      else:\n        pool_size = tuple(list(input_shape)[1:-1] + [1, 1])\n\n      # Automatically calculates the accumulator bitwidth according to input\n      # quantizer type for both quantized pooling and regular pooling layers\n      multiplier_factory = quantized_operators.MultiplierFactory()\n      fake_multiplier = multiplier_factory.make_multiplier(\n          input_quantizer, input_quantizer)\n      fake_multiplier.output = input_quantizer\n      accumulator_factory = quantized_operators.AccumulatorFactory()\n      accumulator = accumulator_factory.make_accumulator(\n          pool_size, fake_multiplier, use_bias=False)\n\n      # For quantized pooling layers, we also need to consider the division\n      # precision, which is controlled by the average quantizer\n      if layer.__class__.__name__ in [\"QAveragePooling2D\",\n                                      \"QGlobalAveragePooling2D\"]:\n        # For the quantized layer, there is an average_quantizer used for\n        # the inverse of division operation.\n        qkeras_average_quantizer = layer.get_quantizers()[0]\n        qtools_average_quantizer = quantizer_factory.make_quantizer(\n            qkeras_average_quantizer)\n        multiplier = multiplier_factory.make_multiplier(\n            accumulator.output, qtools_average_quantizer)\n      else:\n        multiplier = None\n      if debug:\n        print(\"accumulator:\", accumulator.output.bits)\n\n      # Re-calcualte accumulator/multiplier 
type when it's using\n      # for_reference option\n      if for_reference:\n        if keras_accumulator:\n          # If keras_accumulator exists, use keras_accumulator as multiplier\n          # or accumulator type\n          if multiplier:\n            # Quantized layers need to define multiplier type\n            multiplier.output = quantizer_factory.make_default_quantizer(\n                mode=keras_accumulator)\n          accumulator.output = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n        else:\n          # If user didn't provide keras_accumulator, use the default settings\n          # in cfg to define multiplier/accumulator type\n          if multiplier:\n            multiplier.output = quantizer_factory.make_default_quantizer(\n                mode=cfg.default_interm_quantizer)\n          accumulator.output = quantizer_factory.make_default_quantizer(\n              mode=cfg.default_interm_quantizer)\n        layer_quantizer = accumulator.output\n\n      # set the output quantizer\n      if layer.__class__.__name__ in [\"QAveragePooling2D\",\n                                      \"QGlobalAveragePooling2D\"]:\n        # If is quantized layer, last operation is multiply (averaging).\n        layer_quantizer = multiplier.output\n      else:\n        layer_quantizer = accumulator.output\n      output_quantizer = update_output_quantizer_in_graph(\n          graph, node_id, quantizer_factory, layer_quantizer, for_reference)\n\n      layer_data_type_map[layer] = {\n          \"input_quantizer_list\": input_quantizer_list,\n          \"average_quantizer\": qtools_average_quantizer,\n          \"pool_sum_accumulator\": accumulator,\n          \"pool_avg_multiplier\": multiplier,\n          \"output_quantizer\": output_quantizer,\n          \"output_shapes\": output_shapes,\n          \"operation_count\": operation_count\n      }\n\n    # If it's a Quantized Activation layer.\n    elif node_type in [\"QActivation\", 
\"QAdaptiveActivation\", \"Activation\"]:\n\n      if for_reference or not hasattr(layer, \"quantizer\"):\n        # Keras activation layer -> use default_interm_quantizer\n        layer_quantizer = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n\n        if keras_accumulator:\n          layer_quantizer = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n      else:\n        layer_quantizer = layer.quantizer\n\n        if not quantizer_factory.is_quantizer_supported(layer_quantizer):\n          raise TagMissingError(\n              \"Unsupported activation quantizer {} on this layer: {}\".format(\n                  layer_quantizer, layer))\n\n        if not layer_quantizer:\n          layer_quantizer = quantizer_factory.make_default_quantizer(\n              mode=cfg.default_interm_quantizer)\n\n      output_quantizer = update_output_quantizer_in_graph(\n          graph, node_id, quantizer_factory, layer_quantizer, for_reference)\n\n      layer_data_type_map[layer] = LayerDataType(\n          input_quantizer_list,\n          None,\n          None,\n          None,\n          w_shapes,\n          None,\n          b_shapes,\n          output_quantizer,\n          output_shapes,\n          operation_count\n      )\n\n    elif node_type in [\"QBatchNormalization\", \"BatchNormalization\"]:\n      # If this batchnorm layer needs to be fused with the previous layer,\n      # we pass the input quantizer type as the output type in qraph.\n\n      (input_quantizer, _) = input_qe_list[0]\n\n      if  (hw_weight_dict is not None and\n           hw_weight_dict[layer.name][\"enable_bn_fusing\"]):\n        if for_reference and keras_accumulator and not is_input_layer:\n          input_quantizer = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n        output_quantizer = update_output_quantizer_in_graph(\n            graph, node_id, quantizer_factory, input_quantizer, 
for_reference)\n        layer_data_type_map[layer] = {\n            \"input_quantizer_list\": input_quantizer_list,\n            \"output_quantizer\": output_quantizer,\n            \"output_shapes\": input_shape,\n            \"operation_count\": operation_count\n        }\n      else:\n        (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer,\n         _) = get_bn_quantizers(layer, quantizer_factory, cfg, keras_quantizer,\n                                input_quantizer, is_inference, for_reference,\n                                model_weights_already_quantized)\n\n        qbn = quantized_operators.QBNFactory()\n        qbn.make_quantizer(\n            input_quantizer, gamma_quantizer, beta_quantizer,\n            mean_quantizer, variance_quantizer, layer.scale, layer.center\n        )\n\n        def set_output(op, output):\n          if op:\n            op.output = output\n\n        if for_reference or not hasattr(layer, \"get_quantizers\"):\n          set_output(\n              qbn.internal_divide_quantizer,\n              quantizer_factory.make_default_quantizer(\n                  mode=cfg.default_interm_quantizer))\n\n          set_output(\n              qbn.internal_multiplier,\n              quantizer_factory.make_default_quantizer(\n                  mode=cfg.default_interm_quantizer))\n\n          set_output(\n              qbn.internal_accumulator,\n              quantizer_factory.make_default_quantizer(\n                  mode=cfg.default_interm_quantizer))\n\n          set_output(\n              qbn.internal_output,\n              quantizer_factory.make_default_quantizer(\n                  mode=cfg.default_interm_quantizer))\n\n          if keras_accumulator:\n            set_output(\n                qbn.internal_divide_quantizer,\n                quantizer_factory.make_default_quantizer(\n                    mode=keras_accumulator))\n\n            set_output(\n                qbn.internal_multiplier,\n                
quantizer_factory.make_default_quantizer(\n                    mode=keras_accumulator))\n\n            set_output(\n                qbn.internal_accumulator,\n                quantizer_factory.make_default_quantizer(\n                    mode=keras_accumulator))\n\n            set_output(\n                qbn.internal_output.output,\n                quantizer_factory.make_default_quantizer(\n                    mode=keras_accumulator))\n\n        gamma_range = None\n        if hasattr(layer, \"gamma_range\"):\n          gamma_range = layer.gamma_range\n\n        beta_range = None\n        if hasattr(layer, \"beta_range\"):\n          beta_range = layer.beta_range\n\n        if not layer.center:\n          qbn.beta_quantizer = None\n\n        if not layer.scale:\n          qbn.gamma_quantizer = None\n\n        layer_quantizer = qbn.internal_output.output\n        output_quantizer = update_output_quantizer_in_graph(\n            graph, node_id, quantizer_factory, layer_quantizer, for_reference)\n        layer_data_type_map[layer] = {\n            \"input_quantizer_list\": input_quantizer_list,\n            \"gamma_quantizer\": gamma_quantizer,\n            \"beta_quantizer\": beta_quantizer,\n            \"mean_quantizer\": mean_quantizer,\n            \"variance_quantizer\": variance_quantizer,\n            \"gamma_range\": gamma_range,\n            \"beta_range\": beta_range,\n            \"internal_divide_quantizer\": qbn.internal_divide_quantizer,\n            \"internal_multiplier\": qbn.internal_multiplier,\n            \"internal_accumulator\": qbn.internal_accumulator,\n            \"output_quantizer\": output_quantizer,\n            \"output_shapes\": input_shape,\n            \"operation_count\": operation_count\n        }\n    # If qdense, qconv, qpool, qoctave\n    elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:\n      (input_quantizer, _) = input_qe_list[0]\n\n      if for_reference or not hasattr(layer, \"get_quantizers\"):\n        # 
for_reference: force all quantizers to keras_quantizer\n        weight_quantizer = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n        bias_quantizer = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n\n        if keras_quantizer:\n          weight_quantizer = quantizer_factory.make_default_quantizer(\n              mode=keras_quantizer)\n          bias_quantizer = quantizer_factory.make_default_quantizer(\n              mode=keras_quantizer)\n      else:\n        # qkeras layer\n        qkeras_weight_quantizer = layer.get_quantizers()[0]\n        qkeras_bias_quantizer = layer.get_quantizers()[1]\n\n        if not quantizer_factory.is_quantizer_supported(\n            qkeras_weight_quantizer):\n          raise TagMissingError(\n              \"Unsupported weight quantizer {} on this layer: {}\".format(\n                  qkeras_weight_quantizer, layer))\n\n        if not quantizer_factory.is_quantizer_supported(\n            qkeras_bias_quantizer):\n          raise TagMissingError(\n              \"Unsupported bias quantizer {} on this layer: {}\".format(\n                  qkeras_bias_quantizer, layer))\n\n        weight_quantizer = quantizer_factory.make_quantizer(\n            qkeras_weight_quantizer)\n        bias_quantizer = quantizer_factory.make_quantizer(\n            qkeras_bias_quantizer)\n\n      # TODO(lishanok): During inference, if weight and bias is po2,\n      #  need to update corresponding quantizer type with min and max\n      #  of the constant values.\n      if is_inference:\n        weights = qtools_util.get_weights(\n            layer, model_weights_already_quantized)\n        if weight_quantizer.is_po2:\n          weight_quantizer.update_inference_values(weights[0])\n\n        if bias_quantizer.is_po2:\n          bias_quantizer.update_inference_values(weights[1])\n\n      multiplier_factory = quantized_operators.MultiplierFactory()\n      multiplier = 
multiplier_factory.make_multiplier(\n          weight_quantizer, input_quantizer)\n\n      enable_bn_fusing = (\n          hw_weight_dict is not None and hw_weight_dict.get(layer.name, None)\n          and hw_weight_dict[layer.name].get(\"enable_bn_fusing\", None))\n\n      if enable_bn_fusing and qkeras_weight_quantizer:\n        # When conv layer is fused wiht bn, multiplier bitwidth is ajusted by\n        # kernel quantizer scale values (for auto_po2 type of quantizer only).\n        # For conv layer without fusing, multiplier bitwidth is not adjusted\n        # even if auto_po2 is used in quantizer. Instead, we directly adjusted\n        # the accumulator and store that in fused_accumulator.\n        qtools_util.adjust_multiplier_for_auto_po2(\n            multiplier, qkeras_weight_quantizer)\n\n      weights = layer.get_weights()\n      kernel = weights[0]\n\n      kernel_shape = kernel.shape\n      # depthwise_kernel_shape = kernel_size + (input_dim, depth_multiplier)\n      # When computing accumulator bitwidth for dw conv2d layer, we do not\n      # need to count the last two dimensions\n      if node_type in [\"QDepthwiseConv2D\", \"DepthwiseConv2D\"]:\n        kernel_shape = kernel.shape[:-2] + (1, 1)\n\n      kernel_accumulator_factory = quantized_operators.AccumulatorFactory()\n      # Sets use_bias=False so that the accumulator doesn't account for bias\n      # bitwdith.\n      kernel_accumulator = kernel_accumulator_factory.make_accumulator(\n          kernel_shape, multiplier, use_bias=False)\n\n      if not layer.use_bias:\n        bias_quantizer = None\n        accumulator = kernel_accumulator\n      else:\n        # Add bias quantizer bitwidth to the overall accumulator\n        bias_accumulator_instance = adder_factory.IAdder()\n        accumulator = bias_accumulator_instance.make_quantizer(\n            kernel_accumulator.output, bias_quantizer)\n      if debug:\n        print(layer.name or \"None\")\n        print(\"weight_quantizer:\", 
weight_quantizer.bits)\n        print(\"input_quantizer:\", input_quantizer.bits)\n        print(\"multiplier_quantizer:\", multiplier.output.bits)\n        print(\"multiplier_gate_bits:\", multiplier.gate_bits)\n        print(\"accumulator:\", accumulator.output.bits)\n\n      if for_reference or not hasattr(layer, \"get_quantizers\"):\n        accumulator.output = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n        multiplier.output = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n\n        if keras_accumulator:\n          accumulator.output = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n          multiplier.output = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n\n      if enable_bn_fusing:\n        bn_layer_name = hw_weight_dict[layer.name][\"fused_bn_layer_name\"]\n        successor_ids = list(graph.successors(node_id))\n        bn_layer = graph.nodes[successor_ids[0]][\"layer\"][0]\n        assert bn_layer.name == bn_layer_name, (\n            \"Batchnorm layer in the graph has different name from hw_weight\"\n            f\"_dict: {layer.name} vs {bn_layer_name}. 
Check both places to \"\n            \"ensure they are matching.\")\n\n        # Add additional datatype for bn fused weights\n        (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer,\n         inverse_quantizer) = get_bn_quantizers(\n             bn_layer, quantizer_factory, cfg, keras_quantizer, input_quantizer,\n             is_inference, for_reference, model_weights_already_quantized)\n\n        qkeras_inverse_quantizer = bn_layer.inverse_quantizer_internal\n        fused_bn = FusedBNFactory()\n        fused_bn.make_quantizer(\n            prev_output_quantizer=kernel_accumulator.output,\n            prev_bias_quantizer=bias_quantizer,\n            beta_quantizer=beta_quantizer,\n            mean_quantizer=mean_quantizer,\n            inverse_quantizer=inverse_quantizer,\n            use_beta=bn_layer.center,\n            use_bias=layer.use_bias,\n            qkeras_inverse_quantizer=qkeras_inverse_quantizer\n        )\n        if for_reference or not hasattr(layer, \"get_quantizers\"):\n          fused_bn.internal_accumulator.output = (\n              quantizer_factory.make_default_quantizer(\n                  mode=cfg.default_interm_quantizer))\n          if keras_accumulator:\n            fused_bn.internal_accumulator.output = (\n                quantizer_factory.make_default_quantizer(\n                    mode=keras_accumulator))\n          fused_bn.internal_output.output = fused_bn.internal_accumulator.output\n\n        layer_quantizer = fused_bn.internal_accumulator.output\n        output_quantizer = update_output_quantizer_in_graph(\n            graph, node_id, quantizer_factory, layer_quantizer, for_reference)\n        layer_data_type_map[layer] = {\n            \"input_quantizer_list\": input_quantizer_list,\n            \"multiplier\": multiplier,\n            \"accumulator\": accumulator,\n            \"weight_quantizer\": weight_quantizer,\n            \"w_shapes\": w_shapes,\n            \"bias_quantizer\": bias_quantizer,\n  
          \"b_shapes\": b_shapes,\n            \"bn_inverse_quantizer\": inverse_quantizer,\n            \"bn_mean_quantizer\": mean_quantizer,\n            \"bn_beta_quantizer\": beta_quantizer,\n            \"fused_accumulator\": fused_bn.internal_accumulator,\n            \"output_quantizer\": output_quantizer,\n            \"output_shapes\": output_shapes,\n            \"operation_count\": operation_count\n        }\n      else:\n        # Correct accumulator bitwith with the scale values from\n        # auto-po2 type of quantizers and store them in fused_accumulator.\n        if (\n            hasattr(qkeras_weight_quantizer, \"__str__\") and\n            \"quantized_bits\" in qkeras_weight_quantizer.__str__() and\n            qkeras_weight_quantizer.alpha == \"auto_po2\"):\n          fused_accumulator = qtools_util.adjust_accumulator_for_auto_po2(\n              layer, multiplier, qkeras_weight_quantizer, bias_quantizer)\n        else:\n          fused_accumulator = accumulator\n\n        layer_quantizer = accumulator.output\n        output_quantizer = update_output_quantizer_in_graph(\n            graph, node_id, quantizer_factory, layer_quantizer, for_reference)\n\n        layer_data_type_map[layer] = {\n            \"input_quantizer_list\": input_quantizer_list,\n            \"multiplier\": multiplier,\n            \"accumulator\": accumulator,\n            \"weight_quantizer\": weight_quantizer,\n            \"w_shapes\": w_shapes,\n            \"bias_quantizer\": bias_quantizer,\n            \"b_shapes\": b_shapes,\n            \"fused_accumulator\": fused_accumulator,\n            \"output_quantizer\": output_quantizer,\n            \"output_shapes\": output_shapes,\n            \"operation_count\": operation_count\n        }\n    elif node_type in [\"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"]:\n      # Datatype for Folded Conv/DepthwiseConv layer\n      # TODO(lishanok): Add additional support for Folded Dense layer\n      (input_quantizer, _) 
= input_qe_list[0]\n      if for_reference or not hasattr(layer, \"get_quantizers\"):\n        # For_reference: force all quantizers to keras_quantizer.\n        weight_quantizer = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n        bias_quantizer = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n\n        if keras_quantizer:\n          weight_quantizer = quantizer_factory.make_default_quantizer(\n              mode=keras_quantizer)\n          bias_quantizer = quantizer_factory.make_default_quantizer(\n              mode=keras_quantizer)\n      else:\n        # QKeras layer\n        qkeras_weight_quantizer = layer.get_quantizers()[0]\n        qkeras_bias_quantizer = layer.get_quantizers()[1]\n        if not quantizer_factory.is_quantizer_supported(\n            qkeras_weight_quantizer):\n          raise TagMissingError(\n              \"Unsupported weight quantizer {} on this layer: {}\".format(\n                  qkeras_weight_quantizer, layer))\n\n        if not quantizer_factory.is_quantizer_supported(\n            qkeras_bias_quantizer):\n          raise TagMissingError(\n              \"Unsupported bias quantizer {} on this layer: {}\".format(\n                  qkeras_bias_quantizer, layer))\n\n        weight_quantizer = quantizer_factory.make_quantizer(\n            qkeras_weight_quantizer)\n\n        if qkeras_bias_quantizer:\n          bias_quantizer = quantizer_factory.make_quantizer(\n              qkeras_bias_quantizer)\n        else:\n          bias_quantizer = None\n\n      # TODO(lishanok): During inference, if weight and bias is po2,\n      #  need to update corresponding quantizer type with min and max\n      #  of the constant values\n      if is_inference:\n        weights = qtools_util.get_weights(\n            layer, model_weights_already_quantized)\n        if weight_quantizer.is_po2:\n          weight_quantizer.update_inference_values(weights[0])\n\n     
   if bias_quantizer and bias_quantizer.is_po2:\n          bias_quantizer.update_inference_values(weights[1])\n\n      multiplier_factory = quantized_operators.MultiplierFactory()\n      multiplier = multiplier_factory.make_multiplier(\n          weight_quantizer, input_quantizer)\n      if qkeras_weight_quantizer:\n        qtools_util.adjust_multiplier_for_auto_po2(\n            multiplier, qkeras_weight_quantizer)\n      weights = layer.get_weights()\n      kernel = weights[0]\n\n      accumulator_factory = quantized_operators.AccumulatorFactory()\n      accumulator = accumulator_factory.make_accumulator(\n          kernel.shape, multiplier, use_bias=True if bias_quantizer else False)\n\n      if not bias_quantizer:\n        # Set bias the same as accumulator type.\n        bias_quantizer = copy.deepcopy(accumulator.output)\n        if not accumulator.output.is_floating_point:\n          # For fixed point accumulator, needs to add 1 to its bits to avoid\n          # possible satuation.\n          accumulator.output.bits += 1\n          accumulator.output.int_bits += 1\n      if for_reference or not hasattr(layer, \"get_quantizers\"):\n        accumulator.output = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n        multiplier.output = quantizer_factory.make_default_quantizer(\n            mode=cfg.default_interm_quantizer)\n\n        if keras_accumulator:\n          accumulator.output = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n          multiplier.output = quantizer_factory.make_default_quantizer(\n              mode=keras_accumulator)\n\n      layer_quantizer = accumulator.output\n      output_quantizer = update_output_quantizer_in_graph(\n          graph, node_id, quantizer_factory, layer_quantizer, for_reference)\n\n      layer_data_type_map[layer] = LayerDataType(\n          input_quantizer_list,\n          multiplier,\n          accumulator,\n          weight_quantizer,\n  
        w_shapes,\n          bias_quantizer,\n          b_shapes,\n          output_quantizer,\n          output_shapes,\n          operation_count\n      )\n\n    elif node_type:\n      # Any other unsupported layer types -> pass the input quantizer\n      # type to output in qraph\n      print(f\"[WARNING] QTools cannot parse {node_type}. The input quatnizer\"\n            \" of this layer is directly passed through to the output!\",\n            file=sys.stderr)\n\n      (input_quantizer, _) = input_qe_list[0]\n\n      if for_reference and keras_accumulator and not is_input_layer:\n        input_quantizer = quantizer_factory.make_default_quantizer(\n            mode=keras_accumulator)\n\n      output_quantizer = update_output_quantizer_in_graph(\n          graph, node_id, quantizer_factory, input_quantizer, for_reference)\n\n      layer_data_type_map[layer] = LayerDataType(input_quantizer_list, None,\n                                                 None, None, None, None, None,\n                                                 output_quantizer,\n                                                 output_shapes, operation_count)\n\n  result = {\n      \"source_quantizer_list\": source_quantizer_list,\n      \"output_layers\": output_layers,\n      \"input_layers\": input_layers,\n      \"layer_data_type_map\": layer_data_type_map\n  }\n\n  return result\n"
  },
  {
    "path": "qkeras/qtools/interface.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"I/O implementation.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\n\nfrom qkeras.qtools import generate_layer_data_type_map\nfrom qkeras.qtools import qtools_util\n\n\ndef print_qstats(graph):\n  \"\"\"Prints quantization statistics for the model.\"\"\"\n\n  layer_data_type_map = generate_layer_data_type_map(graph)\n\n  multipliers_counter = collections.Counter()\n\n  print(\"\")\n  print(\"Number of operations in model:\")\n  for layer, data_type in layer_data_type_map.items():\n    multiplier = data_type.multiplier\n    multiplier_detail_str = \"{}_{}, total_bits:{}, int_bits:{}\".format(\n        \"signed\" if multiplier.output.is_signed == 1 else \"unsigned\",\n        multiplier.implemented_as(),\n        multiplier.output.bits,\n        multiplier.output.int_bits,\n    )\n\n    print(\"{}: {} x {}\".format(\n        layer.name,\n        data_type.operation_count,\n        multiplier_detail_str,\n    ))\n\n    multipliers_counter[\n        multiplier_detail_str] += data_type.operation_count\n\n  print(\"\")\n  print(\"Number of operation types in model:\")\n  for (multiplier_detail_str,\n       total_multiplier_operation_count) in multipliers_counter.items():\n    print(\"{}, x 
{}\".format(multiplier_detail_str,\n                            total_multiplier_operation_count))\n\n\ndef populate_quantizer(quantizer, shape=None, implemented_as=None):\n  \"\"\"write all the needed fields in the quantizer to dictionary.\"\"\"\n\n  mydict = collections.OrderedDict()\n\n  if quantizer is not None:\n    mydict[\"quantizer_type\"] = quantizer.name\n\n    # floats\n    if quantizer.is_floating_point:\n      mydict[\"bits\"] = quantizer.bits\n\n    # po2\n    elif quantizer.is_po2:\n      mydict[\"bits\"] = quantizer.bits\n      mydict[\"is_signed\"] = quantizer.is_signed\n      mydict[\"max_value\"] = quantizer.max_val_po2\n\n    # binary\n    elif quantizer.mode in [3, 4]:\n\n      mydict[\"bits\"] = quantizer.bits\n      mydict[\"int_bits\"] = quantizer.int_bits\n      mydict[\"is_signed\"] = quantizer.is_signed\n      if quantizer.mode == 4:\n        mydict[\"values\"] = [0, 1]\n      else:\n        mydict[\"values\"] = [-1, 1]\n\n    # ternary(-1, 0, 1)\n    elif quantizer.mode == 2:\n      mydict[\"bits\"] = 2\n      mydict[\"int_bits\"] = 2\n      mydict[\"is_signed\"] = 1\n      mydict[\"values\"] = [0, -1, 1]\n\n    # quantized_bits\n    elif quantizer.mode == 0:\n      mydict[\"bits\"] = quantizer.bits\n      mydict[\"int_bits\"] = quantizer.int_bits + quantizer.is_signed\n      mydict[\"is_signed\"] = quantizer.is_signed\n\n    if shape is not None:\n      if isinstance(shape, tuple) and shape[0] is None:\n        shape = list(shape)\n        shape[0] = -1\n        mydict[\"shape\"] = tuple(shape)\n      else:\n        mydict[\"shape\"] = shape\n\n    if implemented_as is not None:\n      mydict[\"op_type\"] = implemented_as\n  return mydict\n\n\ndef map_to_json(mydict):\n  \"\"\"write the dictionary to json format.\"\"\"\n\n  source_quantizer_list = mydict[\"source_quantizer_list\"]\n  layer_data_type_map = mydict[\"layer_data_type_map\"]\n\n  output_dict = collections.OrderedDict()\n\n  q_list = []\n  for source_quantizer in 
source_quantizer_list:\n    tmp = populate_quantizer(source_quantizer)\n    q_list.append(tmp)\n\n  if bool(q_list):\n    output_dict[\"source_quantizers\"] = q_list\n\n  def set_layer_item(layer_item, key, feature, shape=None,\n                     is_compound_datatype=False, output_key_name=None):\n    \"\"\"Generates the quantizer entry to a given layer_item.\n\n    This function extracts relevanant quantizer fields using the key (\n    quantizer name) from a given feature (layer entry from layer_data_type_map).\n\n    Args:\n      layer_item: Layer entry in the output dictionary. It includes the\n        info such as quantizers, output shape, etc. of each layer\n      key: Quantizer, such as kernel/bias quantizer, etc. If feature\n      feature: layer_data_type_map entry of each layer. This feature will be\n        parsed and converted to layer_item for the output dictionary.\n      shape: quantizer input shape\n      is_compound_datatype: Bool. Wether the quantizer is a compound\n        or unitary quantizer type. For example, kernel quantizer and bias\n        quantizer are unitary quantizer types, multiplier and accumulator\n        are compound quantizer types.\n      output_key_name: str. Change key to output_key_name in layer_item. 
If\n        None, will use the existing key.\n\n    Return:\n      None\n    \"\"\"\n    val = qtools_util.get_val(feature, key)\n    if val is not None:\n      quantizer = val\n      implemented_as = None\n      if is_compound_datatype:\n        quantizer = val.output\n        implemented_as = val.implemented_as()\n      if output_key_name is None:\n        key_name = key\n      else:\n        key_name = output_key_name\n      tmp = populate_quantizer(\n          quantizer, shape=shape, implemented_as=implemented_as)\n      if bool(tmp):\n        layer_item[key_name] = tmp\n\n  for layer, feature in layer_data_type_map.items():\n    layer_item = collections.OrderedDict()\n    layer_item[\"layer_type\"] = layer.__class__.__name__\n    layer_item[\"input_quantizer_list\"] = [\n        populate_quantizer(q) for q in qtools_util.get_val(\n            feature, \"input_quantizer_list\")]\n\n    set_layer_item(layer_item, key=\"output_quantizer\", feature=feature,\n                   shape=qtools_util.get_val(feature, \"output_shapes\"))\n\n    if layer_item[\"layer_type\"] in [\n        \"QBatchNormalization\", \"BatchNormalization\"]:\n\n      for key in [\"gamma_quantizer\", \"beta_quantizer\", \"mean_quantizer\",\n                  \"variance_quantizer\", \"variance_quantizer\"]:\n        set_layer_item(layer_item, key=key, feature=feature)\n\n      for key in [\"internal_divide_quantizer\",\n                  \"internal_multiplier\", \"internal_accumulator\"]:\n        set_layer_item(layer_item, key=key, feature=feature,\n                       is_compound_datatype=True)\n\n    elif layer_item[\"layer_type\"] in [\n        \"AveragePooling2D\", \"AvgPool2D\", \"GlobalAvgPool2D\",\n        \"GlobalAveragePooling2D\", \"QAveragePooling2D\",\n        \"QGlobalAveragePooling2D\"]:\n      set_layer_item(layer_item, key=\"average_quantizer\", feature=feature)\n      for key in [\"pool_sum_accumulator\", \"pool_avg_multiplier\"]:\n        set_layer_item(layer_item, 
key=key, feature=feature,\n                       is_compound_datatype=True)\n\n    else:\n      # populate the feature to dictionary\n      set_layer_item(layer_item, key=\"weight_quantizer\", feature=feature,\n                     shape=qtools_util.get_val(feature, \"w_shapes\"))\n      set_layer_item(layer_item, key=\"bias_quantizer\", feature=feature,\n                     shape=qtools_util.get_val(feature, \"b_shapes\"))\n\n      output_key_name = None\n      if qtools_util.is_merge_layers(layer):\n        output_key_name = layer.__class__.__name__ + \"_quantizer\"\n      set_layer_item(layer_item, key=\"multiplier\", feature=feature,\n                     is_compound_datatype=True,\n                     output_key_name=output_key_name)\n      set_layer_item(layer_item, key=\"accumulator\", feature=feature,\n                     is_compound_datatype=True)\n\n      if qtools_util.get_val(feature, \"fused_accumulator\"):\n        # Add fused weights to the dictionary\n        for key in [\"bn_beta_quantizer\", \"bn_mean_quantizer\",\n                    \"bn_inverse_quantizer\"]:\n          set_layer_item(layer_item, key=key, feature=feature)\n\n        set_layer_item(layer_item, key=\"fused_accumulator\", feature=feature,\n                       is_compound_datatype=True)\n\n    layer_item[\"operation_count\"] = qtools_util.get_val(\n        feature, \"operation_count\")\n    output_dict[layer.name] = layer_item\n\n  return output_dict\n"
  },
  {
    "path": "qkeras/qtools/qenergy/__init__.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Export qenergy package.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom .qenergy import energy_estimate\n"
  },
  {
    "path": "qkeras/qtools/qenergy/qenergy.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Calculate energy consumption of a given quantized model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nfrom qkeras.qtools.generate_layer_data_type_map import KERAS_LAYERS\nfrom qkeras.qtools.generate_layer_data_type_map import QKERAS_LAYERS\nfrom qkeras.qtools.quantized_operators.quantizer_impl import IQuantizer\nfrom qkeras.qtools.settings import cfg\nfrom qkeras.qtools import qtools_util\n\n\n# Model based on：\n#   Mark Horowitz, Computing’s Energy Problem (and what we can\n#   do about it). IEEE ISSCC, pp. 
10–14, 2014\n#   www.youtube.com/watch?v=eZdOkDtYMoo&feature=youtu.be&t=497\n\n# all metrics converted to pJ/bit\n\nOP = {\n    \"fp32\": {\n        \"add\": lambda x: max(cfg.fp32_add(x), 0),\n        \"mul\": lambda x: max(cfg.fp32_mul(x), 0)\n    },\n    \"fp16\": {\n        \"add\": lambda x: max(cfg.fp16_add(x), 0),\n        \"mul\": lambda x: max(cfg.fp16_mul(x), 0)\n    },\n    \"fpm\": {\n        \"add\": lambda x: max(cfg.fpm_add(x), 0),\n        \"mux\": lambda x: max(cfg.fpm_add(x), 0),\n        \"xor\": lambda x: max(cfg.fpm_add(x), 0),\n        \"and\": lambda x: max(cfg.fpm_add(x), 0),\n        \"or\": lambda x: max(cfg.fpm_add(x), 0),\n        \"shifter\": lambda x: max(cfg.fpm_add(x), 0),\n        \"mul\": lambda x: max(cfg.fpm_mul(x), 0)\n    },\n    \"sram\": {\"rd\": lambda x: max(cfg.sram_rd(x), 0),\n             \"wr\": lambda x: max(cfg.sram_rd(x), 0),\n             \"mul_factor\": cfg.sram_mul_factor},\n    \"dram\": {\"rd\": lambda x: max(cfg.dram_rd(x), 0),\n             \"wr\": lambda x: max(cfg.dram_rd(x), 0),\n             \"mul_factor\": cfg.dram_mul_factor}\n}\n\n\ndef get_op_type(quantizer):\n  assert isinstance(quantizer, IQuantizer)\n\n  if quantizer.is_floating_point:\n    return \"fp\" + str(quantizer.bits)\n  else:\n    return \"fpm\"\n\n\ndef memory_read_energy(is_input_layer, tensor_shape, mode, min_sram_size,\n                       rd_wr_on_io, quantizer_bits, is_tensor=True):\n  \"\"\"compute energy to bring tensors from DRAM to SRAM.\"\"\"\n\n  if is_input_layer:\n    if rd_wr_on_io:\n      mode = \"dram\"\n    else:\n      mode = \"sram\"\n\n  energy_mem = 0\n\n  if is_tensor:\n    tensor_shape = tensor_shape[1:]\n\n  total_bits = np.prod(tensor_shape) * quantizer_bits\n  total_bits_log2 = np.log2(max(total_bits, min_sram_size))\n\n  if mode == \"dram\":\n    # load input from dram; wx_sizes[1]-> input x quantizer bits\n    # total_bits * 20\n    energy_mem += OP[\"dram\"][\"rd\"](total_bits)\n    if rd_wr_on_io:\n      # 
write input to sram\n      # total_bits * sqrt(data_size/2^18)*0.3125\n      # bits1 = total_bits * OP[\"sram\"][\"mul_factor\"](np.prod(tensor_shape))\n      # energy_mem += OP[\"sram\"][\"wr\"](bits1)\n      energy_mem += (\n          np.ceil(total_bits * OP[\"sram\"][\"mul_factor\"]) *\n          OP[\"sram\"][\"wr\"](total_bits_log2)\n      )\n  elif mode == \"sram\":\n    # read input from sram\n    # total_bits * sqrt(data_size/2^18)*0.3125\n    # bits1 = total_bits * OP[\"sram\"][\"mul_factor\"](np.prod(tensor_shape))\n    # energy_mem += OP[\"sram\"][\"rd\"](bits1)\n    energy_mem += (\n        np.ceil(total_bits * OP[\"sram\"][\"mul_factor\"]) *\n        OP[\"sram\"][\"rd\"](total_bits_log2)\n    )\n\n  return energy_mem\n\n\ndef parameter_read_energy(\n    layer, layer_item, weights_on_memory, min_sram_size, rd_wr_on_io):\n  \"\"\"read weights/bias from memory.\"\"\"\n\n  node_type = layer.__class__.__name__\n  rd_energy = 0\n  if node_type in [\"QBatchNormalization\", \"BatchNormalization\"]:\n    gamma_quantizer = layer_item[\"gamma_quantizer\"]\n    beta_quantizer = layer_item[\"beta_quantizer\"]\n    mean_quantizer = layer_item[\"mean_quantizer\"]\n    variance_quantizer = layer_item[\"variance_quantizer\"]\n\n    # gamma, beta, mean, stddev\n    weights = layer.get_weights()\n    s = len(weights[0])\n    for q in [gamma_quantizer, beta_quantizer, mean_quantizer,\n              variance_quantizer]:\n      if q:\n        rd_energy += memory_read_energy(\n            False, (s), weights_on_memory, min_sram_size, rd_wr_on_io,\n            q.bits, is_tensor=False)\n\n  elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:\n    weight_quantizer = qtools_util.get_val(layer_item, \"weight_quantizer\")\n    w_shapes = qtools_util.get_val(layer_item, \"w_shapes\")\n    bias_quantizer = qtools_util.get_val(layer_item, \"bias_quantizer\")\n    b_shapes = qtools_util.get_val(layer_item, \"b_shapes\")\n\n    rd_energy += memory_read_energy(\n        False, 
w_shapes, weights_on_memory, min_sram_size, rd_wr_on_io,\n        weight_quantizer.bits, is_tensor=False\n    )\n\n    if bias_quantizer:\n      # if use_bias=0, no bias\n      bias_shapes = (b_shapes)\n      rd_energy += memory_read_energy(\n          False, bias_shapes, weights_on_memory, min_sram_size, rd_wr_on_io,\n          bias_quantizer.bits, is_tensor=False\n      )\n\n  return rd_energy\n\n\ndef memory_write_energy(is_output_layer, tensor_shape, mode, min_sram_size,\n                        rd_wr_on_io, quantizer_bits):\n  \"\"\"compute energy to bring tensors from SRAM to DRAM.\"\"\"\n  if is_output_layer:\n    if rd_wr_on_io:\n      mode = \"dram\"\n    else:\n      mode = \"sram\"\n\n  energy_mem = 0\n\n  tensor_shape = tensor_shape[1:]\n\n  total_bits = np.prod(tensor_shape) * quantizer_bits\n  total_bits_log2 = np.log2(max(total_bits, min_sram_size))\n\n  if mode == \"dram\":\n    # load input from dram; wx_sizes[1]-> input x quantizer bits\n    if rd_wr_on_io:\n      # read input from sram\n      # total_bits * sqrt(data_size/2^18)*0.3125\n      # bits1 = total_bits * OP[\"sram\"][\"mul_factor\"](np.prod(tensor_shape))\n      # energy_mem += OP[\"sram\"][\"rd\"](bits1)\n      energy_mem += (\n          np.ceil(total_bits * OP[\"sram\"][\"mul_factor\"]) *\n          OP[\"sram\"][\"rd\"](total_bits_log2)\n      )\n    # write output to dram\n    energy_mem += OP[\"dram\"][\"wr\"](total_bits)\n\n  elif mode == \"sram\":\n    # write to sram\n    # total_bits * sqrt(data_size/2^18)*0.3125\n    # bits1 = total_bits * OP[\"sram\"][\"mul_factor\"](np.prod(tensor_shape))\n    # energy_mem +=  OP[\"sram\"][\"wr\"](bits1)\n    energy_mem += (\n        np.ceil(total_bits * OP[\"sram\"][\"mul_factor\"]) *\n        OP[\"sram\"][\"wr\"](total_bits_log2)\n    )\n\n  return energy_mem\n\n\ndef energy_estimate(model, layer_map, weights_on_memory,\n                    activations_on_memory, min_sram_size,\n                    rd_wr_on_io):\n  \"\"\"estimate 
energy.\"\"\"\n\n  output_layers = layer_map[\"output_layers\"]\n  input_layers = layer_map[\"input_layers\"]\n  layer_data_type_map = layer_map[\"layer_data_type_map\"]\n\n  result = {}\n  total_energy = 0\n\n  # compute MAC and memory access energy for intermediate layers\n  for layer in model.layers:\n    if layer not in layer_data_type_map.keys():\n      continue\n\n    layer_item = layer_data_type_map[layer]\n    input_quantizer_list = qtools_util.get_val(\n        layer_item, \"input_quantizer_list\")\n    operation_count = qtools_util.get_val(layer_item, \"operation_count\")\n    output_shapes = qtools_util.get_val(layer_item, \"output_shapes\")\n    output_quantizer = qtools_util.get_val(layer_item, \"output_quantizer\")\n\n    is_input_layer = layer in input_layers\n    is_output_layer = layer in output_layers\n\n    input_rd_energy = 0\n    energy_op = 0\n    input_shape = layer.input_shape\n    if not isinstance(input_shape, list):\n      input_shape = [input_shape]\n\n    for (input_shape, input_quantizer) in zip(\n        input_shape, input_quantizer_list):\n      input_rd_energy += memory_read_energy(\n          is_input_layer, input_shape,\n          activations_on_memory, min_sram_size, rd_wr_on_io,\n          input_quantizer.bits)\n\n    parameter_rd_energy = parameter_read_energy(\n        layer, layer_item, weights_on_memory, min_sram_size,\n        rd_wr_on_io)\n\n    output_wr_energy = memory_write_energy(\n        is_output_layer, output_shapes,\n        activations_on_memory, min_sram_size, rd_wr_on_io,\n        output_quantizer.bits)\n    # QActivation Layer\n    if layer.__class__.__name__ in [\"QActivation\", \"QAdaptiveActivation\",\n                                    \"Activation\"]:\n      pass\n\n    # QBN Layer\n    elif layer.__class__.__name__ in [\n        \"QBatchNormalization\", \"BatchNormalization\"]:\n      # assume QBN is embedded with conv/dense layers\n      # -> no memory read/write cost\n\n      divider = 
layer_item[\"internal_divide_quantizer\"]\n      if divider:\n        gate_factor = divider.gate_factor\n        mode = divider.implemented_as()\n        energy_op += gate_factor * OP[\n            get_op_type(divider.output)][mode](divider.gate_bits)\n\n      multiplier = layer_item[\"internal_multiplier\"]\n      if multiplier:\n        gate_factor = multiplier.gate_factor\n        mode = multiplier.implemented_as()\n        energy_op += gate_factor * OP[\n            get_op_type(multiplier.output)][mode](multiplier.gate_bits)\n\n      energy_op *= operation_count\n\n    # Merge layer\n    elif layer.__class__.__name__ in [\"Add\", \"Multiply\", \"Subtract\"]:\n\n      # multiply or add operation energy\n      # TODO(lishanok): check energy for concatenate\n      merge_quantizer = qtools_util.get_val(layer_item, \"multiplier\")\n      mode = merge_quantizer.implemented_as()\n      number_of_inputs = len(qtools_util.get_val(\n          layer_item, \"input_quantizer_list\"))\n      gate_factor = merge_quantizer.gate_factor\n\n      q = get_op_type(merge_quantizer.output)\n      b = merge_quantizer.gate_bits\n      energy_op = (number_of_inputs - 1) * operation_count * gate_factor * OP[\n          q][mode](b)\n\n    # AveragePooling and GlobalAveragePooling\n    elif layer.__class__.__name__ in [\n        \"AveragePooling2D\", \"AvgPool2D\", \"GlobalAvgPool2D\",\n        \"GlobalAveragePooling2D\"]:\n      # accumulation operation energy\n      accumulator = qtools_util.get_val(layer_item, \"accumulator\")\n      add_energy = OP[get_op_type(accumulator.output)][\"add\"](\n          accumulator.output.bits)\n      energy_op = operation_count * add_energy\n\n    # MAC energy calculation\n    elif layer.__class__.__name__ in [\"QConv2D\", \"QConv1D\", \"QDepthwiseConv2D\",\n                                      \"QDense\", \"Conv2D\", \"Conv1D\",\n                                      \"DepthwiseConv2D\", \"Dense\"]:\n      multiplier = qtools_util.get_val(layer_item, 
\"multiplier\")\n      accumulator = qtools_util.get_val(layer_item, \"accumulator\")\n\n      # implementation mode: xor/andgate/shift etc.\n      mode = multiplier.implemented_as()\n      gate_factor = multiplier.gate_factor\n\n      op = get_op_type(multiplier.output)\n      bits = multiplier.gate_bits\n      c1 = gate_factor * OP[op][mode](bits)\n      c2 = OP[get_op_type(accumulator.output)][\"add\"](accumulator.output.bits)\n      energy_op = operation_count * (c1 + c2)\n\n    else:\n      pass\n\n    result[layer.name] = {\n        \"class_name\": layer.__class__.__name__,\n        \"energy\": {\n            \"inputs\": float(\"{0:.2f}\".format(input_rd_energy)),\n            \"outputs\": float(\"{0:.2f}\".format(output_wr_energy)),\n            \"parameters\": float(\"{0:.2f}\".format(parameter_rd_energy)),\n            \"op_cost\": float(\"{0:.2f}\".format(energy_op))\n        }\n    }\n    total_energy += (input_rd_energy + output_wr_energy +\n                     parameter_rd_energy + energy_op)\n\n  result[\"total_cost\"] = int(total_energy)\n\n  return result\n"
  },
  {
    "path": "qkeras/qtools/qgraph.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Creates networkx graph from a model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport logging\n\nimport networkx as nx\nimport tensorflow.keras.backend as K\n\nfrom tensorflow.keras.layers import InputLayer\nfrom qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module\nfrom qkeras.qtools.settings import cfg\n\nSOURCE = -1\nSINK = -2\n\n\nclass WrongInputQuantizerError(ValueError):\n  pass\n\n\ndef GraphRemoveNode(graph, v):\n  \"\"\"Removes node \"v\" from u -> v -> w, connecting u -> w.\"\"\"\n\n  incoming = [u for u in graph.predecessors(v) if u != v]\n  outgoing = [w for w in graph.successors(v) if w != v]\n\n  # add incoming edges\n  for u in incoming:\n    for w in outgoing:\n      in_attr = graph[u][v]\n      out_attr = graph[v][w]\n\n      assert list(in_attr[\"shape\"]) == list(out_attr[\"shape\"])\n\n      graph.add_edges_from([(u, w, out_attr)])\n\n  graph.remove_node(v)\n\n\ndef GraphRemoveNodeWithNodeType(graph, node_type):\n  \"\"\"Removes node with attribute node_type, reconnecting network.\"\"\"\n\n  nodes_to_remove = [v for v in graph.nodes\n                     if graph.nodes[v][\"type\"][-1] == node_type]\n\n  for v in nodes_to_remove:\n\n    
GraphRemoveNode(graph, v)\n\n\ndef  GraphAddHiddenInputLayer(model, graph, input_quantizer_map):\n  \"\"\"For Keras Sequential model api, input layer is hidden. Need to add it.\"\"\"\n\n  node_id = -1\n  for (u, _) in graph.nodes.items():\n    if u >= node_id:\n      node_id = u\n    if u == SOURCE or u == SINK:\n      continue\n\n    if graph.nodes[u][\"type\"][-1] == \"InputLayer\":\n      return\n\n  # determine a node id for the newly added input layer\n  node_id += 1\n\n  # find the first layer of the sequential model\n  first_layer_nodes = []\n  for u in graph.nodes:\n    if u == SOURCE or u == SINK:\n      continue\n    predecessors = list(graph.predecessors(u))\n     # find the first layer which doesn't have a parent\n    if not predecessors:\n      first_layer_nodes.append(u)\n  assert len(first_layer_nodes) == 1\n  # since it is a sequential model, there is only one first layer\n  v_id = first_layer_nodes[0]\n\n  # create a input layer node\n  node_type = \"InputLayer\"\n  input_shape = model.layers[0].input_shape\n  layer = InputLayer(input_shape=input_shape[1:])\n  o_shape = input_shape\n  node = (node_id, {\"layer\": [layer], \"type\": [node_type],\n                    \"out_quantizer\": None})\n  graph.add_nodes_from([node])\n\n  # insert input_quantizers on the edge between input layer and its next layer\n  for (a, _) in input_quantizer_map.items():\n    edge = (node_id, v_id, {\n        \"shape\": [o_shape], \"tensor\": a,\n        \"quantizer\": input_quantizer_map[a]})\n\n  graph.add_edges_from([edge])\n\n\ndef GraphAddSingleSourceSingleSink(graph):\n\n  \"\"\"Connects graph to source and sink nodes.\"\"\"\n\n  edge_list = []\n\n  for u in graph.nodes:\n\n    if u == SOURCE or u == SINK:\n      continue\n\n    if graph.nodes[u][\"type\"][-1] == \"InputLayer\":\n      # If the layer has multiple nodes, you can use get_output_at(node_index)\n      tensor = graph.nodes[u][\"layer\"][-1].get_output_at(0)\n      # if tf 1.0+, we can do tensor.shape 
with the same effect\n      shape = tuple(tensor.get_shape().as_list())\n      shape = [shape]\n\n      edge_list.append((SOURCE, u, {\n          \"shape\": shape, \"tensor\": tensor, \"quantizer\": None}))\n\n    if graph.out_degree(u) == 0:\n      tensor = graph.nodes[u][\"layer\"][-1].get_output_at(0)\n      shape = tensor.shape\n\n      edge_list.append((u, SINK, {\n          \"shape\": shape, \"tensor\": tensor, \"quantizer\": None}))\n\n  graph.add_edges_from(edge_list)\n\n\ndef GenerateInputQuantizerList(input_quantizers,\n                               inputs_length,\n                               default_source_quantizer):\n  \"\"\"Generates the list of input quantizers.\"\"\"\n  # generate a list of input quantizers\n  input_quantizer_list = []\n  quantizer_factory = quantizer_factory_module.QuantizerFactory()\n  if input_quantizers is None:\n    logging.warning(\n        \"************ SOURCE has no quantizer type.\"\n        \" Use default quantizer instead\")\n\n    for _ in range(inputs_length):\n      input_quantizer_list.append(\n          quantizer_factory.make_default_quantizer(\n              mode=default_source_quantizer))\n  else:\n    if inputs_length == len(input_quantizers):\n      for quantizer in input_quantizers:\n        input_quantizer_list.append(quantizer_factory.make_quantizer(\n            quantizer))\n    # pass a single quantizer which will be used for all q list.\n    elif not isinstance(input_quantizers, list):\n      for _ in range(inputs_length):\n        input_quantizer_list.append(quantizer_factory.make_quantizer(\n            input_quantizers))\n    else:\n      raise WrongInputQuantizerError(\n          \"ERROR: Numer of input (%d) must be the same as number of source\"\n          \" quantizers (%d)\"%(inputs_length, len(input_quantizers)))\n\n  return input_quantizer_list\n\n\ndef AddToNodeDict(layer_items,\n                  layer,\n                  nodes_dict):\n  \"\"\"Adds layer to a node_dict, indexed by 
layer.(input or output).ref\"\"\"\n  i_list = layer_items\n  if not isinstance(layer_items, list):\n    i_list = [i_list.ref()]\n  else:\n    i_list = [tmp.ref() for tmp in i_list]\n\n  for i in i_list:\n    # dict: tensor -> layers have this tensor as input\n    if i not in nodes_dict.keys():\n      nodes_dict[i] = [layer]\n    else:\n      nodes_dict[i].append(layer)\n\n\ndef GenerateGraphFromModel(model,\n                           input_quantizers,\n                           default_source_quantizer):\n  \"\"\"Generates single source, single sink graph from model.\"\"\"\n\n  # node represents layers with attributes [layer, type(class_name)]\n  # edge represents the tensor flowing between two layers,\n  # attributes is [tensor, output_shape, QA(activation quantizer]\n\n  # input_quantizers are tagged on the edge between input\n  # layer and the following layer\n\n  # generate a list of input quantizers\n  input_quantizer_list = GenerateInputQuantizerList(input_quantizers,\n                                                    len(model.inputs),\n                                                    default_source_quantizer)\n\n  # dict that map input_tensor to its quantizer\n  input_quantizer_map = {}\n  for (idx, tensor) in enumerate(model.inputs):\n    input_quantizer_map[tensor.ref()] = input_quantizer_list[idx]\n\n  graph = nx.DiGraph()\n\n  source = SOURCE\n  sink = SINK\n\n  node_list = [\n      (source, {\"layer\": [None], \"type\": [None], \"out_quantizer\": None}),\n      (sink, {\"layer\": [None], \"type\": [None], \"out_quantizer\": None})\n  ]\n\n  for i, layer in enumerate(model.layers):\n\n    node_type = layer.__class__.__name__\n\n    node = (i, {\"layer\": [layer], \"type\": [node_type], \"out_quantizer\": None})\n    node_list.append(node)\n\n  node_dict = {layer: i for i, layer in enumerate(model.layers)}\n\n  graph.add_nodes_from(node_list)\n\n  # nodes = tensors\n  in_nodes = {}\n  out_nodes = {}\n  for layer in model.layers:\n    
AddToNodeDict(layer.input, layer, in_nodes)\n    AddToNodeDict(layer.output, layer, out_nodes)\n\n  # union of all tensors; non-redundant\n  attr_set = set(in_nodes.keys()) | set(out_nodes.keys())\n\n  # add edges. we want edges annotated with tensors and shapes\n  edge_list = []\n\n  for a in attr_set:\n    # for a given tensor a, find the layer u that outputs this tensor\n    # and the layer v that has this tensor as input\n    u_list = out_nodes.get(a, [None])\n    v_list = in_nodes.get(a, [None])\n\n    for u in u_list:\n      for v in v_list:\n        if not u or not v:\n          continue\n\n        o_shape = u.output_shape\n\n        # layer -> layer_id\n        u_id = node_dict[u]\n        v_id = node_dict[v]\n\n        # insert input_quantizers on the edge between\n        # input layer and its next layer\n        if a in input_quantizer_map.keys():\n          edge_list.append((u_id, v_id, {\n              \"shape\": o_shape, \"tensor\": a,\n              \"quantizer\": input_quantizer_map[a]}))\n        else:\n          edge_list.append((u_id, v_id, {\n              \"shape\": o_shape, \"tensor\": a,\n              \"quantizer\": None}))\n\n  graph.add_edges_from(edge_list)\n  GraphAddHiddenInputLayer(model, graph, input_quantizer_map)\n\n  return (graph, input_quantizer_list)\n\n\ndef GraphGetInputs(graph):\n\n  \"\"\"Returns edges SOURCE->u that are inputs.\"\"\"\n\n  successors = list(graph.successors(SOURCE))\n\n  input_tensors = []\n\n  for u in successors:\n\n    if u == SOURCE or u == SINK:\n      continue\n\n    input_tensors.append(graph[SOURCE][u])\n\n  return input_tensors\n\n\ndef GraphGetOutputs(graph):\n\n  \"\"\"Returns edges u->SINK that are outputs.\"\"\"\n\n  predecessors = list(graph.predecessors(SINK))\n\n  output_tensors = []\n\n  for u in predecessors:\n\n    if u == SOURCE or u == SINK:\n      continue\n\n    output_tensors.append(graph[u][SINK])\n\n  return output_tensors\n\n\ndef GraphPropagateActivationsToEdges(graph, 
debug=False):\n  \"\"\"Traverses graph and move activations to edges.\n\n  1.If current dense/conv layer is specified with QA:\n    outgoing edge (output data type) will be QA type\n  2.If current dense/conv layer has no QA:\n    default type (float32) is used as output\n  3.If current layer is QA layer:\n    float32 is used by default as output type on the edge\n\n  Args:\n    graph: graph to inject activations to.\n    debug: debug mode\n\n  Returns:\n    None\n  \"\"\"\n\n  scheduler = list(nx.topological_sort(graph))\n\n  for vertex in scheduler[1:-1]:\n    # get rid of source and sink vertex\n    if debug:\n      print(\"########### GraphPropagateActivationsToEdges ############\")\n      print(\"vertex:\", vertex)\n\n    for u, v in graph.edges(vertex):\n      # u=vertex, v: outgoing edge vertex\n\n      if debug:\n        print(\"  outgoing ->\", v, graph.nodes[v][\"layer\"][0].name)\n\n      layer = graph.nodes[u][\"layer\"][0]\n      result = None\n      # if current layer has no QA specified\n      if not hasattr(layer, \"activation\"):\n        result = None\n      else:\n        activation_name = layer.activation.__name__ if hasattr(\n            layer.activation, \"__name__\") else None\n        q_activation_class_name = layer.activation.__class__.__name__ if hasattr(\n            layer.activation, \"__class__\") else None\n\n        if debug:\n          print(\"  layer type:\", layer.__class__.__name__)\n          print(\"  activation object:\", layer.activation)\n          print(\"  activation_name:\", activation_name)\n          print(\"  q_activation_class_name:\", q_activation_class_name)\n\n        # if current layer is QA\n        if (graph.nodes[u][\"type\"][0] in [\"QActivation\"] or\n            graph.nodes[u][\"type\"][0] in [\"QAdaptiveActivation\"]):\n          result = layer.quantizer\n\n        # if current layer is not QA layer but has QA specified within\n        elif hasattr(layer, \"activation\"):\n          if activation_name == 
\"linear\":\n            result = None\n          else:\n            result = layer.activation\n\n      if debug:\n        print(\"  {}->{}: {}\".format(u, v, result))\n\n      graph[u][v][\"quantizer\"] = result\n      # all edge_quantizer is the same for all edges starting\n      # from current vertex to different nodes\n      graph.nodes[vertex][\"out_quantizer\"] = result\n\n\ndef PrintGraph(graph, msg=\"\"):\n  \"\"\"Print graph structure.\"\"\"\n\n  print()\n  print(msg)\n  print()\n  print(\"nodes:\",\n        [(u, graph.nodes[u][\"layer\"][\n            0].name if graph.nodes[u][\"layer\"][0] is not None else \"\",\n          graph.nodes[u][\"type\"]) for u in graph.nodes])\n  print()\n  print(\"edges:\",\n        [(u, v, graph[u][v][\"shape\"],\n          graph[u][v][\"quantizer\"]) for u, v in graph.edges])\n\n\ndef CreateGraph(model, input_quantizers=None,\n                default_source_quantizer=cfg.default_source_quantizer,\n                debug=False):\n  \"\"\"create graph.\"\"\"\n\n  K.set_image_data_format(\"channels_last\")\n\n  (graph, source_quantizer_list) = GenerateGraphFromModel(\n      model, input_quantizers, default_source_quantizer)\n  GraphAddSingleSourceSingleSink(graph)\n  GraphRemoveNodeWithNodeType(graph, \"Dropout\")\n  GraphRemoveNodeWithNodeType(graph, \"InputLayer\")\n\n  scheduler = list(nx.topological_sort(graph))\n\n  if debug:\n    for vertex in scheduler[1:-1]:\n      for _, v in graph.edges(vertex):\n        if v == SINK:\n          continue\n        print(\"... calling\", graph.nodes[v][\n            \"layer\"][0].name, graph.nodes[v][\"type\"])\n\n  return (graph, source_quantizer_list)\n\n\ndef GraphUpdateEdge(graph, node_id, quantizer_on_edge):\n  \"\"\"update the graph edges outgoing from node_id with new quantizer.\"\"\"\n\n  for u, v in graph.edges(node_id):\n    graph[u][v][\"quantizer\"] = quantizer_on_edge\n"
  },
  {
    "path": "qkeras/qtools/qtools_util.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"utility functions.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\nimport sys\nimport numpy as np\nimport tensorflow.keras.backend as K\nimport tensorflow as tf\nfrom qkeras.qtools import quantized_operators\n\n\ndef get_val(feature, key, default_val=None):\n  # Return feature[key] or feature.key\n  if isinstance(feature, dict):\n    val = feature.get(key, default_val)\n  else:\n    val = getattr(feature, key, default_val)\n  return val\n\n\ndef is_shape_alternation_layers(layer):\n  lname = layer.__class__.__name__\n  if lname:\n    return \"MaxPool\" in lname or \"Reshape\" in lname or \"Flatten\" in lname\n  return False\n\n\ndef is_merge_layers(layer):\n\n  if layer.__class__.__name__ in [\n      \"Add\", \"Multiply\", \"Subtract\", \"Average\", \"Maximum\", \"Minimum\",\n      \"Concatenate\", \"Dot\"]:\n    return True\n  else:\n    return False\n\n\ndef get_input_quantizers(graph, node_id, quantizer_factory, debug=False):\n  \"\"\"get the current layer's input quantizer.\"\"\"\n\n  # in merge layers, therea are more than 1 input\n\n  output = []\n  for parent_node_id in graph.predecessors(node_id):\n\n    edge = graph.edges[(parent_node_id, node_id)]\n\n    if debug:\n      
print(\"parent_node_id:\", parent_node_id)\n      print(edge)\n\n    quantizer_on_edge = edge[\"quantizer\"]\n    input_quantizer = quantizer_factory.make_quantizer(quantizer_on_edge)\n\n    output.append((input_quantizer, edge))\n\n  return output\n\n\ndef get_input_quantizers_advanced(graph, node_id,\n                                  is_input_layer, quantizer_factory,\n                                  cfg, debug=False):\n  \"\"\"get input quantizer, deal with keras layer or lack of input quantizer in qkeras layer.\"\"\"\n\n  # in merge layers, therea are more than 1 input\n  default_source_quantizer = cfg.default_source_quantizer\n  default_interm_quantizer = cfg.default_interm_quantizer\n\n  output = []\n  for parent_node_id in graph.predecessors(node_id):\n\n    edge = graph.edges[(parent_node_id, node_id)]\n\n    if debug:\n      print(\"parent_node_id:\", parent_node_id)\n      print(edge)\n\n    quantizer_on_edge = edge[\"quantizer\"]\n    input_quantizer = quantizer_factory.make_quantizer(quantizer_on_edge)\n\n    if is_input_layer and not input_quantizer:\n      # input layer without input_quantizer specified\n      #   ->use default_source_quantizer\n      input_quantizer = quantizer_factory.make_default_quantizer(\n          mode=default_source_quantizer)\n    elif not input_quantizer:\n      # if no input quantizer is available\n      #   -> use default quantizer from config.json\n      input_quantizer = quantizer_factory.make_default_quantizer(\n          mode=default_interm_quantizer)\n\n    output.append((input_quantizer, edge))\n\n  return output\n\n\ndef get_operation_count(layer, input_shape):\n  \"\"\"Determines number of multiplier operations in a qkeras layer.\"\"\"\n\n  # Check if the inputs are a list of Dimensions\n  if isinstance(input_shape, list):\n    input_shape = input_shape[0]\n\n  operation_count = 0\n\n  if is_merge_layers(layer) or is_shape_alternation_layers(layer):\n    operation_count = np.prod(input_shape[1:])\n\n  elif 
layer.__class__.__name__ in [\n      \"AveragePooling2D\", \"AvgPool2D\", \"GlobalAvgPool2D\",\n      \"GlobalAveragePooling2D\", \"QGlobalAveragePooling2D\"\n  ]:\n\n    if hasattr(layer, \"pool_size\"):\n      pool_size = layer.pool_size\n    else:\n      pool_size = input_shape[1:-1]\n    add_ops = np.prod(pool_size)\n\n    output_shape = layer.compute_output_shape(input_shape)\n    channels_o = output_shape[-1]\n\n    # total number of add ops\n    operation_count = channels_o * add_ops\n\n  elif \"UpSampling\" in layer.__class__.__name__:\n    # UpSampling1D/2D/3D\n    output_shape = layer.compute_output_shape(input_shape)\n    operation_count = np.prod(output_shape[1:])\n\n  elif (\"Activation\" in layer.__class__.__name__ or\n        \"BatchNormalization\" in layer.__class__.__name__):\n    operation_count = np.prod(input_shape[1:])\n\n  elif layer.__class__.__name__ in [\n      \"QConv2D\", \"Conv2D\", \"QConv2DBatchnorm\",\n      \"QConv2DTranspose\", \"Conv2DTranspose\"]:\n\n    output_shape = layer.compute_output_shape(input_shape)\n    _, _, _, channels_i = input_shape\n\n    _, height_o, width_o, channels_o = output_shape\n\n    weight = layer.get_weights()[0]\n\n    kernel_h, kernel_w, _, _ = weight.shape\n\n    operation_count = (\n        height_o * width_o * channels_o * kernel_h * kernel_w * channels_i)\n\n  elif layer.__class__.__name__ in [\"QConv1D\", \"Conv1D\"]:\n    output_shape = layer.compute_output_shape(input_shape)\n    _, _, channels_i = input_shape\n\n    _, time_o, channels_o = output_shape\n\n    weight = layer.get_weights()[0]\n\n    kernel_length, _, _ = weight.shape\n\n    operation_count = (\n        time_o * channels_o * kernel_length * channels_i)\n\n  elif layer.__class__.__name__ in [\"QDepthwiseConv2D\", \"DepthwiseConv2D\"]:\n    output_shape = layer.compute_output_shape(input_shape)\n    _, _, _, channels_i = input_shape\n\n    _, height_o, width_o, channels_o = output_shape\n\n    weight_1 = layer.get_weights()[0]\n\n    
kernel_h, kernel_w, _, _ = weight_1.shape\n\n    operation_count = (\n        kernel_h * kernel_w * height_o * width_o * channels_i)\n\n  elif layer.__class__.__name__ in [\"QDense\", \"Dense\"]:\n    output_shape = layer.compute_output_shape(input_shape)\n    # Find the input and output shapes out of all possible dimensions.\n    # Usually, the first shape dimension will be the batch size, and the second\n    # shape dimension will be the number of channels. However, if the\n    # Dense layer is in Squeeze-and-Excite, the first shape dimension\n    # will be the batch size, the second and third shape dimension will be the\n    # spatial sizes (should both be 1), and the fourth shape dimensions will\n    # be the number of channels\n    #\n    # Note: asserts have been changed to sum(*shape > 1) <= 1 to avoid the case\n    # when the dense layer has an output with shape (None, 1), which results in\n    # sum(oshape > 1) = 0.\n    ishape = np.array([i for i in input_shape if i is not None])\n    assert sum(ishape > 1) <= 1, (\"Input Tensor shape in %s has \"\n                                  \"multiple >1 size dims\") % layer.name\n    size_i = np.max(ishape)\n\n    oshape = np.array([i for i in output_shape if i is not None])\n    assert sum(oshape > 1) <= 1, (\"Output Tensor shape in %s has \" +\n                                  \"multiple >1 size dims\") % layer.name\n    size_o = np.max(oshape)\n\n    operation_count = (size_i * size_o)\n\n  else:\n    print(\"operation count for {} is defaulted to 0\".format(\n        layer))\n\n  return int(operation_count)\n\n\ndef get_weights(layer, model_weights_already_quantized=True):\n  \"\"\"Get layer weights.\n\n  Args:\n    layer: given qkeras/keras layer\n    model_weights_already_quantized: bool. whether the given layer's weights\n      are already quantized. 
This is necessary because with certain quantizers,\n      eg., quantized_bits(alpha=\"auto_po2\"), we cannot quantize the same\n      weights more than once, as it will lead to different results.\n\n  Returns:\n    Quantized layer weights.\n  \"\"\"\n\n  weights = layer.get_weights()\n  out = copy.deepcopy(weights)\n  if not model_weights_already_quantized:\n    for j, weight in enumerate(weights):\n      if hasattr(layer, \"get_quantizers\") and layer.get_quantizers()[j]:\n        out[j] = K.eval(\n            layer.get_quantizers()[j](K.constant(weight)))\n  return out\n\n\ndef get_scale_from_quantized_bits_with_auto_po2(quantizer):\n  \"\"\"Get scale from quantized_bits with alpha=auto_po2.\"\"\"\n  if hasattr(quantizer.scale, \"numpy\"):\n    return quantizer.scale.numpy()\n  elif isinstance(quantizer.scale, np.ndarray):\n    return quantizer.scale\n  else:\n    return None\n\n\ndef adjust_multiplier_for_auto_po2(multiplier, qkeras_weight_quantizer):\n  \"\"\"Adjust multiplier when weight quantizer is auto_po2 type.\n\n  Multiplier_bits = bits_x + bits_w\n  Multiplier_intbits = log2(scale) + intbits_x + intbits_w\n\n  Because we might have different scale for auto_po2 quantizer at different\n  output channels, multiplier will have different integer bits at different\n  output channel accordingly, which is not desirable in hardware implementation.\n  Therefore we set a general multiplier quantizers so that it provides enough\n  fractional bits and integer bits for all output channels.\n  \"\"\"\n  print(\"adjust multiplier for auto_po2 ...\")\n  output_quantizer = multiplier.output\n  if (hasattr(qkeras_weight_quantizer, \"__str__\") and\n      \"quantized_bits\" in qkeras_weight_quantizer.__str__() and\n      qkeras_weight_quantizer.alpha == \"auto_po2\"):\n    bits = output_quantizer.bits\n    int_bits = output_quantizer.int_bits\n    scale = get_scale_from_quantized_bits_with_auto_po2(\n        qkeras_weight_quantizer)\n    if scale is not None:\n      if 
isinstance(scale, np.ndarray):\n        scale = np.squeeze(scale)\n        max_shift = int(np.log2(np.max(scale)))\n        min_shift = int(np.log2(np.min(scale)))\n      elif isinstance(scale, float):\n        max_shift = int(np.log2(scale))\n        min_shift = max_shift\n      else:\n        raise ValueError(f\"Scale should be either numpy array or float,\"\n                         f\"{type(scale)} is found instead!\")\n\n      # In order to set a general quantizer for different output channels,\n      # we need to set both fractional bits and integer bits as the max required\n      # bits for different output channels\n      max_fractional_bits = bits - int_bits - min_shift\n      max_int_bits = int_bits + max_shift\n      total_bits = max_int_bits + max_fractional_bits\n\n      output_quantizer.bits = total_bits\n      output_quantizer.int_bits = max_int_bits\n    else:\n      # If scale is None, it means the quantizer has\n      # never been called. Therefore we skip the bitwidth adjustment steps\n      print(\"[WARNING] The weight quantizer is never called even though it has \"\n            \"alpha=auto_po2. 
In this case we do not adjust the multiplier and \"\n            \"accumulator bit width since we don't know the exact values of \"\n            \"scale\", file=sys.stderr)\n  elif hasattr(qkeras_weight_quantizer, \"alpha\") and (\n      qkeras_weight_quantizer.alpha == \"auto_po2\"):\n    print(\"[WARNING] auto_po2 is detected on a non-quantized_bits quantizer.\"\n          \"Currently in QTools we do not yet support the auto_po2 with the \"\n          f\" given quantizer type: {type(qkeras_weight_quantizer)}.\"\n          \"Therefore we do not adjust the multiplier and accumulator bit width\")\n\n\ndef adjust_accumulator_for_auto_po2(\n    layer, multiplier, qkeras_weight_quantizer, bias_quantizer):\n  \"\"\"Adjust accumulator when weight quantizer is auto_po2 type.\"\"\"\n\n  fused_multiplier = copy.deepcopy(multiplier)\n  adjust_multiplier_for_auto_po2(fused_multiplier, qkeras_weight_quantizer)\n  weights = layer.get_weights()\n  kernel = weights[0]\n\n  kernel_shape = kernel.shape\n  # depthwise_kernel_shape = kernel_size + (input_dim, depth_multiplier)\n  # When computing accumulator bitwidth for dw conv2d layer, we do not\n  # need to count the last two dimensions\n  if layer.__class__.__name__ in [\"QDepthwiseConv2D\", \"DepthwiseConv2D\"]:\n    assert kernel_shape[-1] == 1, (\"depth_multiplier must be 1, \"\n                                   f\"{kernel_shape[-1]} found instead!\")\n    kernel_shape = kernel.shape[:-2] + (1, 1)\n\n  kernel_accumulator_factory = quantized_operators.AccumulatorFactory()\n  # Sets use_bias=False so that the accumulator doesn't account for bias\n  # bitwdith.\n  fused_kernel_accumulator = kernel_accumulator_factory.make_accumulator(\n      kernel_shape, fused_multiplier, use_bias=False)\n\n  if not layer.use_bias:\n    bias_quantizer = None\n    fused_accumulator = fused_kernel_accumulator\n  else:\n    # Add bias quantizer bitwidth to the overall accumulator\n    bias_accumulator_instance = 
quantized_operators.adder_factory.IAdder()\n    fused_accumulator = bias_accumulator_instance.make_quantizer(\n        fused_kernel_accumulator.output, bias_quantizer)\n\n  return fused_accumulator\n\n\ndef find_divisors(num):\n  return [i for i in range(1, num + 1) if num % i == 0]\n\n\ndef get_layer_info(layer: tf.keras.layers.Layer, attr_name: str):\n\n  layer_type = layer.__class__.__name__\n  supported_layer_types = [\n      \"QDense\", \"QConv2D\", \"QDepthwiseConv2D\", \"MaxPooling2D\",\n      \"GlobalMaxPooling2D\", \"QAveragePooling2D\", \"QGlobalAveragePooling2D\",\n      \"UpSampling2D\", \"Concatenate\", \"QBatchNormalization\", \"QActivation\",\n      \"Activation\", \"Dropout\", \"Reshape\", \"ZeroPadding2D\"]\n  assert layer_type in supported_layer_types, (\n      f\"For now only {supported_layer_types} layers are supported. \"\n      f\"Found {layer_type} instead.\")\n\n  # Get layer info such as input/output channels, kernel size and quantizers.\n  input_channel = layer.input_shape[-1]\n  output_channel = layer.output_shape[-1]\n\n  # Change default kernel_size to 1 to represent Dense Layer with Conv Layers.\n  kernel_height, kernel_width = layer.kernel_size if hasattr(\n      layer, \"kernel_size\") else (1, 1)\n\n  layer_dict = {\n      \"layer_type\": layer_type,\n      \"input_channel\": input_channel,\n      \"output_channel\": output_channel,\n      \"kernel_height\": kernel_height,\n      \"kernel_width\": kernel_width\n  }\n  return layer_dict.get(attr_name, None)\n\n\ndef is_upsampled(layer: tf.keras.layers.Layer):\n  # Evaluate if a given layer is doing upsampling.\n  return \"UpSampling\" in layer.__class__.__name__\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/__init__.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Export quantizer package.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom .accumulator_factory import AccumulatorFactory\nfrom .multiplier_factory import MultiplierFactory\nfrom .multiplier_impl import IMultiplier, FloatingPointMultiplier, FixedPointMultiplier, Mux, AndGate, Adder, XorGate, Shifter\nfrom .accumulator_impl import IAccumulator, FloatingPointAccumulator, FixedPointAccumulator\nfrom .quantizer_impl import IQuantizer, QuantizedBits, Binary, QuantizedRelu, Ternary, FloatingPoint, PowerOfTwo, ReluPowerOfTwo\nfrom .quantizer_factory import QuantizerFactory\nfrom .qbn_factory import QBNFactory\nfrom .fused_bn_factory import FusedBNFactory\nfrom .merge_factory import MergeFactory\nfrom .divider_factory import IDivider\nfrom .subtractor_factory import ISubtractor\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/accumulator_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Create accumulator quantizers.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\n\nfrom qkeras.qtools.quantized_operators import accumulator_impl\nfrom qkeras.qtools.quantized_operators import multiplier_impl\n\n\nclass AccumulatorFactory:\n  \"\"\"interface for accumulator type.\"\"\"\n\n  def make_accumulator(\n      self, kernel_shape,\n      multiplier: multiplier_impl.IMultiplier,\n      use_bias=True\n  ) -> accumulator_impl.IAccumulator:\n    \"\"\"Create an accumulator instance.\"\"\"\n\n    # Creates a local deep copy so that any changes we made to the multiplier\n    # will not impact the input multiplier type. 
This is necessary in case\n    # we call this function multiple times to get different multipliers.\n    local_multiplier = copy.deepcopy(multiplier)\n\n    # The type and bit width of the accumulator is determined from the\n    # multiplier implementation, and the shape of both kernel and bias\n\n    if local_multiplier.output.is_floating_point:\n      accumulator = accumulator_impl.FloatingPointAccumulator(\n          local_multiplier)\n\n    # po2*po2 is implemented as Adder; output type is po2\n    # in multiplier, po2 needs to be converted to FixedPoint\n    elif local_multiplier.output.is_po2:\n      accumulator = accumulator_impl.Po2Accumulator(\n          kernel_shape, local_multiplier, use_bias)\n\n    # fixed point\n    else:\n      accumulator = accumulator_impl.FixedPointAccumulator(\n          kernel_shape, local_multiplier, use_bias)\n\n    return accumulator\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/accumulator_impl.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Accumulator operation implementation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nfrom absl import logging\nimport numpy as np\n\nfrom qkeras.qtools.quantized_operators import multiplier_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\ndef po2_to_qbits(quantizer: quantizer_impl.IQuantizer):\n  \"\"\"convert po2 type to qbits type.\"\"\"\n\n  (min_exp, max_exp) = quantizer.get_min_max_exp()\n  # min_exp is number of bits needed on the right in qbits\n  # max_exp is number of bits needed on the left in qbits\n  unsigned_bits = min_exp + max_exp\n  int_bits = max_exp\n  sign_bit = quantizer.is_signed\n  bits = sign_bit + unsigned_bits\n\n  return (int(bits), int(int_bits))\n\n\nclass IAccumulator(abc.ABC):\n  \"\"\"abstract class for accumulator.\"\"\"\n\n  @staticmethod\n  @abc.abstractmethod\n  def implemented_as():\n    pass\n\n\nclass FloatingPointAccumulator(IAccumulator):\n  \"\"\"class for floating point accumulator.\"\"\"\n\n  def __init__(\n      self,\n      multiplier: multiplier_impl.IMultiplier\n  ):\n    super().__init__()\n\n    self.multiplier = multiplier\n    self.output = quantizer_impl.FloatingPoint(\n        bits=self.multiplier.output.bits)\n    
self.output.bits = self.multiplier.output.bits\n    self.output.int_bits = -1\n    self.output.is_signed = self.multiplier.output.is_signed\n    self.output.is_floating_point = True\n    self.output.op_type = \"accumulator\"\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass FixedPointAccumulator(IAccumulator):\n  \"\"\"class for fixed point accumulator.\"\"\"\n\n  def __init__(\n      self,\n      kernel_shape,\n      multiplier: multiplier_impl.IMultiplier,\n      use_bias=True\n  ):\n    super().__init__()\n\n    if len(kernel_shape) not in (\n        2,\n        4,\n    ):\n      logging.fatal(\n          \"unsupported kernel shape, \"\n          \"it is neither a dense kernel of length 2,\"\n          \" nor a convolution kernel of length 4\")\n\n    kernel_shape_excluding_output_dim = kernel_shape[:-1]\n    kernel_add_ops = np.prod(kernel_shape_excluding_output_dim)\n\n    # biases are associated with filters; each filter adds 1 bias\n    bias_add = 1 if use_bias else 0\n\n    add_ops = kernel_add_ops + bias_add\n    self.log_add_ops = int(np.ceil(np.log2(add_ops)))\n\n    self.multiplier = multiplier\n    self.output = quantizer_impl.QuantizedBits()\n    self.output.bits = self.log_add_ops + self.multiplier.output.bits\n    self.output.int_bits = self.log_add_ops + self.multiplier.output.int_bits\n    self.output.is_signed = self.multiplier.output.is_signed\n    self.output.op_type = \"accumulator\"\n\n    assert not self.multiplier.output.is_floating_point\n    self.output.is_floating_point = False\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass Po2Accumulator(FixedPointAccumulator):\n  \"\"\"accumulator for po2.\"\"\"\n\n  # multiplier is po2. 
multiplier output needs to convert\n  # to Fixedpoint before Accumulator.\n\n  def __init__(\n      self,\n      kernel_shape,\n      multiplier: multiplier_impl.IMultiplier,\n      use_bias=True\n  ):\n    super().__init__(kernel_shape, multiplier, use_bias)\n\n    assert multiplier.output.is_po2\n    # convert multiplier output from po2 to quantized_bits\n    (bits_from_po2multiplier, int_bits_from_po2multiplier) = po2_to_qbits(\n        multiplier.output)\n\n    self.output.bits = self.log_add_ops + int(bits_from_po2multiplier)\n    self.output.int_bits = self.log_add_ops + int(int_bits_from_po2multiplier)\n    self.output.op_type = \"accumulator\"\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/adder_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"implement adder quantizer.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport copy\n\nfrom absl import logging\nfrom qkeras.qtools.quantized_operators import adder_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass IAdder(abc.ABC):\n  \"\"\"abstract class for adder.\"\"\"\n\n  def __init__(self):\n    self.adder_impl_table = [\n        [\n            adder_impl.FixedPointAdder,\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FloatingPointAdder\n        ],\n        [\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.Po2Adder,\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FloatingPointAdder\n        ],\n        [\n            adder_impl.FixedPointAdder,\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FloatingPointAdder\n        ],\n        [\n            
adder_impl.FixedPointAdder,\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FloatingPointAdder\n        ],\n        [\n            adder_impl.FixedPointAdder,\n            adder_impl.Po2FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FixedPointAdder,\n            adder_impl.FloatingPointAdder\n        ],\n        [\n            adder_impl.FloatingPointAdder,\n            adder_impl.FloatingPointAdder,\n            adder_impl.FloatingPointAdder,\n            adder_impl.FloatingPointAdder,\n            adder_impl.FloatingPointAdder,\n            adder_impl.FloatingPointAdder\n        ]\n    ]\n\n  def make_quantizer(self, quantizer_1: quantizer_impl.IQuantizer,\n                     quantizer_2: quantizer_impl.IQuantizer):\n    \"\"\"make adder quantizer.\"\"\"\n\n    local_quantizer_1 = copy.deepcopy(quantizer_1)\n    local_quantizer_2 = copy.deepcopy(quantizer_2)\n\n    mode1 = local_quantizer_1.mode\n    mode2 = local_quantizer_2.mode\n\n    adder_impl_class = self.adder_impl_table[mode1][mode2]\n    logging.debug(\n        \"qbn adder implemented as class %s\",\n        adder_impl_class.implemented_as())\n\n    return adder_impl_class(\n        local_quantizer_1,\n        local_quantizer_2\n    )\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/adder_impl.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"adder operation implementation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\n\nfrom qkeras.qtools.quantized_operators import accumulator_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\ndef po2_qbits_converter(po2_quantizer: quantizer_impl.IQuantizer):\n  \"\"\"convert a po2 quantizer to fixedpoint quantizer.\"\"\"\n\n  (bits_from_po2, int_bits_from_po2) = accumulator_impl.po2_to_qbits(\n      po2_quantizer)\n  qbits_quantizer = quantizer_impl.QuantizedBits()\n  qbits_quantizer.bits = bits_from_po2\n  qbits_quantizer.int_bits = int_bits_from_po2\n  qbits_quantizer.is_signed = po2_quantizer.is_signed\n\n  return qbits_quantizer\n\n\nclass IAdderImpl(abc.ABC):\n  \"\"\"abstract class for adder.\"\"\"\n\n  @staticmethod\n  @abc.abstractmethod\n  def implemented_as():\n    pass\n\n\nclass FixedPointAdder(IAdderImpl):\n  \"\"\"adder for fixed point.\"\"\"\n\n  def __init__(self, quantizer_1, quantizer_2):\n    self.output = quantizer_impl.QuantizedBits()\n    self.output.int_bits = max(quantizer_1.int_bits,\n                               quantizer_2.int_bits) + 1\n    fractional_bits1 = (quantizer_1.bits - int(quantizer_1.is_signed)\n                        - 
quantizer_1.int_bits)\n    fractional_bits2 = (quantizer_2.bits - int(quantizer_2.is_signed)\n                        - quantizer_2.int_bits)\n    fractional_bits = max(fractional_bits1, fractional_bits2)\n    self.output.is_signed = quantizer_1.is_signed | quantizer_2.is_signed\n    self.output.bits = (self.output.int_bits + int(self.output.is_signed) +\n                        fractional_bits)\n    self.output.mode = 0\n    self.output.is_floating_point = False\n    self.output.is_po2 = 0\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass FloatingPointAdder(IAdderImpl):\n  \"\"\"floating point adder.\"\"\"\n\n  def __init__(self, quantizer_1, quantizer_2):\n    bits = max(quantizer_1.bits, quantizer_2.bits)\n    self.output = quantizer_impl.FloatingPoint(\n        bits=bits)\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass Po2FixedPointAdder(IAdderImpl):\n  \"\"\"adder between po2 and fixed point.\"\"\"\n\n  def __init__(self, quantizer_1, quantizer_2):\n\n    if quantizer_1.is_po2:\n      po2_quantizer = quantizer_1\n      fixedpoint_quantizer = quantizer_2\n    else:\n      po2_quantizer = quantizer_2\n      fixedpoint_quantizer = quantizer_1\n\n    # convert po2 to qbits first\n    po2_qbits_quantizer = po2_qbits_converter(po2_quantizer)\n\n    # qbits + qbits -> FixedPointAdder\n    self.output = FixedPointAdder(po2_qbits_quantizer,\n                                  fixedpoint_quantizer).output\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass Po2Adder(IAdderImpl):\n  \"\"\"adder for po2 type.\"\"\"\n\n  def __init__(self, quantizer_1, quantizer_2):\n    qbits_quantizer_1 = po2_qbits_converter(quantizer_1)\n    qbits_quantizer_2 = po2_qbits_converter(quantizer_2)\n    self.output = FixedPointAdder(qbits_quantizer_1,\n                                  qbits_quantizer_2).output\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/divider_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"create divider quantizer.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport copy\n\nfrom absl import logging\nfrom qkeras.qtools.quantized_operators import divider_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass UnacceptedQuantizerError(ValueError):\n  pass\n\n\nclass IDivider(abc.ABC):\n  \"\"\"abstract class for divider.\"\"\"\n\n  def __init__(self):\n    # also attached the output datatype in the table\n    self.divider_impl_table = [\n        [\n            # when qbits is denominator, use default bits for float result\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=quantizer_impl.FLOATINGPOINT_BITS)),\n            (divider_impl.Shifter, quantizer_impl.QuantizedBits()),\n            (None, None),\n            (None, None),\n            (None, None),\n            # when bits sets to None, will decide f16/f32 according\n            # to input quantizer\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None))\n        ],\n        [\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                
bits=quantizer_impl.FLOATINGPOINT_BITS)),\n            (divider_impl.Subtractor, quantizer_impl.PowerOfTwo()),\n            (None, None),\n            (None, None),\n            (None, None),\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None))\n        ],\n        [\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=quantizer_impl.FLOATINGPOINT_BITS)),\n            (divider_impl.Shifter, quantizer_impl.QuantizedBits()),\n            (None, None),\n            (None, None),\n            (None, None),\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None))\n        ],\n        [\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=quantizer_impl.FLOATINGPOINT_BITS)),\n            (divider_impl.Shifter, quantizer_impl.PowerOfTwo()),\n            (None, None),\n            (None, None),\n            (None, None),\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None))\n        ],\n        [\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=quantizer_impl.FLOATINGPOINT_BITS)),\n            (divider_impl.Shifter, quantizer_impl.PowerOfTwo()),\n            (None, None),\n            (None, None),\n            (None, None),\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None))\n        ],\n        [\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None)),\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None)),\n            (None, None),\n            (None, None),\n            (None, None),\n            (divider_impl.FloatingPointDivider, quantizer_impl.FloatingPoint(\n                bits=None))\n        ]\n    
]\n\n  def make_quantizer(self, numerator_quantizer: quantizer_impl.IQuantizer,\n                     denominator_quantizer: quantizer_impl.IQuantizer):\n    \"\"\"make the quantizer.\"\"\"\n\n    # Create a local copy so that the changes made here won't change the input\n    local_numerator_quantizer = copy.deepcopy(numerator_quantizer)\n    local_denominator_quantizer = copy.deepcopy(denominator_quantizer)\n\n    mode1 = local_numerator_quantizer.mode\n    mode2 = local_denominator_quantizer.mode\n\n    (divider_impl_class, output_quantizer) = self.divider_impl_table[\n        mode1][mode2]\n\n    local_output_quantizer = copy.deepcopy(output_quantizer)\n\n    if divider_impl_class is None:\n      raise UnacceptedQuantizerError(\n          \"denominator quantizer {} not accepted!\".format(\n              denominator_quantizer.name))\n\n    logging.debug(\n        \"qbn adder implemented as class %s\",\n        divider_impl_class.implemented_as())\n\n    return divider_impl_class(\n        local_numerator_quantizer,\n        local_denominator_quantizer,\n        local_output_quantizer\n    )\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/divider_impl.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Divider operation implementation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport numpy as np\n\n\nclass IDividerImpl(abc.ABC):\n  \"\"\"abstract class for divider.\"\"\"\n\n  def __init__(self, numerator_quantizer, denominator_quantizer,\n               output_quantizer):\n    self.numerator_quantizier = numerator_quantizer\n    self.denominator_quantizer = denominator_quantizer\n    self.output = output_quantizer\n\n  @staticmethod\n  @abc.abstractmethod\n  def implemented_as():\n    pass\n\n\nclass FloatingPointDivider(IDividerImpl):\n  \"\"\"floating point divider.\"\"\"\n\n  def __init__(self, numerator_quantizer, denominator_quantizer,\n               output_quantizer):\n\n    super().__init__(numerator_quantizer, denominator_quantizer,\n                     output_quantizer)\n    if self.output.bits is None:\n      # decide f16/f32 according to numerator/denominator type\n      bits = 0\n      if numerator_quantizer.is_floating_point:\n        bits = max(bits, numerator_quantizer.bits)\n      if denominator_quantizer.is_floating_point:\n        bits = max(bits, denominator_quantizer.bits)\n\n      self.output.bits = bits\n\n    self.gate_bits = self.output.bits\n    
self.gate_factor = 1\n\n  @staticmethod\n  def implemented_as():\n    # TODO(lishanok): change cost from \"mul\" to \"divide\"\n    return \"mul\"\n\n\nclass Shifter(IDividerImpl):\n  \"\"\"shifter type.\"\"\"\n\n  # other_datatype/po2\n  def __init__(self, numerator_quantizer, denominator_quantizer,\n               output_quantizer):\n    super().__init__(numerator_quantizer, denominator_quantizer,\n                     output_quantizer)\n\n    qbit_quantizer = numerator_quantizer\n    po2_quantizer = denominator_quantizer\n\n    (min_exp, max_exp) = po2_quantizer.get_min_max_exp()\n\n    # since it's a divider, min_exp and max_exp swap\n    # for calculating right and left shift\n    tmp = min_exp\n    min_exp = max_exp\n    max_exp = tmp\n\n    qbits_bits = qbit_quantizer.bits\n    qbits_int_bits = qbit_quantizer.int_bits\n\n    self.output.bits = int(qbits_bits + max_exp + min_exp)\n    if (not qbit_quantizer.is_signed) and po2_quantizer.is_signed:\n      # if qbit is signed, qbits_bits already has the sign_bit,\n      # no need to +1,\n      # if qbit is un_signed, po2 is unsigned, no need to +1\n      # if qbit is un_signed, po2 is signed, min_exp and max_exp\n      # did not include sign_bit,\n      # therefore need to +1\n      self.output.bits += 1\n\n    self.output.int_bits = int(qbits_int_bits + max_exp)\n    self.output.is_signed = qbit_quantizer.is_signed |\\\n                            po2_quantizer.is_signed\n    self.output.is_floating_point = False\n\n    if po2_quantizer.inference_value_counts > 0:\n      # during qbn inference, count number of unique values\n      self.gate_factor = po2_quantizer.inference_value_counts * 0.3\n      self.gate_bits = qbits_bits\n    else:\n      # programmable shifter, similar to sum gate\n      self.gate_factor = 1\n      b = np.sqrt(2 ** po2_quantizer.bits * qbits_bits)\n      self.gate_bits = b * np.log10(b)\n\n  @staticmethod\n  def implemented_as():\n    return \"shifter\"\n\n\nclass 
Subtractor(IDividerImpl):\n  \"\"\"subtractor quantizer.\"\"\"\n\n  # subtractor is only possible when numerator and denominator\n  # are both po2 quantizers.\n\n  def __init__(self, numerator_quantizer, denominator_quantizer,\n               output_quantizer):\n    super().__init__(numerator_quantizer, denominator_quantizer,\n                     output_quantizer)\n\n    self.output.bits = max(numerator_quantizer.bits,\n                           denominator_quantizer.bits) + 1\n    self.output.int_bits = max(numerator_quantizer.int_bits,\n                               denominator_quantizer.int_bits) + 1\n    self.output.is_signed = 1\n    self.output.is_floating_point = False\n    self.output.is_po2 = 1\n\n    if (numerator_quantizer.max_val_po2 == -1 or\n        denominator_quantizer.max_val_po2 == -1):\n      self.output.max_val_po2 = -1\n    else:\n      # Adder is two po2_value multiply with each other\n      self.output.max_val_po2 = numerator_quantizer.max_val_po2 /\\\n                                denominator_quantizer.max_val_po2\n\n    if \"po2\" in output_quantizer.name:\n      # po2 * po2\n      if self.output.is_signed:\n        output_quantizer.name = \"quantized_po2\"\n      else:\n        output_quantizer.name = \"quantized_relu_po2\"\n\n    self.gate_bits = self.output.bits\n    self.gate_factor = 1\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/fused_bn_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"quantized batch normliaztion quantizer implementation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\nimport math\n\nimport numpy as np\n\nfrom qkeras import base_quantizer\nfrom qkeras.qtools import qtools_util\nfrom qkeras.qtools.quantized_operators import adder_factory\nfrom qkeras.qtools.quantized_operators import divider_factory\nfrom qkeras.qtools.quantized_operators import multiplier_factory\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\nclass FusedBNFactory:\n  \"\"\"determine which quantizer implementation to use.\n\n  Create an fused bn instance. 
The type and bit width of the output_quantizer\n  is deteremined from both the previous layer and batchnorm weight types:\n\n  z = bn(y) = bn_inv * x - fused_bias is the output of the previous\n  layer and the following bn layer, with:\n    bn_inv = gamma * rsqrt(variance^2+epsilon) is computed from the\n      bn layer weights with inverse_quantizer datatype\n    x is the previous layer's output\n    fused_bias = bn_inv * bias + beta - bn_inv*mean where bias is\n      the bias term from the previous layer, beta and mean are the bn\n      layer weights.\n  \"\"\"\n\n  def make_quantizer(\n      self,\n      prev_output_quantizer: quantizer_impl.IQuantizer,\n      beta_quantizer: quantizer_impl.IQuantizer,\n      mean_quantizer: quantizer_impl.IQuantizer,\n      inverse_quantizer: quantizer_impl.IQuantizer,\n      prev_bias_quantizer: quantizer_impl.IQuantizer,\n      use_beta: bool,\n      use_bias: bool,\n      qkeras_inverse_quantizer: base_quantizer.BaseQuantizer,\n  ):\n    \"\"\"Makes a fused_bn quantizer.\n\n    Args:\n      prev_output_quantizer: IQuantizer type. Previous layer output quantizer\n      beta_quantizer: IQuantizer type. bn layer beta quantizer\n      mean_quantizer: IQuantizer type.  layer mean quantizer\n      inverse_quantizer: IQuantizer type. bn layer inverse quantizer\n      prev_bias_quantizer: IQuantizer type. conv layer bias quantizer\n      use_beta: Bool. whether enabling beta in batch_normalization layer\n      use_bias: Bool. Whether bias is used in conv layer.\n      qkeras_inverse_quantizer: QKeras quantizer type. 
bn layer inverse\n        quantizer with QKeras quantizer type\n    Returns:\n      None\n    \"\"\"\n\n    assert not isinstance(inverse_quantizer, quantizer_impl.FloatingPoint), (\n        \"inverse_quantizer in batchnorm layer has to be set for \"\n        \"fused bn inference in hardware!\")\n\n    # bn_inv * x\n    multiplier_instance = multiplier_factory.MultiplierFactory()\n    multiplier_x = multiplier_instance.make_multiplier(\n        inverse_quantizer, prev_output_quantizer)\n\n    qtools_util.adjust_multiplier_for_auto_po2(\n        multiplier_x, qkeras_inverse_quantizer)\n\n    # fused_bias = bn_inv * bias + beta - bn_inv*mean\n    # This step derives the datatype for bn_inv * mean\n    multiplier_mean = multiplier_instance.make_multiplier(\n        inverse_quantizer, mean_quantizer)\n\n    qtools_util.adjust_multiplier_for_auto_po2(\n        multiplier_mean, qkeras_inverse_quantizer)\n\n    adder_instance = adder_factory.IAdder()\n    if use_bias:\n      # Derives datatype of bn_inv*bias\n      multiplier_bias = multiplier_instance.make_multiplier(\n          inverse_quantizer, prev_bias_quantizer)\n\n      qtools_util.adjust_multiplier_for_auto_po2(\n          multiplier_bias, qkeras_inverse_quantizer)\n\n      # Derives datatype of bn_inv*bias - bn_inv*mean\n      adder_1 = adder_instance.make_quantizer(\n          multiplier_bias.output, multiplier_mean.output)\n    else:\n      # There is no bias from the previous layer,\n      # therefore datatype of bn_inv*bias - bn_inv*mean is the same\n      # as bn_inv*mean\n      adder_1 = multiplier_mean\n\n    if use_beta:\n      # Derives datatype of fused_bias = bn_inv * bias + beta - bn_inv*mean\n      adder_bias = adder_instance.make_quantizer(\n          adder_1.output, beta_quantizer)\n    else:\n      # Since beta is not used, fused_bias = bn_inv * bias - bn_inv*mean\n      adder_bias = adder_1\n\n    # bn_inv * x - fused_bias\n    adder = adder_instance.make_quantizer(\n        multiplier_x.output, 
adder_bias.output)\n    self.internal_accumulator = adder\n    self.internal_output = adder\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/merge_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"create merge layer output quantizers.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\n\nfrom qkeras.qtools.quantized_operators import adder_impl\nfrom qkeras.qtools.quantized_operators import multiplier_factory\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass MergeFactory:\n  \"\"\"determine which merge implementation to use.\"\"\"\n\n  def make_quantizer(self, input_qe_list, layer_type):\n    \"\"\"make quantier.\"\"\"\n\n    if layer_type == \"Add\":\n      return Add(input_qe_list)\n    elif layer_type == \"Multiply\":\n      return Multiply(input_qe_list)\n    elif layer_type == \"Maximum\":\n      return Maximum(input_qe_list)\n    elif layer_type == \"Minimum\":\n      return Minimum(input_qe_list)\n    elif layer_type == \"Average\":\n      return Average(input_qe_list)\n    elif layer_type == \"Concatenate\":\n      return Concatenate(input_qe_list)\n    elif layer_type == \"Dot\":\n      return Dot(input_qe_list)\n\n\nclass IMerger(abc.ABC):\n  \"\"\"abstract class for merge quantizer.\"\"\"\n\n  def __init__(self, input_qe_list):\n    self.input_quantizers = []\n    self.edges = []\n\n    for node in input_qe_list:\n      self.input_quantizers.append(node[0])\n  
    self.edges.append(node[1])\n\n\nclass Add(IMerger):\n  \"\"\"add a list of inputs.\"\"\"\n\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n\n  def __init__(self, input_qe_list):\n    super().__init__(input_qe_list)\n\n    max_bits = -1\n    max_int_bits = -1\n    is_signed = False\n\n    bits = 0\n    is_floating_point = False\n    for quantizer in self.input_quantizers:\n      if quantizer.is_floating_point:\n        is_floating_point = True\n        bits = max(bits, quantizer.bits)\n      else:\n        if quantizer.is_po2:\n          qbits_quantizer = adder_impl.po2_qbits_converter(\n              quantizer)\n        else:\n          qbits_quantizer = quantizer\n\n        if qbits_quantizer.bits > max_bits:\n          max_bits = qbits_quantizer.bits\n\n        if qbits_quantizer.int_bits > max_int_bits:\n          max_int_bits = qbits_quantizer.int_bits\n\n      is_signed |= quantizer.is_signed\n\n    if is_floating_point:\n      self.output = quantizer_impl.FloatingPoint(\n          bits=bits)\n    else:\n      self.output = quantizer_impl.QuantizedBits()\n      self.output.bits = max_bits + 1\n      self.output.int_bits = max_int_bits + 1\n      self.output.is_signed = is_signed\n      self.output.mode = 0\n      self.output.is_floating_point = False\n      self.output.is_po2 = 0\n\n    self.gate_factor = 1\n    self.gate_bits = self.output.bits\n\n  def implemented_as(self):\n    return \"add\"\n\n\nclass Multiply(IMerger):\n  \"\"\"multiplies (element-wise) a list of inputs.\"\"\"\n\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n\n  def  __init__(self, input_qe_list):\n    super().__init__(input_qe_list)\n    multiplier_instance = multiplier_factory.MultiplierFactory()\n\n    quantizer = self.input_quantizers[0]\n    for cur in self.input_quantizers[1:]:\n      tmp = 
multiplier_instance.make_multiplier(quantizer, cur)\n      quantizer = tmp.output\n\n    self.output = quantizer\n\n    # TODO(lishanok): only use the last multiplier here\n    self.impl_class = tmp\n    self.gate_factor = tmp.gate_factor\n    self.gate_bits = tmp.gate_bits\n\n  def implemented_as(self):\n    return self.impl_class.implemented_as()\n\n\nclass Maximum(IMerger):\n  \"\"\"maximum of a list of inputs.\"\"\"\n\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n\n  def __init__(self, input_qe_list):\n    super().__init__(input_qe_list)\n\n    is_same = True\n    is_floating_point = False\n    bits = 0\n\n    quantizer = self.input_quantizers[0]\n    for cur in self.input_quantizers[1:]:\n      if (quantizer.name != cur.name or quantizer.bits != cur.bits or\n          quantizer.int_bits != cur.int_bits or\n          quantizer.is_signed != cur.is_signed):\n        is_same = False\n        break\n\n    if is_same:\n      self.output = quantizer\n    else:\n      max_bits = -1\n      max_int_bits = -1\n      is_signed = False\n      for quantizer in self.input_quantizers:\n        if quantizer.is_floating_point:\n          is_floating_point = True\n          bits = max(bits, quantizer.bits)\n        else:\n          if quantizer.is_po2:\n            qbits_quantizer = adder_impl.po2_qbits_converter(\n                quantizer)\n          else:\n            qbits_quantizer = quantizer\n\n          if qbits_quantizer.bits > max_bits:\n            max_bits = qbits_quantizer.bits\n\n          if qbits_quantizer.int_bits > max_int_bits:\n            max_int_bits = qbits_quantizer.int_bits\n\n        is_signed |= quantizer.is_signed\n\n      if is_floating_point:\n        self.output = quantizer_impl.FloatingPoint(\n            bits=bits)\n      else:\n        self.output = quantizer_impl.QuantizedBits()\n        self.output.bits = max_bits\n        self.output.int_bits = max_int_bits\n       
 self.output.is_signed = is_signed\n        self.output.mode = 0\n        self.output.is_floating_point = False\n        self.output.is_po2 = 0\n\n    self.gate_factor = 0.2\n    self.gate_bits = self.output.bits\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass Minimum(Maximum):\n  \"\"\"minimum (element-wise) a list of inputs.\"\"\"\n\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n  pass\n\n\nclass Average(Maximum):\n  \"\"\"average (element-wise) a list of inputs.\"\"\"\n\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n  def __init__(self, input_qe_list):\n    super().__init__(input_qe_list)\n\n    self.gate_factor = 1\n    self.gate_bits = self.output.bits\n\n\nclass Concatenate(Maximum):\n  \"\"\"Layer that concatenates a list of inputs.\"\"\"\n\n  # It takes as input a list of tensors, all of the same\n  # shape except for the concatenation axis, and returns\n  # a single tensor, the concatenation of all inputs..\n  def __init__(self, input_qe_list):\n    super().__init__(input_qe_list)\n\n    self.gate_factor = 0\n    self.gate_bits = self.output.bits\n\n\n# TODO(lishanok): finish DOT ndimension tensor logic\nclass Dot(IMerger):\n  \"\"\"dot product between samples in two tensors.\"\"\"\n\n  # E.g. if applied to a list of two tensors a and b\n  # of shape (batch_size, n), the\n  # output will be a tensor of shape (batch_size, 1)\n  # where each entry i will be\\\n  # the dot product between a[i] and b[i].\n\n  pass\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/multiplier_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Create multiplier quantizer.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom absl import logging\nimport copy\n\nfrom qkeras.qtools.quantized_operators import multiplier_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass MultiplierFactory:\n  \"\"\"determine which multiplier implementation to use.\"\"\"\n\n  def __init__(self):\n    # the table below is found in this slides:\n    # https://docs.google.com/presentation/d/1pcmoB6ZpX0IqjhSwgzO-oQwpMRYwIcDe/edit#slide=id.p40\n    # also attached the output datatype in the table\n    self.multiplier_impl_table = [\n        [\n            (\n                multiplier_impl.FixedPointMultiplier,\n                quantizer_impl.QuantizedBits()\n            ),\n            (multiplier_impl.Shifter, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.Mux, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.Mux, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.AndGate, quantizer_impl.QuantizedBits()),\n            (\n                multiplier_impl.FloatingPointMultiplier,\n                quantizer_impl.FloatingPoint(\n                    bits=None)\n            )\n        ],\n        [\n            
(multiplier_impl.Shifter, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.Adder, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.Mux, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.Mux, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.AndGate, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.FloatingPointMultiplier,\n             quantizer_impl.FloatingPoint(bits=None)\n            )\n        ],\n        [\n            (multiplier_impl.Mux, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.Mux, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.Mux, quantizer_impl.Ternary()),\n            (multiplier_impl.Mux, quantizer_impl.Ternary()),\n            (multiplier_impl.AndGate, quantizer_impl.Ternary()),\n            (multiplier_impl.FloatingPointMultiplier,\n             quantizer_impl.FloatingPoint(bits=None))\n        ],\n        [\n            (multiplier_impl.Mux, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.Mux, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.Mux, quantizer_impl.Ternary()),\n            (multiplier_impl.XorGate, quantizer_impl.Binary(\n                use_01=False)),\n            (multiplier_impl.AndGate, quantizer_impl.Ternary()),\n            (multiplier_impl.FloatingPointMultiplier,\n             quantizer_impl.FloatingPoint(bits=None))\n        ],\n        [\n            (multiplier_impl.AndGate, quantizer_impl.QuantizedBits()),\n            (multiplier_impl.AndGate, quantizer_impl.PowerOfTwo()),\n            (multiplier_impl.AndGate, quantizer_impl.Ternary()),\n            (multiplier_impl.AndGate, quantizer_impl.Ternary()),\n            (multiplier_impl.AndGate, quantizer_impl.Binary(\n                use_01=True)),\n            (multiplier_impl.FloatingPointMultiplier,\n             quantizer_impl.FloatingPoint(bits=None))\n        ],\n        [\n            (\n                multiplier_impl.FloatingPointMultiplier,\n         
       quantizer_impl.FloatingPoint(bits=None)\n            ),\n            (\n                multiplier_impl.FloatingPointMultiplier,\n                quantizer_impl.FloatingPoint(bits=None)\n            ),\n            (\n                multiplier_impl.FloatingPointMultiplier,\n                quantizer_impl.FloatingPoint(bits=None)\n            ),\n            (\n                multiplier_impl.FloatingPointMultiplier,\n                quantizer_impl.FloatingPoint(bits=None)\n            ),\n            (\n                multiplier_impl.FloatingPointMultiplier,\n                quantizer_impl.FloatingPoint(bits=None)\n            ),\n            (\n                multiplier_impl.FloatingPointMultiplier,\n                quantizer_impl.FloatingPoint(bits=None)\n            )\n        ]\n    ]\n\n  def make_multiplier(\n      self, weight_quantizer: quantizer_impl.IQuantizer,\n      input_quantizer: quantizer_impl.IQuantizer\n  ) -> multiplier_impl.IMultiplier:\n    \"\"\"Create a multiplier instance.\n\n    The type and bit width of the multiplier is deteremined from the\n    quantizer type of both the kernel (weight) and input tensor.\n\n    The table below illustrates the rule of inferring multiplier type from the\n    quantizer type of both the kernel (weight) and input tensor\n\n                                        x\n                      qb(n)   +/-,exp  t(-1,0,+1) b(-1,+1) b(0,1) float32\n        qb(n)            *     << >>,-     ?,-       ?,-       ?\n        +/-,exp        << >>,-   +         ?,-        ^      ?,-\n      w t(-1,0,+1)      ?,-     ?,-        ?,^       ?,^      ^\n        b(-1,+1)        ?,-      ^         ?,^        ^       ^\n        b(0,1)           ?      
?,-         ^         ^       ^      &\n        float32\n\n    Args:\n      weight_quantizer: weight quantizer type\n      input_quantizer: input quantizer type\n\n    Returns:\n      An IMultiplier instance.\n    \"\"\"\n\n    assert weight_quantizer is not None\n    assert input_quantizer is not None\n\n    (multiplier_impl_class, output_quantizer) = self.multiplier_impl_table[\n        weight_quantizer.mode][input_quantizer.mode]\n\n    # Need to create local copies becuase different multiplier instances\n    # created from the factory might make changes to these quantizers.\n\n    local_weight_quantizer = copy.deepcopy(weight_quantizer)\n    local_input_quantizer = copy.deepcopy(input_quantizer)\n    local_output_quantizer = copy.deepcopy(output_quantizer)\n    logging.debug(\n        \"multiplier implemented as class %s\",\n        multiplier_impl_class.implemented_as())\n\n    assert issubclass(multiplier_impl_class, multiplier_impl.IMultiplier)\n\n    return multiplier_impl_class(\n        local_weight_quantizer,\n        local_input_quantizer,\n        local_output_quantizer\n    )\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/multiplier_impl.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"multiplier operation implementations.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport numpy as np\n\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass IMultiplier(abc.ABC):\n  \"\"\"abstract class for multiplier.\n\n  This class is about how multiplier is implemented in hardware, which can be\n     mux gate, shifter, adder, etc.\n  \"\"\"\n\n  def __init__(self, weight_quantizer: quantizer_impl.IQuantizer,\n               input_quantizer: quantizer_impl.IQuantizer,\n               output_quantizer: quantizer_impl.IQuantizer):\n    self.input = input_quantizer\n    self.weights = weight_quantizer\n    self.output = output_quantizer\n    self.output.op_type = \"multiplier\"\n\n  @staticmethod\n  @abc.abstractmethod\n  def implemented_as():\n    pass\n\n  def name(self) -> str:\n    return self.output.name\n\n  def output_quantizer(self):\n    return self.output\n\n\ndef assert_neither_input_and_weights_is_floating_point(\n    multiplier: IMultiplier):\n  \"\"\"assert non float type.\"\"\"\n\n  assert not multiplier.input.is_floating_point\n  assert not multiplier.weights.is_floating_point\n\n\nclass Mux(IMultiplier):\n  \"\"\"Use mux for the hardware implementation of 
multiplier.\"\"\"\n\n  # binary(1,-1)/ternary * other_datatype\n  def __init__(self, weight_quantizer: quantizer_impl.IQuantizer,\n               input_quantizer: quantizer_impl.IQuantizer,\n               output_quantizer: quantizer_impl.IQuantizer):\n    super().__init__(weight_quantizer, input_quantizer,\n                     output_quantizer)\n    self.output.is_signed = self.input.is_signed | self.weights.is_signed\n\n    if any(s in weight_quantizer.name for s in [\"binary\", \"ternary\"]):\n      self.output.bits = input_quantizer.bits\n      self.output.int_bits = input_quantizer.int_bits\n      if not input_quantizer.is_signed and weight_quantizer.is_signed:\n        self.output.bits += 1\n\n      # multiplier factor for gate counts\n      # gate_factor is the relative energy of given gate comparing\n      # to an Add gate, giving that Add gate is 1\n      if \"binary\" in weight_quantizer.name:\n        self.gate_factor = 0.3\n      else:\n        self.gate_factor = 2 * 0.3\n      self.gate_bits = input_quantizer.bits\n\n    else:\n      self.output.bits = weight_quantizer.bits\n      self.output.int_bits = weight_quantizer.int_bits\n      if not weight_quantizer.is_signed and input_quantizer.is_signed:\n        self.output.bits += 1\n\n      # multiplier factor for gate counts\n      if input_quantizer.name == \"binary\":\n        self.gate_factor = 0.3\n      else:\n        self.gate_factor = 2 * 0.3\n      self.gate_bits = weight_quantizer.bits\n\n    if \"po2\" in output_quantizer.name:\n      if self.output.is_signed:\n        output_quantizer.name = \"quantized_po2\"\n      else:\n        output_quantizer.name = \"quantized_relu_po2\"\n\n      if \"po2\" in weight_quantizer.name:\n        self.output.max_val_po2 = weight_quantizer.max_val_po2\n      else:\n        self.output.max_val_po2 = input_quantizer.max_val_po2\n\n      self.output.int_bits = self.output.bits\n\n  @staticmethod\n  def implemented_as():\n    return \"mux\"\n\n\nclass 
XorGate(IMultiplier):\n  \"\"\"Use XorGate for hardware implementation of a multiplier.\"\"\"\n\n  def __init__(self, weight_quantizer: quantizer_impl.IQuantizer,\n               input_quantizer: quantizer_impl.IQuantizer,\n               output_quantizer: quantizer_impl.IQuantizer):\n    super().__init__(weight_quantizer, input_quantizer, output_quantizer)\n\n    if output_quantizer.name != \"ternary\":\n      self.output.bits = max(self.input.bits, self.weights.bits)\n      self.output.int_bits = max(self.input.int_bits, self.weights.int_bits)\n      self.output.is_signed = self.input.is_signed | self.weights.is_signed\n      assert_neither_input_and_weights_is_floating_point(self)\n      self.output.is_floating_point = False\n\n    self.gate_factor = 0.3\n    self.gate_bits = 1\n\n  @staticmethod\n  def implemented_as():\n    return \"xor\"\n\n\nclass Shifter(IMultiplier):\n  \"\"\"shifter gate.\n\n  po2*qbit is implemented as a shifter. output is qbits type.\n\n  determin number of bits in the output qbits type:\n    1. min_exp in po2: number of bits to be expanded on the\n        right (decimal bits) in qbits\n        for example, min_exp = -2 -> po2 =2^min_exp = 2^(-2) :\n        this means, po2*qbit -> qbit value right shifted for 2 bits\n    2. max_exp in po2: number of bits to be expanded on\n        the left (int_bits) in qbits\n\n  How to calculate min_exp and max_exp:\n    1.if po2 is_signed (quantized_po2)\n      *one bit for sign for the entire po2 value;\n      *exp has non_sign_bits = bits - 1 number of bits,\n      *furthermore, 1 bit from non_sign_bits is used as sign bit in exp;\n      *value range for exp is [-2 ** (non_sign_bits - 1),\n       2 ** (non_sign_bits - 1) - 1]\n    2.if not_signed (quantized_relu_po2)\n      * 0 bit for the entire po2 value\n      * exp has non_sign_bits = bits\n      * rest is the same as above\n\n  determine sign bit in the output qbits:\n    1. 
qbits no_sign and po2 is_sign: since max_exp and min_exp\n        are computed without sign bit\n       we need to add 1 sign bit to the final result;\n    2. qbits is_sign: since qbits already has a sign bit,\n        no extra sign bit needed\n    3. qbits no_sign and po2 no_sign: no extra sign bit needed\n\n  Attributes:\n    input: input_quantizer\n    weight: weight_quantizer\n    output: output_quantizer\n    gate_factor: relative energy comparing to an Adder\n    gate_bits: number of bits for energy calculation.\n  \"\"\"\n\n  def __init__(\n      self, weight_quantizer: quantizer_impl.IQuantizer,\n      input_quantizer: quantizer_impl.IQuantizer,\n      output_quantizer: quantizer_impl.IQuantizer\n  ):\n    super().__init__(weight_quantizer, input_quantizer, output_quantizer)\n\n    # locate the po2 quantizer\n    mode_w = weight_quantizer.mode\n    if mode_w == 1:\n      po2_quantizer = weight_quantizer\n      qbit_quantizer = input_quantizer\n    else:\n      po2_quantizer = input_quantizer\n      qbit_quantizer = weight_quantizer\n\n    # find min_exp and max_exp of po2 quantizer\n    (min_exp, max_exp) = po2_quantizer.get_min_max_exp()\n    qbits_bits = qbit_quantizer.bits\n    qbits_int_bits = qbit_quantizer.int_bits\n\n    self.output.bits = int(qbits_bits + max_exp + min_exp)\n    if (not qbit_quantizer.is_signed) and po2_quantizer.is_signed:\n      # if qbit is signed, qbits_bits already has the sign_bit, no need to +1\n      # if qbit is un_signed, po2 is unsigned, no need to +1\n      # if qbit is un_signed, po2 is signed, min_exp and max_exp\n      # didnot include sign_bit,\n      # therefore need to +1\n      self.output.bits += 1\n\n    self.output.int_bits = int(qbits_int_bits + max_exp)\n    self.output.is_signed = qbit_quantizer.is_signed | po2_quantizer.is_signed\n\n    assert_neither_input_and_weights_is_floating_point(self)\n    self.output.is_floating_point = False\n\n    if po2_quantizer.inference_value_counts > 0:\n      
self.gate_factor = po2_quantizer.inference_value_counts * 0.3\n      self.gate_bits = qbits_bits\n    else:\n      self.gate_factor = 1\n      b = np.sqrt(2 ** po2_quantizer.bits * qbits_bits)\n      self.gate_bits = b * np.log10(b)\n\n  @staticmethod\n  def implemented_as():\n    return \"shifter\"\n\n\nclass AndGate(IMultiplier):\n  \"\"\"and gate implementation.\"\"\"\n\n  # binary(0,1) * any_datatype\n  def __init__(\n      self, weight_quantizer: quantizer_impl.IQuantizer,\n      input_quantizer: quantizer_impl.IQuantizer,\n      output_quantizer: quantizer_impl.IQuantizer\n  ):\n    super().__init__(weight_quantizer, input_quantizer, output_quantizer)\n\n    # if output is ternary, no need for further computation\n    if self.output.name != \"ternary\":\n      self.output.bits = max(self.input.bits, self.weights.bits)\n\n      self.output.is_signed = self.input.is_signed | self.weights.is_signed\n      self.output.is_floating_point = self.input.is_floating_point |\\\n                                      self.weights.is_floating_point\n\n      if weight_quantizer.name == \"binary\" and weight_quantizer.use_01:\n        # binary(0,1) * datatype -> int_bits = datatype.int_bits\n        self.output.int_bits = input_quantizer.int_bits\n      else:\n        self.output.int_bits = weight_quantizer.int_bits\n\n      if \"po2\" in output_quantizer.name:\n        # binary * po2\n        if self.output.is_signed:\n          output_quantizer.name = \"quantized_po2\"\n        else:\n          output_quantizer.name = \"quantized_relu_po2\"\n\n        if \"po2\" in weight_quantizer.name:\n          self.output.max_val_po2 = weight_quantizer.max_val_po2\n        else:\n          self.output.max_val_po2 = input_quantizer.max_val_po2\n\n    self.gate_bits = self.output.bits\n    self.gate_factor = 0.1\n\n  @staticmethod\n  def implemented_as():\n    return \"and\"\n\n\nclass Adder(IMultiplier):\n  \"\"\"adder implementation.\"\"\"\n\n  def __init__(self, weight_quantizer: 
quantizer_impl.IQuantizer,\n               input_quantizer: quantizer_impl.IQuantizer,\n               output_quantizer: quantizer_impl.IQuantizer):\n    super().__init__(weight_quantizer, input_quantizer,\n                     output_quantizer)\n    self.output.bits = max(self.input.bits, self.weights.bits) + 1\n    self.output.int_bits = max(self.input.int_bits,\n                               self.weights.int_bits) + 1\n    self.output.is_signed = self.input.is_signed | self.weights.is_signed\n    assert_neither_input_and_weights_is_floating_point(self)\n    self.output.is_floating_point = False\n    self.output.is_po2 = 1\n\n    if self.input.max_val_po2 == -1 or self.weights.max_val_po2 == -1:\n      self.output.max_val_po2 = -1\n    else:\n      # Adder is two po2_value multiply with each other\n      self.output.max_val_po2 = self.input.max_val_po2 * self.weights.max_val_po2\n\n    if \"po2\" in output_quantizer.name:\n      # po2 * po2\n      if self.output.is_signed:\n        output_quantizer.name = \"quantized_po2\"\n      else:\n        output_quantizer.name = \"quantized_relu_po2\"\n\n    self.gate_bits = self.output.bits\n    self.gate_factor = 1\n\n  @staticmethod\n  def implemented_as():\n    return \"add\"\n\n\nclass FloatingPointMultiplier(IMultiplier):\n  \"\"\"multiplier for floating point.\"\"\"\n\n  def __init__(self, weight_quantizer: quantizer_impl.IQuantizer,\n               input_quantizer: quantizer_impl.IQuantizer,\n               output_quantizer: quantizer_impl.IQuantizer):\n    super().__init__(weight_quantizer, input_quantizer,\n                     output_quantizer)\n\n    self.output.bits = max(\n        self.input.bits * self.input.is_floating_point,\n        self.weights.bits * self.weights.is_floating_point,\n    )\n    self.output.int_bits = -1\n    self.output.is_signed = 1\n\n    assert self.input.is_floating_point | self.weights.is_floating_point\n    self.output.is_floating_point = True\n\n    self.gate_factor = 1\n    
self.gate_bits = self.output.bits\n\n  @staticmethod\n  def implemented_as():\n    return \"mul\"\n\n\nclass FixedPointMultiplier(IMultiplier):\n  \"\"\"multiplier for fixed point.\"\"\"\n\n  def __init__(self, weight_quantizer: quantizer_impl.IQuantizer,\n               input_quantizer: quantizer_impl.IQuantizer,\n               output_quantizer: quantizer_impl.IQuantizer):\n    super().__init__(weight_quantizer, input_quantizer,\n                     output_quantizer)\n\n    # Total int bits is the sum of individual int bits.\n    self.output.int_bits = self.input.int_bits + self.weights.int_bits\n\n    # Total fractional bits is the sum of individual fractional bits\n    fractional_bits1 = (self.input.bits - int(self.input.is_signed)\n                        - self.input.int_bits)\n    fractional_bits2 = (self.weights.bits - int(self.weights.is_signed)\n                        - self.weights.int_bits)\n    fractional_bits = fractional_bits1 + fractional_bits2\n\n    self.output.is_signed = self.input.is_signed | self.weights.is_signed\n\n    # Total bits is the sum of int bits, fractional bits and sign bit\n    self.output.bits = self.output.int_bits + fractional_bits + int(\n        self.output.is_signed)\n\n    assert_neither_input_and_weights_is_floating_point(self)\n    self.output.is_floating_point = False\n\n    self.gate_factor = 1\n    self.gate_bits = np.sqrt(self.input.bits * self.weights.bits)\n\n  @staticmethod\n  def implemented_as():\n    return \"mul\"\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/qbn_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"quantized batch normliaztion quantizer implementation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\nimport math\n\nimport numpy as np\nfrom qkeras.qtools.quantized_operators import adder_factory\nfrom qkeras.qtools.quantized_operators import divider_factory\nfrom qkeras.qtools.quantized_operators import multiplier_factory\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass QBNFactory:\n  \"\"\"determine which quantizer implementation to use.\n\n  Create an qbn instance. 
The type and bit width of the output_quantizer\n  is deteremined from gamma, beta, mean and variance quantizer\n  y = gamma * (x - mean)/stddev + beta\n  \"\"\"\n\n  def make_quantizer(\n      self, input_quantizer: quantizer_impl.IQuantizer,\n      gamma_quantizer: quantizer_impl.IQuantizer,\n      beta_quantizer: quantizer_impl.IQuantizer,\n      mean_quantizer: quantizer_impl.IQuantizer,\n      variance_quantizer: quantizer_impl.IQuantizer,\n      use_scale,\n      use_center\n  ):\n    \"\"\"make a qbn quantizer.\"\"\"\n\n    self.input_quantizer = input_quantizer\n    self.gamma_quantizer = gamma_quantizer\n    self.beta_quantizer = beta_quantizer\n    self.mean_quantizer = mean_quantizer\n    self.variance_quantizer = variance_quantizer\n    self.use_scale = use_scale\n    self.use_center = use_center\n\n    multiplier = None\n    accumulator = None\n\n    # convert variance po2 quantizer to stddev po2 quantizer\n    stddev_quantizer = copy.deepcopy(variance_quantizer)\n    if stddev_quantizer.is_po2:\n      if variance_quantizer.max_val_po2 >= 0:\n        stddev_quantizer.max_val_po2 = np.round(math.sqrt(\n            variance_quantizer.max_val_po2))\n      else:\n        stddev_quantizer.max_val_po2 = variance_quantizer.max_val_po2\n\n      stddev_quantizer.bits = variance_quantizer.bits - 1\n      stddev_quantizer.int_bits = stddev_quantizer.bits\n\n    divider_instance = divider_factory.IDivider()\n\n    if use_scale:\n      # gamma/var\n      divider = divider_instance.make_quantizer(\n          gamma_quantizer, stddev_quantizer)\n\n      # update the actual number of values in divider quantizer during inference\n      count = -1\n      if gamma_quantizer.is_po2 and gamma_quantizer.inference_value_counts > 0:\n        count = gamma_quantizer.inference_value_counts\n        if stddev_quantizer.is_po2 and stddev_quantizer.inference_value_counts > 0:\n          count *= stddev_quantizer.inference_value_counts\n        else:\n          count = -1\n      if 
count > 0:\n        divider.output.inference_value_counts = count\n\n      # gamma/var * x\n      multiplier_instance = multiplier_factory.MultiplierFactory()\n      multiplier = multiplier_instance.make_multiplier(\n          divider.output, input_quantizer)\n      accumulator_input = multiplier\n\n    else:\n      # x/var\n      divider = divider_instance.make_quantizer(\n          input_quantizer, stddev_quantizer)\n      accumulator_input = divider\n\n    if use_center:\n      # y = gamma/var * x + beta\n      accumulator_instance = adder_factory.IAdder()\n      accumulator = accumulator_instance.make_quantizer(\n          accumulator_input.output, beta_quantizer)\n      output_q = accumulator\n    else:\n      output_q = accumulator_input\n\n    self.internal_divide_quantizer = divider\n    self.internal_multiplier = multiplier\n    self.internal_accumulator = accumulator\n    self.internal_output = output_q\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/quantizer_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"decides which quantizer implementation to use.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\nfrom qkeras import quantizers\n# from qkeras.google_internals import experimental_quantizers\n# from qkeras.google_internals import experimental_quantizer_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\nfrom qkeras.qtools.settings import cfg\n\n\nclass QuantizerFactory:\n  \"\"\"Convert qkeras quantizer to qtools quantizer type.\"\"\"\n\n  def __init__(self):\n    self.quantizer_lookup = {\n        quantizers.quantized_bits:\n            quantizer_impl.QuantizedBits,\n        quantizers.binary:\n            quantizer_impl.Binary,\n        quantizers.quantized_relu:\n            quantizer_impl.QuantizedRelu,\n        quantizers.ternary:\n            quantizer_impl.Ternary,\n        quantizers.quantized_relu_po2:\n            quantizer_impl.ReluPowerOfTwo,\n        quantizers.quantized_po2:\n            quantizer_impl.PowerOfTwo,\n        quantizers.stochastic_ternary:\n            quantizer_impl.StochasticTernary,\n        quantizers.stochastic_binary:\n            quantizer_impl.StochasticBinary,\n        quantizers.bernoulli:\n            quantizer_impl.Bernoulli,\n        
quantizers.quantized_tanh:\n            quantizer_impl.QuantizedTanh,\n        quantizers.quantized_ulaw:\n            quantizer_impl.QuantizedUlaw,\n        # experimental_quantizers.quantized_bits_learnable_scale:\n            # experimental_quantizer_impl.QuantizedBitsLearnableScale,\n        # experimental_quantizers.parametric_quantizer_d_xmax:\n            # experimental_quantizer_impl.ParametricQuantizer,\n\n        # add following quantizer types for the use in GraphUpdateEdge\n        quantizer_impl.QuantizedBits:\n            quantizer_impl.QuantizedBits,\n        quantizer_impl.Binary:\n            quantizer_impl.Binary,\n        quantizer_impl.QuantizedRelu:\n            quantizer_impl.QuantizedRelu,\n        quantizer_impl.Ternary:\n            quantizer_impl.Ternary,\n        quantizer_impl.ReluPowerOfTwo:\n            quantizer_impl.ReluPowerOfTwo,\n        quantizer_impl.PowerOfTwo:\n            quantizer_impl.PowerOfTwo,\n        quantizer_impl.FloatingPoint:\n            quantizer_impl.FloatingPoint,\n        quantizer_impl.StochasticTernary:\n            quantizer_impl.StochasticTernary,\n        quantizer_impl.StochasticBinary:\n            quantizer_impl.StochasticTernary,\n        quantizer_impl.Bernoulli:\n            quantizer_impl.StochasticTernary,\n        quantizer_impl.QuantizedTanh:\n            quantizer_impl.StochasticTernary,\n        quantizer_impl.QuantizedUlaw:\n            quantizer_impl.StochasticTernary,\n        # experimental_quantizer_impl.QuantizedBitsLearnableScale:\n            # experimental_quantizer_impl.QuantizedBitsLearnableScale,\n        #experimental_quantizer_impl.ParametricQuantizer:\n            # experimental_quantizer_impl.ParametricQuantizer,\n    }\n\n    self._default_interm_quantizer = cfg.default_interm_quantizer\n\n  def _make_quantizer_util(self, quantizer) -> quantizer_impl.IQuantizer:\n    \"\"\"make quantizer util function.\"\"\"\n    if quantizer in [\"int8\", \"int16\", \"int32\", \"fp16\", 
\"fp32\"]:\n      return self.make_default_quantizer(mode=quantizer)\n\n    elif isinstance(quantizer, tuple(self.quantizer_lookup.keys())):\n      quantizer_class = self.quantizer_lookup[type(quantizer)]\n      if quantizer_class == type(quantizer):\n        return self.clone_quantizer(quantizer)\n      else:\n        q = quantizer_class()\n        q.convert_qkeras_quantizer(quantizer)\n        return q\n\n    return None\n\n  def make_quantizer(self, quantizer) -> quantizer_impl.IQuantizer:\n    \"\"\"create quantizer according to input qkeras quantizer.\"\"\"\n\n    q = None\n    if quantizer is not None:\n      q = self._make_quantizer_util(quantizer)\n\n    if q is None:\n      return self.make_default_quantizer(\n          mode=self._default_interm_quantizer)\n\n    return q\n\n  def is_quantizer_supported(self, quantizer) -> bool:\n    if quantizer is None:\n      # if None, will use default quantizer defined in config.json\n      return True\n\n    return isinstance(quantizer, tuple(self.quantizer_lookup.keys()))\n\n  def make_default_quantizer(self, mode) -> quantizer_impl.IQuantizer:\n    \"\"\"make quantizer given qkeras quantizer type.\"\"\"\n    if mode == \"fp32\":\n      return quantizer_impl.FloatingPoint(\n          bits=32)\n    elif mode == \"fp16\":\n      return quantizer_impl.FloatingPoint(\n          bits=16)\n    elif mode == \"int8\":\n      qbits = quantizer_impl.QuantizedBits()\n      qbits.convert_qkeras_quantizer(\n          quantizers.quantized_bits(8, 0, 1))\n      return qbits\n    elif mode == \"int16\":\n      qbits = quantizer_impl.QuantizedBits()\n      qbits.convert_qkeras_quantizer(\n          quantizers.quantized_bits(16, 7, 1))\n      return qbits\n    elif mode == \"int32\":\n      qbits = quantizer_impl.QuantizedBits()\n      qbits.convert_qkeras_quantizer(\n          quantizers.quantized_bits(32, 10, 1))\n      return qbits\n    else:\n      try:\n        # string to quantizer object\n        q_name = \"quantizers.\" + 
mode\n        qkeras_object = eval(q_name)  # pylint: disable=eval-used\n        return self._make_quantizer_util(qkeras_object)\n      except:  # pylint: disable=bare-except\n        raise ValueError(\"unaccepted quantizer {}!\".format(mode))\n\n  def clone_quantizer(\n      self, quantizer: quantizer_impl.IQuantizer) -> quantizer_impl.IQuantizer:\n    \"\"\"clone the given quantizer.\"\"\"\n    return copy.deepcopy(quantizer)\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/quantizer_impl.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"atomic quantizer implementation.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport math\n\nimport numpy as np\nfrom qkeras import quantizers\n\nFLOATINGPOINT_BITS = 32\n\n\ndef get_np_value(val):\n  if hasattr(val, \"numpy\"):\n    val = val.numpy()\n    if isinstance(val, np.ndarray) and len(val) == 1:\n      return val[0]\n    else:\n      return val\n  else:\n    return val\n\n\ndef get_exp(quantizer):\n  \"\"\"get max/min exp value for relu_po2 or quantized_po2.\"\"\"\n\n  if quantizer.is_signed:\n    non_sign_bits = quantizer.bits - 1\n  else:\n    non_sign_bits = quantizer.bits\n\n  min_exp = -2 ** (non_sign_bits - 1)\n  max_exp_orig = 2 ** (non_sign_bits - 1) - 1\n\n  max_exp = max_exp_orig\n  # max_value caps how many int_bits actually allowed\n  if quantizer.max_val_po2 != -1:\n    if quantizer.max_val_po2 <= 0:\n      max_exp = 0\n    else:\n      max_exp = math.ceil(np.log2(quantizer.max_val_po2))\n      max_exp = min(max_exp, max_exp_orig)\n\n  # if max_exp<0. 
no need to expand int_bits\n  max_exp = max(0, max_exp)\n\n  return (-min_exp, max_exp)\n\n\nclass IQuantizer(abc.ABC):\n  \"\"\"abstract class for quantizer.\"\"\"\n\n  def __init__(self):\n    self.mode = -1\n    self.bits = -1\n    self.int_bits = -1\n    self.is_signed = 0\n    self.is_floating_point = False\n    self.max_val_po2 = -1\n    self.is_po2 = 0\n    self.name = None\n    self.op_type = \"quantizer\"\n\n\nclass QuantizedBits(IQuantizer):\n  \"\"\"quantized bits.\n\n  Attributes:\n    mode: index of the current quantizer in\n          MultiplierFactory.multiplier_impl_table\n    bits: total bits\n    int_bits: integer bits\n    is_signed: if a signed number\n    name: quantizer name\n  \"\"\"\n\n  def __init__(self):\n    super().__init__()\n    self.mode = 0\n    self.is_signed = 1\n    self.name = \"quantized_bits\"\n\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.quantized_bits):\n    self.mode = 0\n    self.bits = quantizer.bits\n    self.int_bits = get_np_value(quantizer.integer)\n    self.is_signed = quantizer.keep_negative\n\n  def convert_to_qkeras_quantizer(\n      self, symmetric=1, alpha=None, use_stochastic_rounding=False,\n      scale_axis=None, qnoise_factor=1.0, elements_per_scale=None,\n      min_po2_exponent=None, max_po2_exponent=None):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.quantized_bits(\n        bits=self.bits, integer=self.int_bits, keep_negative=self.is_signed,\n        symmetric=symmetric, alpha=alpha,\n        use_stochastic_rounding=use_stochastic_rounding,\n        scale_axis=scale_axis, qnoise_factor=qnoise_factor,\n        elements_per_scale=elements_per_scale,\n        min_po2_exponent=min_po2_exponent, max_po2_exponent=max_po2_exponent)\n\n\nclass QuantizedTanh(QuantizedBits):\n  \"\"\"same as quantized bits.\"\"\"\n\n  def __init__(self):\n    super().__init__()\n    self.name = \"quantized_tanh\"\n\n  def convert_qkeras_quantizer(\n      self, 
quantizer: quantizers.quantized_tanh):\n    self.mode = 0\n    self.bits = quantizer.bits\n    self.is_signed = 1\n\n  def convert_to_qkeras_quantizer(\n      self, symmetric=False, use_stochastic_rounding=False):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.quantized_tanh(\n        bits=self.bits, use_stochastic_rounding=use_stochastic_rounding,\n        symmetric=symmetric)\n\n\nclass QuantizedUlaw(QuantizedBits):\n  \"\"\"quantized ulaw type.\"\"\"\n\n  # same as quantized bits\n  def __init__(self):\n    super().__init__()\n    self.name = \"quantized_ulaw\"\n\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.quantized_ulaw):\n    self.mode = 0\n    self.bits = quantizer.bits\n    self.int_bits = get_np_value(quantizer.integer)\n    self.is_signed = 1\n\n  def convert_to_qkeras_quantizer(self, symmetric=0, u=255.0):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.quantized_ulaw(\n        bits=self.bits, integer=self.int_bits, symmetric=symmetric, u=u)\n\n\nclass Binary(IQuantizer):\n  \"\"\"binary quantizer.\"\"\"\n\n  def __init__(self, use_01=False):\n    super().__init__()\n    if use_01:\n      self.mode = 4\n      self.is_signed = 0\n    else:\n      self.mode = 3\n      self.is_signed = 1\n\n    self.bits = 1\n    self.int_bits = 1\n    self.use_01 = use_01\n    self.name = \"binary\"\n\n  def convert_qkeras_quantizer(self, quantizer: quantizers.binary):\n    if quantizer.use_01:\n      self.mode = 4\n      self.is_signed = 0\n    else:\n      self.mode = 3\n      self.is_signed = 1\n\n    self.use_01 = quantizer.use_01\n\n  def convert_to_qkeras_quantizer(self, alpha=None,\n                                  use_stochastic_rounding=False):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.binary(use_01=self.use_01, alpha=alpha,\n                             use_stochastic_rounding=use_stochastic_rounding)\n\n\nclass 
StochasticBinary(Binary):\n  \"\"\"stochastic binary quantizer.\"\"\"\n\n  # same as binary(-1, 1)\n  def __init__(self):\n    super().__init__(use_01=False)\n    self.name = \"stochastic_binary\"\n\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.stochastic_binary):\n    \"\"\"convert qkeras quantizer to qtools quantizer.\"\"\"\n\n    pass\n\n  def convert_to_qkeras_quantizer(self, alpha=None, temperature=6.0,\n                                  use_real_sigmoid=True):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.stochastic_binary(alpha=alpha, temperature=temperature,\n                                        use_real_sigmoid=use_real_sigmoid)\n\n\nclass Bernoulli(Binary):\n  \"\"\"bernoulli quantizer. same as binary(0, 1).\"\"\"\n\n  def __init__(self):\n    super().__init__(use_01=True)\n    self.name = \"bernoulli\"\n\n  def convert_qkeras_quantizer(self, quantizer: quantizers.bernoulli):\n    pass\n\n  def convert_to_qkeras_quantizer(self, alpha=None, temperature=6.0,\n                                  use_real_sigmoid=True):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.bernoulli(alpha=alpha, temperature=temperature,\n                                use_real_sigmoid=use_real_sigmoid)\n\n\nclass QuantizedRelu(IQuantizer):\n  \"\"\"quantized relu quantizer.\"\"\"\n\n  def __init__(self):\n    super().__init__()\n    self.is_signed = 0\n    self.name = \"quantized_relu\"\n\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.quantized_relu):\n    \"\"\"convert from qkeras quantizer.\"\"\"\n\n    bits = quantizer.bits\n    int_bits = get_np_value(quantizer.integer)\n\n    if bits == 1 and int_bits == 1:\n      mode = 4\n    else:\n      mode = 0\n\n    self.mode = mode\n    self.bits = bits\n    self.int_bits = int_bits\n    if hasattr(quantizer, \"negative_slope\") and quantizer.negative_slope != 0:\n      self.is_signed = 1\n\n  def 
convert_to_qkeras_quantizer(\n      self, use_sigmoid=0, negative_slope=0.0, use_stochastic_rounding=False,\n      relu_upper_bound=None, is_quantized_clip=True, qnoise_factor=1.0):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.quantized_relu(\n        bits=self.bits, integer=self.int_bits, use_sigmoid=use_sigmoid,\n        negative_slope=negative_slope,\n        use_stochastic_rounding=use_stochastic_rounding,\n        relu_upper_bound=relu_upper_bound,\n        is_quantized_clip=is_quantized_clip,\n        qnoise_factor=qnoise_factor)\n\n\nclass Ternary(IQuantizer):\n  \"\"\"ternary(0, 1, -1).\"\"\"\n\n  def __init__(self):\n    super().__init__()\n    self.mode = 2\n    self.bits = 2\n    self.int_bits = 2\n    self.is_signed = 1\n    self.name = \"ternary\"\n\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.ternary):\n    pass\n\n  def convert_to_qkeras_quantizer(\n      self, alpha=None, threshold=None, use_stochastic_rounding=False,\n      number_of_unrolls=5):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.ternary(\n        alpha=alpha, threshold=threshold,\n        use_stochastic_rounding=use_stochastic_rounding,\n        number_of_unrolls=number_of_unrolls)\n\n\nclass StochasticTernary(Ternary):\n  \"\"\"stochastic ternary.\"\"\"\n\n  def __init__(self):\n    super().__init__()\n    self.name = \"stochastic_ternary\"\n\n  # same as ternary\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.stochastic_ternary):\n    pass\n\n  def convert_to_qkeras_quantizer(\n      self, alpha=None, threshold=None, temperature=8.0,\n      use_real_sigmoid=True, number_of_unrolls=5):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    return quantizers.stochastic_ternary(\n        alpha=alpha, threshold=threshold, temperature=temperature,\n        use_real_sigmoid=use_real_sigmoid,\n        number_of_unrolls=number_of_unrolls)\n\n\nclass 
FloatingPoint(IQuantizer):\n  \"\"\"float32.\"\"\"\n\n  def __init__(self, bits):\n    super().__init__()\n    self.mode = 5\n    self.bits = bits\n    self.int_bits = -1\n    self.is_signed = 1\n    self.is_floating_point = True\n    self.name = \"floating_point\"\n\n  def convert_qkeras_quantizer(self, bits):\n    pass\n\n  def convert_to_qkeras_quantizer(self, bits):\n    pass\n\n\nclass PowerOfTwo(IQuantizer):\n  \"\"\"po2.\"\"\"\n\n  def __init__(self, is_signed=True):\n    super().__init__()\n    self.mode = 1\n    self.is_po2 = 1\n    self.is_signed = is_signed\n    self.inference_value_counts = -1\n\n    if is_signed:\n      self.name = \"quantized_po2\"\n    else:\n      self.name = \"quantized_relu_po2\"\n\n  def convert_qkeras_quantizer(self, quantizer):\n    \"\"\"convert qkeras quantizer to qtools quantizer.\"\"\"\n\n    assert \"po2\" in quantizer.__class__.__name__\n\n    if quantizer.__class__.__name__ == \"quantized_po2\":\n      self.is_signed = 1\n      self.name = \"quantized_po2\"\n\n    elif quantizer.__class__.__name__ == \"quantized_relu_po2\":\n      super().__init__()\n      self.is_signed = 0\n      self.name = \"quantized_relu_po2\"\n\n    bits = quantizer.bits\n    max_val_po2 = quantizer.max_value\n    if not max_val_po2:\n      self.max_val_po2 = -1\n    else:\n      self.max_val_po2 = max_val_po2\n    self.bits = bits\n    self.int_bits = bits\n\n  def convert_to_qkeras_quantizer(\n      self, negative_slope=0, use_stochastic_rounding=False,\n      quadratic_approximation=False):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    if self.is_signed:\n      # quantized_po2\n      return quantizers.quantized_po2(\n          bits=self.bits,\n          max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,\n          use_stochastic_rounding=use_stochastic_rounding,\n          quadratic_approximation=quadratic_approximation)\n    else:\n      # quantized_relu_po2\n      return quantizers.quantized_relu_po2(\n     
     bits=self.bits,\n          max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,\n          negative_slope=negative_slope,\n          use_stochastic_rounding=use_stochastic_rounding,\n          quadratic_approximation=quadratic_approximation)\n\n  def get_min_max_exp(self):\n    return get_exp(self)\n\n  def quantizer_bits_calculator(self, val):\n    \"\"\"calculate how many bits needed.\"\"\"\n\n    # calculate how many bits are required to represent a po2 value.\n    # val can be +/- values, can be integer or franctional number.\n    # needs to be dealt seperately.\n\n    sign_bit = val < 0\n\n    # get rid of sign\n    val = abs(val)\n\n    if val == 0:\n      # val of 0 is special case; qkeras uses mininmum\n      # number to represent 0\n      non_sign_bits = self.bits - sign_bit\n    else:\n      exp_value = np.log2(val)\n\n      # exp_value should be integer\n      if abs(np.round(exp_value) - exp_value) > 0:\n        raise ValueError(\"ERROR: {} is not a po2 value!\".format(val))\n\n      exp_value = int(exp_value)\n\n      # for n bits, the range of values it can represent is:\n      # min_val = -2 ** (n - 1)\n      # max_val = 2 ** (n - 1) - 1\n      if exp_value == 0:\n        non_sign_bits = 1\n      elif exp_value > 0:\n        # e.g., 16 needs 5 bits + 1 exp sign bit,\n        # 15 needs 4 bits + 1 exp sign bit\n        non_sign_bits = math.floor(np.log2(exp_value)) + 1 + 1\n      else:\n        # e.g., -16 needs 4 bits + 1 exp sign bit\n        non_sign_bits = math.ceil(np.log2(abs(exp_value))) + 1\n\n    return (sign_bit, non_sign_bits)\n\n  def update_quantizer(self, val, reset=False):\n    \"\"\"update quantizer bits according to the input value.\n\n    Args:\n      val: input value\n      reset: True->disregard current quantizer bits and reset\n        it according to the given value; False-> update the quantizer\n        bits with given value.\n        quantizer.bits = min(existing_bits, bits required by val)\n\n    Returns:\n      
Update existing po2 quantizer bits by val.\n       quantizer.bits = min(existing_bits, bits required by val)\n    \"\"\"\n    (sign_bit, non_sign_bits) = self.quantizer_bits_calculator(val)\n\n    if reset:\n      self.bits = sign_bit + non_sign_bits\n    else:\n      # avoid input value exceeding quantizer limit\n      self.bits = min(self.bits, sign_bit + non_sign_bits)\n\n    self.int_bits = self.bits\n    self.max_val_po2 = min(val, self.max_val_po2)\n    self.is_signed = sign_bit\n\n    if sign_bit:\n      self.name = \"quantized_po2\"\n    else:\n      self.name = \"quantized_relu_po2\"\n\n  def update_inference_values(self, weights):\n    \"\"\"find how many different values in weights in the po2 quantizer.\"\"\"\n\n    inference_value_counts = len(set(weights.flatten()))\n    self.inference_value_counts = inference_value_counts\n\n\nclass ReluPowerOfTwo(PowerOfTwo):\n  \"\"\"relu po2.\"\"\"\n\n  def __init__(self):\n    super().__init__()\n    self.mode = 1\n    self.is_po2 = 1\n    self.is_signed = 0\n    self.name = \"quantized_relu_po2\"\n\n  def convert_qkeras_quantizer(\n      self, quantizer: quantizers.quantized_relu_po2):\n\n    self.bits = quantizer.bits\n    self.int_bits = quantizer.bits\n    if not quantizer.max_value:\n      self.max_val_po2 = -1\n    else:\n      self.max_val_po2 = quantizer.max_value\n\n  def convert_to_qkeras_quantizer(\n      self, negative_slope=0, use_stochastic_rounding=False,\n      quadratic_approximation=False):\n    \"\"\"convert qtools quantizer to qkeras quantizer.\"\"\"\n\n    # quantized_relu_po2\n    return quantizers.quantized_relu_po2(\n        bits=self.bits,\n        max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,\n        negative_slope=negative_slope,\n        use_stochastic_rounding=use_stochastic_rounding,\n        quadratic_approximation=quadratic_approximation)\n"
  },
  {
    "path": "qkeras/qtools/quantized_operators/subtractor_factory.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"\"create subtractor quantizer.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom qkeras.qtools.quantized_operators import adder_factory\nfrom qkeras.qtools.quantized_operators import adder_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\n\n\nclass ISubtractor(adder_factory.IAdder):\n  \"\"\"Create a subtractor instance.\n\n  The methods in subtractor is mostly inherited from adder\n  with a few exceptions.\n  \"\"\"\n\n  def make_quantizer(self, quantizer_1: quantizer_impl.IQuantizer,\n                     quantizer_2: quantizer_impl.IQuantizer):\n    \"\"\"make an ISubtractor instance.\n\n    if quantizer1 and quantizer2 are both non-signed, result should change\n    to signed; else since a sign bit is already present,\n    no need to add extra sign bit\n\n    Args:\n      quantizer_1: first operand\n      quantizer_2: second operand\n\n    Returns:\n      An ISubtractor instance\n    \"\"\"\n    quantizer = super().make_quantizer(quantizer_1, quantizer_2)\n\n    if not isinstance(quantizer, adder_impl.FloatingPoint_Adder):\n      if not quantizer_1.is_signed and not quantizer_2.is_signed:\n        quantizer.output.is_signed = 1\n        quantizer.output.bits += 1\n\n    return 
quantizer\n"
  },
  {
    "path": "qkeras/qtools/run_qtools.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Interface for running qtools and qenergy.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\nimport numpy as np\n\nfrom qkeras.qtools import generate_layer_data_type_map\nfrom qkeras.qtools import interface\nfrom qkeras.qtools import qgraph\nfrom qkeras.qtools import qtools_util\nfrom qkeras.qtools.config_public import config_settings\nfrom qkeras.qtools.qenergy import qenergy\nfrom qkeras.qtools.settings import cfg\n\n\nclass QTools:\n  \"\"\"integration of different qtools functions.\"\"\"\n\n  def __init__(self, model, process, source_quantizers=None,\n               is_inference=False, weights_path=None,\n               keras_quantizer=None, keras_accumulator=None,\n               for_reference=False,\n               model_weights_already_quantized=True,\n               hw_weight_dict=None):\n\n    if model is not None:\n      self._model = model\n\n    if weights_path is not None:\n      self._model.load_weights(weights_path)\n\n    cfg.update(process, config_settings)\n\n    # if source_quantizers is None, CreateGraph will use\n    # default_source_quantizers defined in cfg\n    (graph, source_quantizer_list) = qgraph.CreateGraph(\n        model, source_quantizers, 
cfg.default_source_quantizer)\n\n    # qgraph.PrintGraph(graph)\n    qgraph.GraphPropagateActivationsToEdges(graph)\n    self._layer_map = generate_layer_data_type_map.generate_layer_data_type_map(\n        graph, source_quantizer_list, is_inference,\n        keras_quantizer, keras_accumulator, for_reference,\n        model_weights_already_quantized=model_weights_already_quantized,\n        hw_weight_dict=hw_weight_dict)\n\n    self._output_dict = interface.map_to_json(self._layer_map)\n    self.source_quantizer_list = source_quantizer_list\n\n  def qtools_stats_to_json(self, json_name):\n    \"\"\"dump the layer stats to a json file.\"\"\"\n\n    with open(json_name, \"w\") as outfile:\n      json.dump(self._output_dict, outfile, indent=4)\n\n  def qtools_stats_print(self):\n    \"\"\"print out the layer stats.\"\"\"\n\n    dict_to_json = json.dumps(self._output_dict, indent=4)\n    print(dict_to_json)\n\n  def pe(self, weights_on_memory=\"dram\",\n         activations_on_memory=\"dram\",\n         min_sram_size=0,\n         rd_wr_on_io=True,\n         verbose=False):\n    \"\"\"energy consumption calculation.\"\"\"\n\n    assert weights_on_memory in [\"dram\", \"sram\", \"fixed\"]\n    energy_dict = qenergy.energy_estimate(\n        self._model, self._layer_map, weights_on_memory,\n        activations_on_memory, min_sram_size,\n        rd_wr_on_io)\n\n    if verbose:\n      print(\"COST:\")\n      dict_to_json = json.dumps(energy_dict, indent=4)\n      print(dict_to_json)\n\n    return energy_dict\n\n  def extract_energy_sum(self, cfg_setting, energy_dict):\n    \"\"\"extracted energy needed in calculating sum.\"\"\"\n\n    value = 0\n    for layer in energy_dict.keys():\n      if layer == \"total_cost\":\n        continue\n\n      class_name = energy_dict[layer][\"class_name\"]\n      keys = cfg_setting.get(class_name, cfg_setting.get(\"default\", []))\n      value += sum([energy_dict[layer][\"energy\"][key] for key in keys])\n\n    return int(value)\n\n  def 
extract_energy_profile(self, cfg_setting, energy_dict):\n    \"\"\"extract energy consumption in each layer.\"\"\"\n\n    energy = {}\n    for layer in energy_dict.keys():\n      if layer == \"total_cost\":\n        continue\n\n      class_name = energy_dict[layer][\"class_name\"]\n      keys = cfg_setting.get(class_name, cfg_setting.get(\"default\", []))\n      energy[layer] = {}\n      energy[layer][\"energy\"] = energy_dict[layer][\"energy\"]\n      energy[layer][\"total\"] = sum(\n          [energy_dict[layer][\"energy\"][key] for key in keys])\n\n    return energy\n\n  def calculate_ace(self, default_float_bits):\n    \"\"\"Computes ACE numbers from conv/dense layers.\"\"\"\n\n    def _get_ace(layer):\n      ace = 0\n      ace_float = 0\n      if layer.name in self._output_dict:\n        layer_item = self._output_dict[layer.name]\n        # Here we only consider the number of multiplication as the\n        # operation count. To include the number of\n        # accumulators, we should multiply the value by 2, assuming\n        # accumulation count ~= multiplication count.\n        operation_count = layer_item[\"operation_count\"]\n\n        # Input bitwidth.\n        input_quantizer_list = layer_item[\"input_quantizer_list\"]\n        input_bits = input_quantizer_list[0][\"bits\"]\n\n        # Weight bitwidth\n        weight_quantizer = qtools_util.get_val(layer_item, \"weight_quantizer\")\n        if weight_quantizer:\n          # Only layers such as Conv/Dense have weight_quantizers.\n          w_bits = weight_quantizer[\"bits\"]\n          ace = operation_count * input_bits * w_bits\n          ace_float = operation_count * default_float_bits * default_float_bits\n      return (ace, ace_float)\n\n    print(\"WARNING: ACE are computed from conv/dense layers only!\")\n    return (sum([_get_ace(l)[0] for l in self._model.layers]),\n            sum([_get_ace(l)[1] for l in self._model.layers]))\n\n  def calculate_output_bytes(self, include_model_input_size,\n     
                        default_float_bits):\n    \"\"\"Computes activation layers' output size in bytes.\"\"\"\n\n    def _get_activation_size(layer):\n      # Since in hardware previous conv/dense layers will be fused with\n      # the following activation layers, we only consider the output of\n      # Activation layers when calculating output sizes.\n      if layer.__class__.__name__ in [\"QActivation\"]:\n        layer_item = self._output_dict[layer.name]\n\n        output_quantizer = layer_item[\"output_quantizer\"]\n        output_shape = output_quantizer[\"shape\"]\n        o_bits = output_quantizer[\"bits\"]\n        return (int(np.prod(output_shape[1:]) * o_bits / 8.0),\n                int(np.prod(output_shape[1:]) * default_float_bits / 8.0))\n      else:\n        return (0, 0)\n\n    output_bytes = sum([_get_activation_size(l)[0] for l in self._model.layers])\n    output_bytes_float = sum([_get_activation_size(l)[1] for l in\n                              self._model.layers])\n    if include_model_input_size:\n      # Include model input size.\n      output_bytes += (np.prod(self._model.input_shape[1:])\n                       * self.source_quantizer_list[0].bits / 8.0)\n      output_bytes_float += (np.prod(self._model.input_shape[1:]) *\n                             default_float_bits/ 8.0)\n\n    return (output_bytes, output_bytes_float)\n\n  def calculate_weight_bytes(self, default_float_bits):\n    \"\"\"Computes weight size in bytes from conv/dense layers.\"\"\"\n\n    def _get_weight_size(layer):\n      weight_bytes = 0\n      weight_bytes_float = 0\n\n      if layer.name in self._output_dict:\n        layer_item = self._output_dict[layer.name]\n        weight_quantizer = qtools_util.get_val(layer_item, \"weight_quantizer\")\n\n        if weight_quantizer:\n          # Calculates kernel bytes.\n          w_bits = weight_quantizer[\"bits\"]\n          weight_bytes += int(np.prod(layer.weights[0].shape) * w_bits / 8.0)\n          weight_bytes_float 
+= int(np.prod(layer.weights[0].shape) *\n                                    default_float_bits / 8.0)\n          # Calculates bias bytes.\n          if hasattr(layer, \"use_bias\") and layer.use_bias:\n            bias_quantizer = qtools_util.get_val(layer_item, \"bias_quantizer\")\n\n            assert bias_quantizer is not None, (\n                f\"{layer.name} has no bias_quantizer!\")\n            b_bits = bias_quantizer[\"bits\"]\n            weight_bytes += int(np.prod(layer.weights[1].shape) * b_bits / 8.0)\n            weight_bytes_float += int(np.prod(layer.weights[1].shape) *\n                                      default_float_bits / 8.0)\n      return (weight_bytes, weight_bytes_float)\n\n    return (sum([_get_weight_size(l)[0] for l in self._model.layers]),\n            sum([_get_weight_size(l)[1] for l in self._model.layers]))\n\n  def get_roofline_numbers(self, include_model_input_size=True,\n                           default_float_bits=32):\n    \"\"\"Extracts model numbers for roofline model analysis.\"\"\"\n\n    return {\"ACE\": self.calculate_ace(default_float_bits)[0],\n            \"weight_in_bytes\": self.calculate_weight_bytes(\n                default_float_bits)[0],\n            \"activation_in_bytes\": self.calculate_output_bytes(\n                include_model_input_size, default_float_bits)[0],\n            \"ACE_float\": self.calculate_ace(\n                default_float_bits)[1],\n            \"weight_in_bytes_float\": self.calculate_weight_bytes(\n                default_float_bits)[1],\n            \"activation_in_bytes_float\": self.calculate_output_bytes(\n                include_model_input_size, default_float_bits)[1]}\n\n\n"
  },
  {
    "path": "qkeras/qtools/settings.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"configurations.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\n\nclass ConfigClass:\n  \"\"\"configuration class.\"\"\"\n\n  def __init__(self):\n\n    self.default_source_quantizer = \"quantized_bits(8, 0, 1)\"\n    self.default_interm_quantizer = \"fp32\"\n\n    # Horowitz estimates from ISSCC 2014\n\n    self.fpm_add = np.poly1d([0.003125, 0])\n    self.fpm_mul = np.poly1d([0.002994791667, 0.001041666667, 0])\n    self.fp16_add = np.poly1d([0.4])\n    self.fp16_mul = np.poly1d([1.1])\n    self.fp32_add = np.poly1d([0.9])\n    self.fp32_mul = np.poly1d([3.7])\n\n    self.sram_rd = np.poly1d([0.02455, -0.2656, 0.8661])\n    self.dram_rd = np.poly1d([20.3125, 0])\n    self.sram_mul_factor = 1/64.\n    self.dram_mul_factor = 1.0\n\n    self.include_energy = {}\n    self.include_energy[\"default\"] = [\"inputs\", \"parameters\", \"op_cost\"]\n    self.include_energy[\"QActivation\"] = [\"outputs\"]\n    self.include_energy[\"QAdaptiveActivation\"] = [\"outputs\"]\n    self.include_energy[\"Activation\"] = [\"outputs\"]\n    self.include_energy[\"QBatchNormalization\"] = [\"parameters\"]\n    self.include_energy[\"BatchNormalization\"] = [\"parameters\"]\n    self.include_energy[\"Add\"] 
= [\"op_cost\"]\n    self.include_energy[\"Subtract\"] = [\"op_cost\"]\n    self.include_energy[\"MaxPooling2D\"] = [\"op_cost\"]\n    self.include_energy[\"default\"] = [\"inputs\", \"parameters\", \"op_cost\"]\n\n  def update(self, process, cfg_setting):\n    \"\"\"update config.\"\"\"\n\n    # pylint: disable=bare-except\n    try:\n      self.default_source_quantizer = cfg_setting[\n          \"default_source_quantizer\"]\n    except:\n      pass\n\n    try:\n      self.default_interm_quantizer = cfg_setting[\n          \"default_interm_quantizer\"]\n    except:\n      pass\n\n    try:\n      self.fpm_add = np.poly1d(cfg_setting[process][\"fpm_add\"])\n    except:\n      pass\n\n    try:\n      self.fpm_mul = np.poly1d(cfg_setting[process][\"fpm_mul\"])\n    except:\n      pass\n\n    try:\n      self.fp16_add = np.poly1d(cfg_setting[process][\"fp16_add\"])\n    except:\n      pass\n\n    try:\n      self.fp16_mul = np.poly1d(cfg_setting[process][\"fp16_mul\"])\n    except:\n      pass\n\n    try:\n      self.fp32_add = np.poly1d(cfg_setting[process][\"fp32_add\"])\n    except:\n      pass\n\n    try:\n      self.fp32_mul = np.poly1d(cfg_setting[process][\"fp32_mul\"])\n    except:\n      pass\n\n    try:\n      self.sram_rd = np.poly1d(cfg_setting[process][\"sram_rd\"])\n    except:\n      pass\n\n    try:\n      self.dram_rd = np.poly1d(cfg_setting[process][\"dram_rd\"])\n    except:  # pylint: disable=broad-except\n      pass\n\n    try:\n      for key in cfg_setting[\"include_energy\"]:\n        self.include_energy[key] = cfg_setting[\"include_energy\"][key]\n        if \"Q\" == key[0]:\n          # use the same rule for keras layer and qkeras layer\n          self.include_energy[key[1:]] = cfg_setting[\"include_energy\"][key]\n    except:\n      pass\n\n\ncfg = ConfigClass()\n\n"
  },
  {
    "path": "qkeras/quantizer_imports.py",
    "content": "# Copyright 2025 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Imports for QKeras quantizers.\"\"\"\n\nfrom .quantizers import bernoulli\nfrom .quantizers import binary\nfrom .quantizers import quantized_bits\nfrom .quantizers import quantized_hswish\nfrom .quantizers import quantized_linear\nfrom .quantizers import quantized_po2\nfrom .quantizers import quantized_relu\nfrom .quantizers import quantized_relu_po2\nfrom .quantizers import quantized_sigmoid\nfrom .quantizers import quantized_tanh\nfrom .quantizers import quantized_ulaw\nfrom .quantizers import stochastic_binary\nfrom .quantizers import stochastic_ternary\nfrom .quantizers import ternary\n"
  },
  {
    "path": "qkeras/quantizer_registry.py",
    "content": "# Copyright 2024 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Registry for QKeras quantizers.\"\"\"\n\nfrom . import registry\n\n# Global registry for all QKeras quantizers.\n_QUANTIZERS_REGISTRY = registry.Registry()\n\n\ndef register_quantizer(quantizer):\n  \"\"\"Decorator for registering a quantizer.\"\"\"\n  _QUANTIZERS_REGISTRY.register(quantizer)\n  # Return the quantizer after registering. This ensures any registered\n  # quantizer class is properly defined.\n  return quantizer\n\n\ndef lookup_quantizer(name):\n  \"\"\"Retrieves a quantizer from the quantizers registry.\"\"\"\n  return _QUANTIZERS_REGISTRY.lookup(name)\n"
  },
  {
    "path": "qkeras/quantizers.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport re\nfrom typing import Any, List, Tuple, cast\n\nimport numpy as np\nimport six\nfrom six.moves import range\nimport tensorflow.compat.v2 as tf\nfrom tensorflow.keras import initializers\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.utils import deserialize_keras_object\n\nfrom . import base_quantizer\nfrom . import quantizer_registry\n# from .google_internals.experimental_quantizers import parametric_quantizer_d_xmax\n# from .google_internals.experimental_quantizers import quantized_bits_learnable_scale\nfrom .safe_eval import safe_eval\nfrom tensorflow.python.framework import smart_cond as tf_utils\n\n#\n# Library of auxiliary functions\n#\n\n\ndef get_weight_scale(quantizer, x=None):\n  \"\"\"Gets the scales of weights for (stochastic_)binary and ternary quantizers.\n\n  Arguments:\n    quantizer: A binary or ternary quantizer class.\n    x: A weight tensor.  
We keep it here for now for backward compatibility.\n\n  Returns:\n    Weight scale per channel for binary and ternary\n    quantizers with auto or auto_po2 alpha/threshold.\n  \"\"\"\n  if hasattr(quantizer, \"scale\") and quantizer.scale is not None:\n    return K.eval(quantizer.scale)\n  return 1.0\n\n\ndef _get_integer_bits(min_value,\n                      max_value,\n                      bits=8,\n                      symmetric=False,\n                      keep_negative=False,\n                      is_clipping=True):\n  \"\"\"Estimates the integer bit(number of bits to the left of the binary point)\n  satisfying the input argument constraints.\n\n  Args:\n    min_value: A tensor object. Its elements are in float representing the\n      minimum values of ranges.\n    max_value: A tensor object. Its elements are in float representing the\n      maximum values of ranges.\n    bits: number of bits to perform quantization.\n    symmetric: boolean type. if true, it enforces negative and positive ranges\n      to be symmetric.\n    keep_negative: boolean type. if true, we do not clip negative numbers.\n    is_clipping: boolean type. 
if true, the min_value and max_value are clipped\n      to nearest powers-of-2.\n\n  Returns:\n    integer_bits : number of bits to the left of the binary point.\n  \"\"\"\n  # Make the min and max values positive if only using positive values\n  if not keep_negative:\n    min_value = K.maximum(min_value, 0)\n    max_value = K.maximum(max_value, 0)\n\n  # The number of bits excluding the sign bit\n  unsigned_bits = bits - keep_negative\n\n  # log2 of absolute min_value and max_value\n  min_value_log2 = K.log(K.abs(min_value)) / np.log(2.0)\n  max_value_log2 = K.log(K.abs(max_value)) / np.log(2.0)\n\n  # Estimate integer_bits\n  if is_clipping:\n    min_int_bits = tf.math.round(\n        tf.where(min_value_log2 > 0, min_value_log2, 0))\n    max_int_bits = tf.math.round(\n        tf.where(max_value_log2 > 0, max_value_log2, 0))\n  else:\n    min_int_bits = tf.math.ceil(tf.where(min_value_log2 > 0, min_value_log2, 0))\n    max_int_bits = tf.math.ceil(tf.where(max_value_log2 > 0, max_value_log2, 0))\n    # Checks max_value is bounded by the maximum positive value of\n    # pow(2,integer_bits) - pow(2,-fractional_bits).\n    max_value_po2 = pow(2.0, max_int_bits) - pow(\n        2.0, K.minimum(max_int_bits - unsigned_bits, 0))\n    max_int_bits = tf.where(max_value <= max_value_po2, max_int_bits,\n                            max_int_bits + 1)\n    if symmetric:\n      # Checks min_value is bounded by the minimum negative value of\n      # - pow(2,integer_bits) + pow(2,-fractional_bits).\n      min_value_po2 = -pow(2.0, min_int_bits) + pow(\n          2.0, K.minimum(min_int_bits - unsigned_bits, 0))\n      min_int_bits = tf.where(min_value_po2 <= min_value, min_int_bits,\n                              min_int_bits + 1)\n\n  # To cover both negative and positive ranges with integer_bits.\n  # (For keep_negative=False, min_int_bits is 0.)\n  integer_bits = tf.cast(K.maximum(min_int_bits, max_int_bits), dtype=tf.int32)\n  # It assumes that integer_bits cannot be greater than 
unsigned_bits\n  integer_bits = K.minimum(unsigned_bits, integer_bits)\n\n  return integer_bits\n\n\ndef _get_scaling_axis(scale_axis: Any, len_axis: int) -> List[int]:\n  \"\"\"Get the axis/axes to perform auto scaling at.\n\n  Args:\n    scale_axis: int or List[int] representing which axis/axes to calculate\n     scale at.\n    len_axis: int representing the shape of the tensor on which scaling is\n      performed.\n\n  Returns:\n    List[int] representing the scaling axes.\n\n  \"\"\"\n\n  if scale_axis is not None:\n    # if scale_axis is set, scale over all axis except the scale_axis.\n    if isinstance(scale_axis, list):\n      axis = [i for i in range(len_axis) if i not in scale_axis]\n    else:\n      axis = tf.range(scale_axis)\n      axis = tf.concat([axis, tf.range(scale_axis + 1, len_axis)], axis=0)\n  else:\n    # if scale_axis is not set, scale over all axis except the channel axis.\n    if K.image_data_format() == \"channels_last\":\n      axis = tf.range(tf.math.maximum(len_axis - 1, 0))\n    else:\n      axis = tf.range(1, len_axis)\n  return axis\n\n\ndef _get_unrolled_shape(input_shape: List[int], unroll_factor: Any,\n                        unroll_axis: Any) -> Tuple[List[int], Any]:\n  \"\"\"Gets the shape of the unrolled tensor given unroll_factor and unroll_axis.\n\n  Both unroll_factor and unroll_axis can either be ints or List[int]. If they\n  are List[int], their lengths must match, and their values represent every\n  unroll axis and its corresponding unroll factor.\n\n  Examples:\n  1. If input_shape = [16, 32], the unroll_factor = 4, and unroll_axis = 1. This\n     means that axis 1 of the input should be unrolled by a factor of 4. This\n     function would return a tuple; the first element represents the unrolled\n     shape [16, 8, 4], and the second element represents the updated unroll axis\n     in the unrolled shape which, in this case, is still 1.\n  2. 
If input_shape = [16, 32], the unroll_factor = [2, 4], and unroll_axis =\n     [0, 1]. This means that axis 0 of the input should be unrolled by a factor\n     of 2, while axis 1 of the input should be unrolled by a factor of 4. This\n     function would return a tuple; the first element represents the unrolled\n     shape [4, 2, 8, 4], and the second element represents the updated unroll\n     axis in the unrolled shape which, in this case, will be [0, 2].\n\n  Args:\n    input_shape: List[int]. The shape of the input tensor to be unrolled.\n    unroll_factor: int or List[int] representing the unrolling factor(s) across\n      various dimensions of the input tensors. If a list is used, its length has\n      to match unroll_axis.\n    unroll_axis: int or List[int] representing which axis/axes to unroll. If\n      a list is used, its length has to match unroll_factor.\n\n  Returns:\n    Tuple of (List of ints representing the shape of the unrolled tensor,\n    Int or List[int] representing updated scale_axis after unrolling.\n  \"\"\"\n  def _unroll_one_axis(shape, factor, axis):\n    shape[axis] = shape[axis] // factor\n    shape.insert(axis + 1, factor)\n\n  unrolled_shape = input_shape.copy()\n\n  if isinstance(unroll_factor, int) and isinstance(unroll_axis, int):\n    unrolled_scale_axis = unroll_axis\n    _unroll_one_axis(unrolled_shape, unroll_factor, unroll_axis)\n\n  elif isinstance(unroll_factor, list) and isinstance(unroll_axis, list):\n    # axis_shift shifts the pre-defined axis every time we add a new\n    # unrolled axis\n    assert len(unroll_axis) == len(unroll_factor), (\n        \"unroll_axis and unroll_factor must have the same length\")\n\n    unrolled_scale_axis = unroll_axis.copy()\n    axis_shift = 0\n    for idx, (axis, factor) in enumerate(zip(unroll_axis, unroll_factor)):\n      unrolled_scale_axis[idx] += axis_shift\n      _unroll_one_axis(unrolled_shape, factor, axis+axis_shift)\n      axis_shift += 1\n  else:\n    raise ValueError(\n     
   \"Both unroll_factor and unroll_axis has to be either ints or lists\"\n    )\n  return unrolled_shape, unrolled_scale_axis\n\n\ndef _get_rolled_back_shape(input_shape: List[int], roll_axis: Any) -> List[int]:\n  \"\"\"Gets the shape of the rolled back tensor given roll_axis.\n\n  If roll_axis is an int, the input shape will be rolled back once along the\n  roll_axis. If roll_axis is List[int], the input shape will be rolled back\n  len(roll_axis) times.\n\n  Examples:\n  1. If input_shape = [4, 2, 8, 4] and roll_axis = 1. This means that the axis\n     following axis 1 will be rolled back to axis 1. This function would return\n     a the rolled back shape which is [4, 16, 4] in this case.\n  2. If input_shape = [4, 2, 8, 4] and roll_axis = [0, 2]. This means that the\n     axis following axis 0 will be rolled back to axis 0, and the axis following\n     axis 2 will be rolled back to axis 2. This function would return the rolled\n     back shape which is [16, 32] in this case.\n\n  Args:\n    input_shape: List[int]. 
The shape of the input tensor to be rolled back.\n    roll_axis: int or List[int] representing which axis/axes of the tensor to\n      roll back.\n\n  Returns:\n    List of ints representing the shape of the rolled back tensor.\n  \"\"\"\n  def _roll_back_one_axis(shape, axis):\n    shape[axis] *= shape[axis+1]\n    shape.pop(axis + 1)\n\n  rolled_shape = input_shape.copy()\n\n  if isinstance(roll_axis, int):\n    _roll_back_one_axis(rolled_shape, roll_axis)\n\n  elif isinstance(roll_axis, list):\n    # axis_shift shifts the pre-defined axis every time we roll back an axis.\n    axis_shift = 0\n    for axis in roll_axis:\n      _roll_back_one_axis(rolled_shape, axis+axis_shift)\n      axis_shift -= 1\n\n  return rolled_shape\n\n\ndef _validate_axis_and_eps(x_shape: List[int], scale_axis: Any,\n                           elements_per_scale: Any) -> Tuple[Any, Any]:\n  \"\"\"Validates scale_axis and elements_per_scale.\n\n  This function verifies that the values for scale_axis and elements_per_scale\n  are valid and perform any required transformations returning a Tuple of\n  verified (scale_axis, elements_per_scale)\n\n  This function accepts scale_axis and elements_per_scale to be either ints or\n  list of ints, so it verifies 4 different scenarios:\n  1. If both scale_axis and elements_per_scale are ints. The function verifies\n     that the x_shape is divisible by elements_per_scale at the scale_axis.\n  2. If scale_axis is an int while elements_per_scale is a list. The function\n     raises an error since this is an ambiguous state.\n  3. If scale_axis is a list and elements_per_scale is an int. The function\n     modifies elements_per_scale to a list of length scale_axis, and it verifies\n     that the x_shape is divisible by the elements_per_scale at the\n     corresponding scale_axis.\n  4. If scale_axis is a list and elements_per_scale is a list. 
The function\n     verifies that the length of the two lists match, and that the x_shape is\n     divisible by the corresponding elements_per_scale at the corresponding\n     scale_axis.\n\n  Examples:\n  - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=4 --> Valid\n  - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=3 --> Invalid\n  - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=[2, 4]\n    --> Invalid\n  - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=2\n    --> Valid\n  - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=[2, 4]\n    --> Valid\n  - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=[1, 2, 4]\n    --> Invalid\n\n  Args:\n    x_shape: List[int] representing the shape of the input tensor.\n    scale_axis: Int or List[int] representing the axis/axes to perform auto\n      scaling at.\n    elements_per_scale: Int or List[int] representing the number of\n     elements/values associated with every scale along the corresponding\n     scale_axis.\n\n  Returns:\n    A Tuple of verified (scale_axis, elements_per_scale).\n  \"\"\"\n\n  assert (\n      scale_axis is not None\n  ), \"scale_axis must be set if elements_per_scale is used.\"\n\n  # if both are ints\n  if isinstance(scale_axis, int) and isinstance(elements_per_scale, int):\n    assert x_shape[scale_axis] % elements_per_scale == 0, (\n        f\"scaling axis of dimension {x_shape[scale_axis]} has to be divisible \"\n        f\"by thenumber of elements per scale, given {elements_per_scale}.\"\n    )\n\n  # if scale_axis is int and elements_per_scale is a list of ints\n  elif isinstance(scale_axis, int) and isinstance(elements_per_scale, list):\n    raise ValueError(\n        f\"scale_axis is an integer {scale_axis}, \"\n        f\"while {elements_per_scale} is a list of values which is ambigious.\"\n    )\n\n  # if scale_axis is list of ints and elements_per_scale is an int\n  elif 
isinstance(scale_axis, list) and isinstance(elements_per_scale, int):\n    for axis in scale_axis:\n      assert x_shape[axis] % elements_per_scale == 0, (\n          f\"scaling axis of dimension {x_shape[axis]} has to be divisible by \"\n          f\"number of elements per scale, given {elements_per_scale}.\"\n      )\n    # duplicate the elements_per_scale to match length of scale_axis\n    elements_per_scale = [elements_per_scale] * len(scale_axis)\n\n  # if both scale_axis and elements_per_scale are lists\n  else:\n    assert len(scale_axis) == len(\n        elements_per_scale\n    ), (f\"both scale_axis and elements_per_scale lists must match in length; \"\n        f\"Got {len(scale_axis)} and {len(elements_per_scale)}\")\n    for axis, eps in zip(scale_axis, elements_per_scale):\n      assert x_shape[axis] % eps == 0, (\n          f\"scaling axis of dimension {x_shape[axis]} has to be divisible by\"\n          f\" the corresponding number of elements per scale, given {eps}.\"\n      )\n\n  assert (\n      isinstance(scale_axis, int) and isinstance(elements_per_scale, int)\n  ) or (isinstance(scale_axis, list) and isinstance(elements_per_scale, list))\n\n  return scale_axis, elements_per_scale\n\n\ndef _repeat_along_axis(x: tf.Tensor, axis: int, repeats: int) -> tf.Tensor:\n  \"\"\"Repeats the elements in a tensor along the specified axis.\"\"\"\n  return tf.repeat(x, repeats=repeats, axis=axis)\n\n\ndef _repeat_along_axes(x: tf.Tensor, axis: Any, repeats: Any) -> tf.Tensor:\n  \"\"\"Repeats the elements in a tensor along the specified axes.\"\"\"\n  if isinstance(axis, int) and isinstance(repeats, int):\n    x = _repeat_along_axis(x, axis, repeats)\n  elif isinstance(axis, list) and isinstance(repeats, list):\n    for a, r in zip(axis, repeats):\n      x = _repeat_along_axis(x, axis=a, repeats=r)\n  return x\n\n\ndef _get_scale_mean(\n    scale_axis: Any, x: tf.Tensor, q: tf.Tensor, elements_per_scale: Any\n):\n  \"\"\"Gets the mean of the tensor along the 
specified scaling axis/axes.\n\n  Args:\n    scale_axis: int or List[int] representing which axis/axes to calculate\n     scale at.\n    x: A tensor object. Its elements are in float.\n    q: A tensor object. Its elements are in quantized format of x.\n    elements_per_scale: if set to an int or List[int], we create multiple scales\n      per axis across scale_axis, where 'elements_per_scale' represents the\n      number of elements/values associated with every separate scale value.\n\n  Returns:\n    A tuple of two tensors representing the mean of x and its quantized format\n    along the specified scaling axis/axes.\n  \"\"\"\n  if elements_per_scale is not None:\n    # Get the input shape\n    x_shape = x.shape.as_list()\n\n    scale_axis, elements_per_scale = _validate_axis_and_eps(\n        x_shape, scale_axis, elements_per_scale)\n\n    # get the shape of unrolled tensors x and q\n    unrolled_shape, unrolled_scale_axis = _get_unrolled_shape(\n        x_shape, elements_per_scale, scale_axis)\n\n    # Unroll x and q\n    x1 = tf.reshape(x, unrolled_shape)\n    q1 = tf.reshape(q, unrolled_shape)\n\n    # Get the mean along the unroll axis/axes\n    axes_of_mean = _get_scaling_axis(unrolled_scale_axis, len(unrolled_shape))\n    qx = K.mean(tf.math.multiply(x1, q1), axis=axes_of_mean, keepdims=True)\n    qq = K.mean(tf.math.multiply(q1, q1), axis=axes_of_mean, keepdims=True)\n\n    # Reshape qx and qq to be divisible by the input shape.\n    # To achieve this, qx and qq are first rolled back along unroll axis.\n    # Then, the values along the scale_axis are repeated \"elements_per_scale\"\n    # times to match the original shape.\n    rolled_back_shape = _get_rolled_back_shape(qx.shape.as_list(),\n                                               roll_axis=unrolled_scale_axis)\n\n    qx = tf.reshape(qx, rolled_back_shape)\n    qx = _repeat_along_axes(qx, repeats=elements_per_scale, axis=scale_axis)\n\n    qq = tf.reshape(qq, rolled_back_shape)\n    qq = 
_repeat_along_axes(qq, repeats=elements_per_scale, axis=scale_axis)\n  else:\n    len_axis = len(x.shape)\n    axis = _get_scaling_axis(scale_axis, len_axis)\n    qx = K.mean(tf.math.multiply(x, q), axis=axis, keepdims=True)\n    qq = K.mean(tf.math.multiply(q, q), axis=axis, keepdims=True)\n  return qx, qq\n\n\ndef _clip_po2_scale(scale: tf.Tensor, min_po2_exponent: Any,\n                    max_po2_exponent: Any):\n  \"\"\"Clip power-of-two scales given minimum and maximum po2 exponents.\"\"\"\n\n  min_po2 = None if min_po2_exponent is None else 2**min_po2_exponent\n  max_po2 = None if max_po2_exponent is None else 2**max_po2_exponent\n  scale = K.clip(scale, min_value=min_po2, max_value=max_po2)\n  return scale\n\n\ndef _get_least_squares_scale(\n  alpha: Any, x: tf.Tensor, q: tf.Tensor, scale_axis: Any = None,\n  per_channel_scale: bool = True, elements_per_scale: Any = None,\n  min_po2_exponent: Any = None, max_po2_exponent: Any = None):\n  \"\"\"Gets scaling factor for scaling the tensor per channel.\n\n  It uses the least squares method to find the scaling factor.\n\n  (https://en.wikipedia.org/wiki/Linear_least_squares)\n\n  Arguments:\n    alpha: A float or string. When it is string, it should be either \"auto\" or\n      \"auto_po2\", and scale = sum(x * q, axis=all but last) / sum(q * q,\n      axis=all but last)\n    x: A tensor object. Its elements are in float.\n    q: A tensor object. Its elements are in quantized format of x.\n    scale_axis: int or List[int] representing which axis/axes to calculate\n     scale from.\n    per_channel_scale: A bool. 
Whether to perform per-channel scaling or not.\n    elements_per_scale: if set to an int or List[int], we create multiple scales\n      per axis across scale_axis, where 'elements_per_scale' represents the\n      number of elements/values associated with every separate scale value.\n    min_po2_exponent: if set while using \"auto_po2\", it represents the minimum\n      allowed power of two exponent.\n    max_po2_exponent: if set while using \"auto_po2\", it represents the maximum\n      allowed power of two exponent.\n\n  Returns:\n    A scaling factor tensor or scalar for scaling tensor per channel.\n  \"\"\"\n\n  if isinstance(alpha, six.string_types) and \"auto\" in alpha:\n    assert alpha in [\"auto\", \"auto_po2\"]\n    # in different tensorflow version (e.g., 2.4)\n    # x.shape is a tuple which doesn't have as_list() method\n    try:\n      x_shape = x.shape.as_list()\n    except AttributeError:\n      x_shape = list(x.shape)\n\n    len_axis = len(x_shape)\n    if not per_channel_scale:\n      qx = K.mean(x * q, keepdims=True)\n      qq = K.mean(q * q, keepdims=True)\n    else:\n      if len_axis > 1:\n        qx, qq = _get_scale_mean(scale_axis, x, q, elements_per_scale)\n      else:\n        # No summing (averaging) along the channel axis to get per-channel\n        # scales.\n        qx = x * q\n        qq = q * q\n\n    scale = qx / (qq + K.epsilon())\n    if alpha == \"auto_po2\":\n      scale = K.pow(2.0,\n                    tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0)))\n\n      if min_po2_exponent is not None or max_po2_exponent is not None:\n        scale = _clip_po2_scale(scale, min_po2_exponent, max_po2_exponent)\n\n  elif alpha is None:\n    scale = 1.0\n  elif isinstance(alpha, np.ndarray):\n    scale = alpha\n  else:\n    scale = float(alpha)\n  return scale\n\ndef _get_scale(*args, **kwargs):\n  \"\"\"Old name for _get_least_squares_scale. 
Kept for backwards compatibility.\"\"\"\n  return _get_least_squares_scale(*args, **kwargs)\n\ndef smooth_sigmoid(x):\n  \"\"\"Implements a linear approximation of a sigmoid function.\"\"\"\n\n  # if we use 2.65 as the clipping point, MSE w.r.t. original sigmoid is\n  # smaller than hard_simoid but the arithmetic for it is (x >> 3) +\n  # (x >> 4) + 0.5, which is also not bad.\n\n  return tf.keras.backend.clip(0.1875 * x + 0.5, 0.0, 1.0)\n\n\ndef hard_sigmoid(x):\n  \"\"\"Computes hard_sigmoid function that saturates between 0 and 1.\"\"\"\n\n  return tf.keras.backend.clip(0.5 * x + 0.5, 0.0, 1.0)\n\n\ndef binary_sigmoid(x):\n  \"\"\"Computes binary_sigmoid.\"\"\"\n\n  return _round_through(hard_sigmoid(x))\n\n\n# we use a version of approximated sigmoid everywhere in this code.\n# we can set it to hard_sigmoid(x) or smooth_sigmoid(x).\n\n_default_sigmoid_type = \"hard\"\n_sigmoid = None\n\n\ndef set_internal_sigmoid(mode):\n  \"\"\"Sets _sigmoid to either real, hard or smooth.\"\"\"\n\n  global _sigmoid\n\n  if mode not in [\"real\", \"hard\", \"smooth\"]:\n    raise ValueError(\"mode has to be 'real', 'hard' or 'smooth'.\")\n\n  if mode == \"hard\":\n    _sigmoid = hard_sigmoid\n  elif mode == \"smooth\":\n    _sigmoid = smooth_sigmoid\n  elif mode == \"real\":\n    _sigmoid = tf.keras.backend.sigmoid\n\n\nset_internal_sigmoid(_default_sigmoid_type)\n\n\ndef binary_tanh(x):\n  \"\"\"Computes binary_tanh function that outputs -1 and 1.\"\"\"\n  return 2.0 * binary_sigmoid(x) - 1.0\n\n\ndef hard_tanh(x):\n  \"\"\"Computes hard_tanh function that saturates between -1 and 1.\"\"\"\n  return 2.0 * hard_sigmoid(x) - 1.0\n\n\ndef smooth_tanh(x):\n  \"\"\"Computes smooth_tanh function that saturates between -1 and 1.\"\"\"\n  return 2.0 * smooth_sigmoid(x) - 1.0\n\n\ndef stochastic_round(x, precision=0.5):\n  \"\"\"Performs stochastic rounding to the first decimal point.\"\"\"\n  scale = 1.0 / precision\n  scale_x = x * scale\n  fraction = scale_x - tf.floor(scale_x)\n\n 
 result = tf.where(fraction < tf.random.uniform(tf.shape(x)),\n                    tf.math.floor(scale_x), tf.math.ceil(scale_x))\n  return result / scale\n\n\ndef stochastic_round_po2(x):\n  \"\"\"Performs stochastic rounding for the power of two.\"\"\"\n  # TODO(b/237832905): test stochastic_round_po2 and constraint.\n  # because quantizer is applied after constraint.\n  y = tf.abs(x)\n  eps = tf.keras.backend.epsilon()\n  log2 = tf.keras.backend.log(2.0)\n\n  x_log2 = tf.round(tf.keras.backend.log(y + eps) / log2)\n  po2 = tf.cast(pow(2.0, tf.cast(x_log2, dtype=\"float32\")), dtype=\"float32\")\n  left_val = tf.where(po2 > y, x_log2 - 1, x_log2)\n  right_val = tf.where(po2 > y, x_log2, x_log2 + 1)\n  # sampling in [2**left_val, 2**right_val].\n  minval = 2 ** left_val\n  maxval = 2 ** right_val\n  val = tf.random.uniform(tf.shape(y), minval=minval, maxval=maxval)\n  # use y as a threshold to keep the probabliy [2**left_val, y, 2**right_val]\n  # so that the mean value of the sample should be y\n  x_po2 = tf.where(y < val, left_val, right_val)\n  \"\"\"\n  x_log2 = stochastic_round(tf.keras.backend.log(y + eps) / log2)\n  sign = tf.sign(x)\n  po2 = (\n      tf.sign(x) *\n      tf.cast(pow(2.0, tf.cast(x_log2, dtype=\"float32\")), dtype=\"float32\")\n  )\n  \"\"\"\n  return x_po2\n\n\ndef _round_through(x, use_stochastic_rounding=False, precision=0.5):\n  \"\"\"Rounds x but using straight through estimator.\n\n  We use the trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182).\n\n  Straight through estimator is a biased estimator for the rounding\n  operation defined by Hinton\"s Coursera Lecture 9c where dL/dx is made\n  equal to dL/dy for y = f(x) during gradient computation, where f(x) is\n  a non-derivable function. 
In that case, we assume df/dx = 1 in:\n\n  dL   dL df   dL\n  -- = -- -- = --\n  dx   df dx   dy\n\n  (https://www.youtube.com/watch?v=LN0xtUuJsEI&list=PLoRl3Ht4JOcdU872GhiYWf6jwrk_SNhz9&index=41)\n\n  Arguments:\n    x: tensor to perform round operation with straight through gradient.\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n    precision: by default we will use 0.5 as precision, but that can overriden\n      by the user.\n\n  Returns:\n    Rounded tensor.\n  \"\"\"\n  if use_stochastic_rounding:\n    output = tf_utils.smart_cond(\n        K.learning_phase(),\n        lambda: x + tf.stop_gradient(-x + stochastic_round(x, precision)),\n        lambda: x + tf.stop_gradient(-x + tf.round(x)))\n  else:\n    output = x + tf.stop_gradient(-x + tf.round(x))\n  return output\n\n\ndef _sign_through(x):\n  \"\"\"Computes the sign operation using the straight through estimator.\"\"\"\n\n  # tf.sign generates -1, 0 or +1, so it should not be used when we attempt\n  # to generate -1 and +1.\n\n  k_sign = tf.sign(x)\n\n  return x + tf.stop_gradient(-x + k_sign)\n\n\ndef _ceil_through(x):\n  \"\"\"Computes the ceiling operation using straight through estimator.\"\"\"\n\n  return x + tf.stop_gradient(-x + tf.ceil(x))\n\n\ndef _floor_through(x):\n  \"\"\"Computes the floor operation using straight through estimator.\"\"\"\n\n  return x + tf.stop_gradient(-x + tf.floor(x))\n\n#\n# Activation functions for quantized networks.\n#\n# Please note some of these functions can be used as well\n# as quantizer functions for weights of dense and convolutional\n# layers.\n#\n\n\n@quantizer_registry.register_quantizer\nclass quantized_linear(base_quantizer.BaseQuantizer):\n  \"\"\"Linear quantization with fixed number of bits.\n\n  This quantizer maps inputs to the nearest value of a fixed number of\n  outputs that are evenly spaced, with possible scaling and stochastic\n  rounding. 
This is an updated version of the legacy quantized_bits.\n\n  The core computation is:\n    1. Divide the tensor by a quantization scale\n    2. Clip the tensor to a specified range\n    3. Round to the nearest integer\n    4. Multiply the rounded result by the quantization scale\n\n  This clip range is determined by\n    - The number of bits we have to represent the number\n    - Whether we want to have a symmetric range or not\n    - Whether we want to keep negative numbers or not\n\n    The quantization scale is defined by either the quantizer parameters or the\n    data passed to the __call__ method. See documentation for the `alpha`\n    parameter to find out more.\n\n    For backprop purposes, the quantizer uses the straight-through estimator\n    for the rounding step (https://arxiv.org/pdf/1903.05662.pdf). Thus the\n    gradient of the __call__ method is 1 on the interval\n    [quantization_scale * clip_min, quantization_scale * clip_max] and 0\n    elsewhere.\n\n  The quantizer also supports a number of other optional features:\n  - Stochastic rounding (see the `stochastic_rounding` parameter)\n  - Quantization noise (see the `qnoise_factor` parameter)\n\n  Notes on the various \"scales\" in quantized_linear:\n\n      - The quantization scale is the scale used in the core computation (see\n        above). You can access it via the `quantization_scale` attribute.\n      - The data type scale is the scale is determined by the type of data\n        stored on hardware on a small device running a true quantized model.\n        It is the quantization scale needed to represent `bits` bits, `integer`\n        of which are integer bits, and one bit is reserved for the sign if\n        `keep_negative` is True. It can be calculated as\n        2 ** (integer - bits + keep_negative). You can access it via the\n        `data_type_scale` attribute.\n      - The `scale` attribute stores the quotient of the quantization scale and\n        the data type scale. 
This is also the scale that can be directly\n        specified by the user, via the `alpha` parameter.\n\n    These three quantities are related by the equation\n    scale = quantization_scale / data_type_scale.\n\n    See the diagram below of scale usage in a quantized conv layer.\n\n    +------------------------------------------------------------------------+\n    |     data_type_scale        --------------->     stored_weights         |\n    | (determines decimal point)                            |                |\n    |                                                       V                |\n    |                                                    conv op             |\n    |                                                       |                |\n    |                                                       V                |\n    |                                                  accumulator           |\n    |                                                       |                |\n    |  determines quantization                              V                |\n    |    range and precision     --------------->   quantization_scale       |\n    |       (per channel)                                   |                |\n    |                                                       V                |\n    |                                                   activation           |\n    +------------------------------------------------------------------------+\n\n      # TODO: The only fundamentally necessary scale is the quantization scale.\n      # We should consider removing the data type scale and scale attributes,\n      # but know that this will require rewriting much of how qtools and HLS4ML\n      # use these scale attributes.\n\n    Note on binary quantization (bits=1):\n      The core computation is modified here when `keep_negative` is True to\n      perform a scaled sign function. 
This is needed because the core\n      computation as defined above requires that 0 be mapped to 0, which does\n      not allow us to keep both positive and negative outputs for binary\n      quantization. Special shifting operations are used to achieve this.\n\n    Example usage:\n\n  # 8-bit quantization with 3 integer bits\n  >>> q = quantized_linear(8, 3)\n  >>> x = tf.constant([0.0, 0.5, 1.0, 1.5, 2.0])\n  >>> q(x).numpy()\n  array([0., 0., 1., 2., 2.], dtype=float32)\n\n  # 2-bit quantization with \"auto\" and tensor alphas\n  >>> q_auto = quantized_linear(2, alpha=\"auto\")\n  >>> x = tf.constant([0.0, 0.5, 1.0, 1.5, 2.0])\n  >>> q_auto(x).numpy()\n  array([0., 0., 0., 2., 2.], dtype=float32)\n  >>> q_auto.scale.numpy()\n  array([4.], dtype=float32)\n  >>> q_auto.quantization_scale.numpy()\n  array([2.], dtype=float32)\n  >>> q_fixed = quantized_linear(2, alpha=q_auto.scale)\n  >>> q_fixed(x)\n  array([0., 0., 0., 2., 2.], dtype=float32)\n\n    Args:\n      bits (int): Number of bits to represent the number. Defaults to 8.\n      integer (int): Number of bits to the left of the decimal point, used for\n        data_type_scale. Defaults to 0.\n      symmetric (bool): If true, we will have the same number of values\n        for positive and negative numbers. Defaults to True.\n      alpha (str, Tensor, None): Instructions for determining the quantization\n        scale. 
Defaults to None.\n        - If None: the quantization scale is the data type scale, determined\n          by `integer`, `bits`, and `keep_negative`.\n        - If \"auto\", the quantization scale is calculated as the minimum\n          floating point scale per-channel that does not clip the max of x.\n        - If \"auto_po2\", the quantization scale is chosen as the\n          power of two per-channel that minimizes squared error between the\n          quantized x and the original x.\n        - If Tensor: The quantization scale is the Tensor passed in\n          multiplied by the data type scale.\n      keep_negative (bool): If false, we clip negative numbers. Defaults to\n        True.\n      use_stochastic_rounding (bool): If true, we perform stochastic rounding\n        (https://arxiv.org/pdf/1502.02551.pdf).\n      scale_axis (int, None): Which axis to calculate scale from. If None, we\n        perform per-channel scaling based off of the image data format. Note\n        that each entry of a rank-1 tensor is considered its own channel by\n        default. See `_get_scaling_axis` for more details. Defaults to None.\n      qnoise_factor (float): A scalar from 0 to 1 that represents the level of\n        quantization noise to add. This controls the amount of the\n        quantization noise to add to the outputs by changing the weighted\n        sum of (1 - qnoise_factor) * unquantized_x + qnoise_factor *\n        quantized_x. Defaults to 1.0, which means that the result is fully\n        quantized.\n      use_variables (bool): If true, we use tf.Variables to store certain\n        parameters. See the BaseQuantizer implementation for more details.\n        Defaults to False. If set to True, be sure to use the special attribute\n        update methods detailed in the BaseQuantizer.\n      var_name (str or None): A variable name shared between the tf.Variables\n        created in on initialization, if use_variables is true. 
If None, the\n        variable names are generated automatically based on the parameter names\n        along with a uid. Defaults to None.\n\n  Returns:\n    function: Function that computes linear quantization.\n\n  Raises:\n    ValueError:\n      - If `bits` is not positive, or is too small to represent `integer`.\n      - If `integer` is negative.\n      - If `alpha` is a string but not one of (\"auto\", \"auto_po2\").\n  \"\"\"\n\n  # string options for alpha parameter\n  ALPHA_STRING_OPTIONS = (\"auto\", \"auto_po2\")\n\n  def __init__(\n      self,\n      bits=8,\n      integer=0,\n      symmetric=1,\n      keep_negative=True,\n      alpha=None,\n      use_stochastic_rounding=False,\n      scale_axis=None,\n      qnoise_factor=1.0,\n      var_name=None,\n      use_variables=False,\n  ):\n    super().__init__()\n\n    self.var_name = var_name\n\n    # Error checking\n    self._check_bits(bits)\n    self._check_alpha(alpha)\n\n    # Set non-modifyable attributes\n    self._bits = bits\n    self._integer = integer\n    self._keep_negative = keep_negative\n    self._use_stochastic_rounding = use_stochastic_rounding\n    self._scale_axis = scale_axis\n    self._use_variables = use_variables\n\n    # Set modifyable attributes\n    self.alpha = alpha\n    self.qnoise_factor = qnoise_factor\n    self.symmetric = symmetric\n\n    # Set default quantization scale\n    self.quantization_scale = self.default_quantization_scale\n\n  def _check_bits(self, bits):\n    \"\"\"Error checking for bits parameter\"\"\"\n    err_msg = f\"Bit count {bits} must be positive\"\n    if bits <= 0:\n      raise ValueError(err_msg)\n\n  def _check_alpha(self, alpha):\n    \"\"\"Error checking for alpha parameter\"\"\"\n\n    if isinstance(alpha, six.string_types):\n      # Check the quantizer has been given a valid alpha string\n      if not alpha in self.ALPHA_STRING_OPTIONS:\n        raise ValueError(\n            f\"Invalid alpha '{alpha}' for auto alpha computation. 
\"\n            f\"Must be one of {self.ALPHA_STRING_OPTIONS}\")\n    elif alpha is not None: # alpha is a tensor\n      try:\n        # any allowable array type can be cast as a numpy array\n        np.array(alpha)\n      except TypeError:\n        raise TypeError(\n            f\"alpha must be, a string, an array, or None, not {type(alpha)}\")\n\n  @property\n  def bits(self):\n    return self._bits\n\n  @property\n  def integer(self):\n    return self._integer\n\n  @property\n  def keep_negative(self):\n    return self._keep_negative\n\n  @property\n  def use_stochastic_rounding(self):\n    return self._use_stochastic_rounding\n\n  @property\n  def scale_axis(self):\n    return self._scale_axis\n\n  @property\n  def use_variables(self):\n    return self._use_variables\n\n  @property\n  def scale(self):\n    return self.quantization_scale / self.data_type_scale\n\n  @property\n  def data_type_scale(self):\n    \"\"\"Quantization scale for the data type\"\"\"\n    # integer is sometimes cast as int32, so cast to float32 to avoid errors\n    integer = tf.cast(self.integer, tf.float32)\n    return K.pow(2.0, integer - self.bits + self.keep_negative)\n\n  @property\n  def auto_alpha(self):\n    \"\"\"Returns true if using a data-dependent alpha\"\"\"\n\n    return isinstance(self.alpha, six.string_types)\n\n  @property\n  def use_sign_function(self):\n    \"\"\"Return true if using sign function for quantization\"\"\"\n\n    return (self.bits == 1.0) and self.keep_negative\n\n  @property\n  def default_quantization_scale(self):\n    \"\"\"Calculate and set quantization_scale default\"\"\"\n\n    # Set default quantization scale\n    quantization_scale = self.data_type_scale\n\n    # Quantization scale given by alpha\n    if self.alpha is not None and not self.auto_alpha:\n      quantization_scale = self.alpha * self.data_type_scale\n\n    return quantization_scale\n\n  def get_clip_bounds(self):\n    \"\"\"Get bounds of clip range\"\"\"\n\n    if 
self.use_sign_function:\n      clip_min = K.cast_to_floatx(-0.5)\n      clip_max = K.cast_to_floatx(0.5)\n    else:\n      unsigned_bits_po2 = K.pow(2.0, self.bits - self.keep_negative)\n      # if symmetric, clip_min is negative of clip_max. Otherwise clip_min is\n      # lowered by 1, giving us one more representable number\n      clip_min = self.keep_negative * (-unsigned_bits_po2 + self.symmetric)\n      clip_max = unsigned_bits_po2 - K.cast_to_floatx(1.0)\n\n    return clip_min, clip_max\n\n  def __call__(self, x):\n    \"\"\"Core quantization function\"\"\"\n\n    # Build if not already built\n    self._build()\n\n    # Data type conversion\n    x = K.cast_to_floatx(x)\n    shape = x.shape\n\n    if self.auto_alpha:\n      # get data-dependent quantization scale\n      quantization_scale = self._get_auto_quantization_scale(x)\n    else:\n      # quantization scale determined by quantizer params, not data\n      # see default_quantization_scale property for more info\n      quantization_scale = self.quantization_scale\n\n    scaled_xq = self._scale_clip_and_round(x, quantization_scale)\n    xq = scaled_xq * quantization_scale\n\n    res = x + self.qnoise_factor * (xq - x)\n    res.set_shape(shape)\n\n    return res\n\n  def _scale_clip_and_round(self, x, quantization_scale):\n    \"\"\"Scale, clip, and round x to an integer value in a limited range\n    Note that the internal shift is needed for 1-bit quantization to ensure\n    that a sign function is used. 
Otherise, the binary quantizer would have\n    three output values\"\"\"\n\n    # special shifting needed to compute a sign function.\n    shift = self.use_sign_function * 0.5\n\n    clip_min, clip_max = self.get_clip_bounds()\n\n    scaled_x = x / quantization_scale\n    clipped_scaled_x = K.clip(scaled_x, clip_min, clip_max)\n    # Round through to nearest integer, using straight-through estimator\n    # for gradient computations.\n    scaled_xq = _round_through(\n      clipped_scaled_x - shift,\n      use_stochastic_rounding=self.use_stochastic_rounding,\n      precision=1.0, # using 1.0 precision so that we round to a nearby integer\n    )\n\n    return scaled_xq + shift\n\n  def _get_auto_quantization_scale(self, x):\n    \"\"\"Get quantization_scale, either from self or from input x\"\"\"\n\n    # Get the minimum floating point scale that does not clip the max of x\n    # This is the quantization scale for alpha=\"auto\"\n    quantization_scale = self._get_quantization_scale_from_max_data(x)\n\n    if self.alpha == \"auto_po2\":\n      quantization_scale = self._po2_autoscale(x, quantization_scale)\n\n    # update quantization_scale variable\n    # stop_gradient on quantization_scale to ignore dependence on x\n    self.quantization_scale = tf.stop_gradient(quantization_scale)\n\n    # very important that return value is a tf.Variable with shape None\n    return self.quantization_scale\n\n  def _get_quantization_scale_from_max_data(self, x):\n    \"\"\"Get the minimum floating point scale that does not clip the max\n    of x\"\"\"\n\n    axis = _get_scaling_axis(self.scale_axis, tf.rank(x))\n\n    clip_min, clip_max = self.get_clip_bounds()\n    clip_range = clip_max - clip_min\n\n    # get quantization scale- depends on whether we are keeping negative\n    # divide by clip range to ensure that we clip right at the max of x\n    if self.keep_negative:\n      data_max = K.max(tf.math.abs(x), axis=axis, keepdims=True)\n      quantization_scale = (data_max * 2) / 
clip_range\n    else:\n      data_max = K.max(x, axis=axis, keepdims=True)\n      quantization_scale = data_max / clip_range\n\n    return tf.math.maximum(quantization_scale, K.epsilon())\n\n  def _po2_autoscale(self, x, quantization_scale):\n    \"\"\"Get an approximation of the \"best\" po2 scale using least squares\"\"\"\n\n    # set alpha scale to a near power of two\n    quantization_scale = K.pow(\n        2.0,\n        tf.math.round(K.log(quantization_scale + K.epsilon()) / K.log(2.0)))\n\n    def loop_body(_, quantization_scale):\n      \"\"\"Loop body for least squares autoscaling\"\"\"\n\n      scaled_xq = self._scale_clip_and_round(x, quantization_scale)\n      new_quantization_scale = _get_least_squares_scale(\n          alpha=\"auto_po2\",\n          x=x,\n          q=scaled_xq,\n          scale_axis=self.scale_axis,\n      )\n      return quantization_scale, new_quantization_scale\n\n    def loop_cond(last_quantization_scale, quantization_scale):\n      \"\"\"Loop condition for least squares autoscaling- stop when the\n      scale converges\"\"\"\n\n      tensors_not_equal = tf.math.reduce_any(\n          tf.not_equal(last_quantization_scale, quantization_scale))\n      return tensors_not_equal\n\n    # Need a tensor of the same shape as quantization_scale that\n    # does not equal quantization_scale\n    dummy_quantization_scale = -tf.ones_like(quantization_scale)\n\n    # For 1-bit quantization, po2 autoscale loop is guaranteed to converge\n    # after 1 iteration\n    max_iterations = 1 if self.use_sign_function else 5\n\n    _, quantization_scale = tf.while_loop(\n        loop_cond,\n        loop_body,\n        (dummy_quantization_scale, quantization_scale),\n        maximum_iterations=max_iterations,\n    )\n\n    return quantization_scale\n\n  def _build(self):\n    \"\"\"Build if not done so already\"\"\"\n\n    if not self.built:\n      self.build(var_name=self.var_name, use_variables=self.use_variables)\n\n  def max(self):\n    \"\"\"Get 
maximum value that quantized_linear class can represent.\"\"\"\n    _, clip_max = self.get_clip_bounds()\n    return clip_max * self.quantization_scale\n\n  def min(self):\n    \"\"\"Get minimum value that quantized_linear class can represent.\"\"\"\n    clip_min, _ = self.get_clip_bounds()\n    return clip_min * self.quantization_scale\n\n  def range(self):\n    \"\"\"Returns a list of all values that quantized_linear can represent\n    }.\"\"\"\n\n    if self.use_sign_function:\n      return K.cast_to_floatx([self.max(), self.min()])\n    else:\n      clip_min, clip_max = self.get_clip_bounds()\n      clip_max = tf.cast(clip_max, tf.int32)\n      clip_min = tf.cast(clip_min, tf.int32)\n      pos_array = K.cast_to_floatx(tf.range(clip_max + 1))\n      neg_array = K.cast_to_floatx(tf.range(clip_min, 0))\n\n      return self.quantization_scale * tf.concat([pos_array, neg_array], axis=0)\n\n  def __str__(self):\n\n    # Main parameters always printed in string\n    flags = [\n      str(int(self.bits)),\n      str(int(self.integer)),\n      str(int(self.symmetric))]\n    # Optional parameters only printed if not default\n    if not self.keep_negative:\n      flags.append(\"keep_negative=False\")\n    if self.auto_alpha:\n      alpha = \"'\" + self.alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    elif self.alpha is not None:\n      alpha = np.array(alpha)\n      flags.append(\"alpha=\" + str(alpha))\n    if self.use_stochastic_rounding:\n      flags.append(\"use_stochastic_rounding=\" +\n                   str(int(self.use_stochastic_rounding)))\n    return \"quantized_linear(\" + \",\".join(flags) + \")\"\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n      self.symmetric = True\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n\n    config = {\n        \"bits\": self.bits,\n        \"integer\": self.integer,\n        \"symmetric\": 
self.symmetric,\n        \"alpha\": self.alpha,\n        \"keep_negative\": self.keep_negative,\n        \"use_stochastic_rounding\": self.use_stochastic_rounding,\n        \"qnoise_factor\": self.qnoise_factor,\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass quantized_bits(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Legacy quantizer: Quantizes the number to a number of bits.\n\n  In general, we want to use a quantization function like:\n\n  a = (pow(2,bits) - 1 - 0) / (max(x) - min(x))\n  b = -min(x) * a\n\n  in the equation:\n\n  xq = a x + b\n\n  This requires multiplication, which is undesirable. So, we\n  enforce weights to be between -1 and 1 (max(x) = 1 and min(x) = -1),\n  and separating the sign from the rest of the number as we make this function\n  symmetric, thus resulting in the following approximation.\n\n  1) max(x) = +1, min(x) = -1\n  2) max(x) = -min(x)\n\n  a = pow(2,bits-1)\n  b = 0\n\n  Finally, just remember that to represent the number with sign, the\n  largest representation is -pow(2,bits) to pow(2, bits-1)\n\n  Symmetric and keep_negative allow us to generate numbers that are symmetric\n  (same number of negative and positive representations), and numbers that\n  are positive.\n\n  Note:\n    the behavior of quantized_bits is different than Catapult HLS ac_fixed\n    or Vivado HLS ap_fixed. 
For ac_fixed<word_length, integer_length, signed>,\n    when signed = true, it is equivalent to\n    quantized_bits(word_length, integer_length-1, keep_negative=True)\n\n  Attributes:\n    bits: number of bits to perform quantization.\n    integer: number of bits to the left of the decimal point.\n    symmetric: if true, we will have the same number of values for positive\n      and negative numbers.\n    alpha: a tensor or None, the scaling factor per channel.\n      If None, the scaling factor is 1 for all channels.\n    keep_negative: if true, we do not clip negative numbers.\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n    scale_axis: int or List[int] which axis/axes to calculate scale from.\n    qnoise_factor: float. a scalar from 0 to 1 that represents the level of\n      quantization noise to add. This controls the amount of the quantization\n      noise to add to the outputs by changing the weighted sum of\n      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.\n    var_name: String or None. A variable name shared between the tf.Variables\n      created in the build function. If None, it is generated automatically.\n    use_ste: Bool. Whether to use \"straight-through estimator\" (STE) method or\n        not.\n    use_variables: Bool. 
Whether to make the quantizer variables to be dynamic\n      tf.Variables or not.\n    elements_per_scale: if set to an int or List[int], we create multiple scales\n      per axis across scale_axis, where 'elements_per_scale' represents the\n      number of elements/values associated with every separate scale value.\n      It is only supported when using \"auto_po2\".\n    min_po2_exponent: if set while using \"auto_po2\", it represents the minimum\n      allowed power of two exponent.\n    max_po2_exponent: if set while using \"auto_po2\", it represents the maximum\n      allowed power of two exponent.\n    post_training_scale: if set, it represents the scale value to be used for\n      quantization.\n\n  Returns:\n    Function that computes fixed-point quantization with bits.\n  \"\"\"\n\n  def __init__(self,\n               bits=8,\n               integer=0,\n               symmetric=0,\n               keep_negative=True,\n               alpha=None,\n               use_stochastic_rounding=False,\n               scale_axis=None,\n               qnoise_factor=1.0,\n               var_name=None,\n               use_ste=True,\n               use_variables=False,\n               elements_per_scale=None,\n               min_po2_exponent=None,\n               max_po2_exponent=None,\n               post_training_scale=None):\n    super().__init__()\n\n    self.bits = bits\n    self.integer = integer\n    self.symmetric = symmetric\n    self.keep_negative = keep_negative\n    self.alpha = alpha\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.post_training_scale = post_training_scale\n    # \"auto*\" |-> symmetric\n    if isinstance(self.alpha, six.string_types):\n      self.freeze_scale = False\n      self.symmetric = True\n      if post_training_scale is not None:\n        self.scale = np.array(post_training_scale)\n        self.freeze_scale = True\n    else:\n      if post_training_scale is not None:\n        raise ValueError(f\"alpha={alpha} 
doesn't support post_training_scale: \"\n                         f\"{post_training_scale}\")\n      self.scale = None\n      # If alpha is not \"auto*\", then scale is fixed and not trainable.\n      self.freeze_scale = True\n\n    self.scale_axis = scale_axis\n    self.qnoise_factor = qnoise_factor\n    self.use_ste = use_ste\n    self.var_name = var_name\n    self.use_variables = use_variables\n    self.elements_per_scale = elements_per_scale\n    self.min_po2_exponent = min_po2_exponent\n    self.max_po2_exponent = max_po2_exponent\n\n  def __str__(self):\n    # Convert Tensors to printable strings by converting to a numpy array and\n    # then using regex to remove brackets when there is only one integer bit\n    integer_bits = re.sub(\n        r\"\\[(\\d)\\]\", r\"\\g<1>\",\n        str(self.integer.numpy() if isinstance(self.integer, tf.Variable\n                                              ) else self.integer))\n\n    flags = [str(self.bits), integer_bits, str(int(self.symmetric))]\n    if not self.keep_negative:\n      flags.append(\"keep_negative=False\")\n    if self.alpha:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.use_stochastic_rounding:\n      flags.append(\"use_stochastic_rounding=\" +\n                   str(int(self.use_stochastic_rounding)))\n    return \"quantized_bits(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    \"\"\"Computes fixedpoint quantization of x.\"\"\"\n    if not self.built:\n      self.build(var_name=self.var_name, use_variables=self.use_variables)\n\n    x = K.cast_to_floatx(x)\n\n    # quantized_bits with \"1\" bit becomes a binary implementation.\n    unsigned_bits = self.bits - self.keep_negative\n    # In pow function, use float datatype instead of integer, so that\n    # K.pow() results will use float32 instead of int32 as the default datatype.\n    # float32 has a much larger 
value range (2^128) than int32 (2^32), this is\n    # particularly important when quantizing very large values, and when integer\n    # bits are set much larger than total bits.\n    m = K.pow(2.0, K.cast_to_floatx(unsigned_bits))\n    m_i = K.pow(2.0, K.cast_to_floatx(self.integer))\n\n    # Verify that \"elements_per_scale\", \"min_po2_exponent\",\n    # and \"max_po2_exponent\" are only set when alpha is \"auto_po2\"\n    if self.alpha != \"auto_po2\":\n      assert (\n          self.elements_per_scale is None\n      ), \"elements_per_scale is only supported when using auto_po2\"\n      assert (\n          self.min_po2_exponent is None\n      ), \"min_po2_exponent is only supported when using auto_po2\"\n      assert (\n          self.max_po2_exponent is None\n      ), \"max_po2_exponent is only supported when using auto_po2\"\n\n    if self.alpha is None:\n      scale = 1.0\n    elif isinstance(self.alpha, six.string_types):\n      # We only deal with the symmetric case right now.\n      assert self.symmetric, \"Only symmetric quantizers are implemented\"\n      len_axis = len(x.shape)\n      if len_axis > 1:\n        axis = _get_scaling_axis(self.scale_axis, len_axis)\n      else:\n        axis = [0]\n\n      x = x / m_i\n\n      # Using 2's complement, we can represent 2**(bits-1)-1 positive values\n      # If we wish to maintain symmetry, we can double 2**(bits-1)-1 to get\n      # the total number of possible values we can represent.\n      # If symmetry is not enforced, then we can represent (2**bits)-1 values\n      # using 2's complement.\n      levels = (2**(self.bits-1)-1) * 2 if self.symmetric else (2**self.bits)-1\n\n      if self.freeze_scale:\n        # Scale is fixed value. In this case, scale is extracted from the\n        # post-training quantizater scale. 
In order to retrain models with\n        # this scale value, we need to divide it by m to make it in the same\n        # value scale as x.\n        scale = self.scale / m\n      else:\n        # Calculate the scale.\n        scale = (K.max(abs(x), axis=axis, keepdims=True) * 2) / levels\n\n        # If alpha is \"auto_po2\", then get the \"best\" po2 scale\n        if \"po2\" in self.alpha:\n          scale = K.pow(2.0,\n                        tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0)))\n          for idx in range(5):\n            v = tf.floor(tf.abs(x) / scale + 0.5)\n            mask = v < levels / 2\n            z = tf.sign(x) * tf.where(mask, v, tf.ones_like(v) * levels / 2)\n            scale = _get_least_squares_scale(\n                alpha=\"auto_po2\", x=x, q=z,\n                scale_axis=self.scale_axis,\n                elements_per_scale=self.elements_per_scale,\n                min_po2_exponent=self.min_po2_exponent,\n                max_po2_exponent=self.max_po2_exponent)\n\n        elif self.alpha != \"auto\":\n          # If alpha is \"auto\", then directly uuse the \"best\"\n          # floating point scale.\n          raise ValueError(f\"Invalid alpha '{self.alpha}'\")\n\n      # Even for trainable scale, we still need to quantize x with the best\n      # scale. 
This extra step is needed to ensure that with the same input\n      # and scale, the quantized output is identical between training and\n      # inference.\n      v = tf.floor(tf.abs(x) / scale + 0.5)\n      mask = v < levels / 2\n      z = tf.sign(x) * tf.where(mask, v, tf.ones_like(v) * levels / 2)\n\n      # z is an integer number, so we must make the scale * m and z / m\n      scale = scale * m\n\n      # we will not use \"z\" right now because of stochastic_rounding\n      # this is still under test.\n\n      # if \"new\" in self.alpha:\n      #  z = z / m\n      #  self.scale = scale\n      #  return x + tf.stop_gradient(-x + scale * z)\n      x = m_i * x\n      xq = m_i * z / m\n      if not self.freeze_scale:\n        self.scale = scale\n      xq = scale * xq\n\n      if self.use_ste:\n        return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))\n      else:\n        return (1 - self.qnoise_factor) * x + tf.stop_gradient(\n            self.qnoise_factor * xq)\n\n    else:\n      scale = self.alpha\n\n    # quantized_bits with \"1\" bit becomes a binary implementation.\n    if unsigned_bits > 0:\n      p = x * m / m_i\n      xq = m_i * tf.keras.backend.clip(\n          _round_through(p, self.use_stochastic_rounding, precision=1.0),\n          self.keep_negative  * (-m + self.symmetric), m - 1) / m\n    else:\n      xq = tf.sign(x)\n      xq += (1.0 - tf.abs(xq))\n      if not self.keep_negative:\n        xq = (xq + 1.0) / 2.0\n\n    self.scale = scale\n    xq = scale * xq\n\n    if self.use_ste:\n      return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))\n    else:\n      return (1 - self.qnoise_factor) * x + tf.stop_gradient(\n          self.qnoise_factor * xq)\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n      self.freeze_scale = False\n      self.symmetric = True\n\n  def max(self):\n    \"\"\"Get maximum value that quantized_bits class can represent.\"\"\"\n    unsigned_bits = 
self.bits - self.keep_negative\n    if unsigned_bits > 0:\n      return max(\n          1.0,\n          np.array(\n              K.pow(2., K.cast(self.integer, dtype=\"float32\")),\n              dtype=\"float32\"))\n    else:\n      return 1.0\n\n  def min(self):\n    \"\"\"Get minimum value that quantized_bits class can represent.\"\"\"\n    if not self.keep_negative:\n      return 0.0\n    unsigned_bits = self.bits - self.keep_negative\n    if unsigned_bits > 0:\n      return -max(\n          1.0,\n          np.array(\n              K.pow(2, K.cast(self.integer, dtype=\"float32\")), dtype=\"float32\"))\n    else:\n      return -1.0\n\n  def range(self):\n    \"\"\"Returns a list of all values that quantized_bits can represent\n    ordered by their binary representation ascending.\"\"\"\n    assert self.symmetric == 0\n    assert self.keep_negative\n    assert self.alpha is None or self.alpha == 1.0\n\n    x = np.asarray(range(2**self.bits), dtype=np.float32)\n    p_and_n = np.where(x >= 2**(self.bits - 1),\n                       (x - 2**(self.bits - 1)) - 2**(self.bits - 1), x)\n    return p_and_n * np.array(\n        K.pow(2.0, -self.bits + K.cast(self.integer, dtype=\"float32\") + 1),\n        dtype=\"float32\")\n\n  @classmethod\n  def from_config(cls, config):\n    # Convert JSON-serializable lists back to NumPy arrays.\n    if config.get(\"post_training_scale\") is not None:\n      config[\"post_training_scale\"] = np.array(config[\"post_training_scale\"])\n\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"bits\":\n            self.bits,\n        \"integer\":\n            self.integer.numpy()\n            if isinstance(self.integer, tf.Variable) else self.integer,\n        \"symmetric\":\n            self.symmetric,\n        \"alpha\":\n            self.alpha,\n        \"keep_negative\":\n            self.keep_negative,\n        \"use_stochastic_rounding\":\n            self.use_stochastic_rounding,\n        
\"qnoise_factor\":\n            self.qnoise_factor.numpy() if isinstance(\n                self.qnoise_factor, tf.Variable) else self.qnoise_factor,\n        \"post_training_scale\":\n            # Since NumPy arrays are not directly JSON-serializable,\n            # we convert them to lists.\n            (self.post_training_scale.tolist() if self.post_training_scale is\n             not None else None)\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass bernoulli(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes a Bernoulli sample with probability sigmoid(x).\n\n  This computation uses ST approximation.\n\n  To do that, we compute sigmoid(x) and a random sample z ~ U[0,1]. As\n  p in [0,1] and z in [0,1], p - z in [-1,1]. However, -1 will\n  never appear because to get -1 we would need sigmoid(-inf) - z == 1.\n  As a result, the range will be in practical terms [0,1].\n\n  The noise introduced by z can be seen as a regularizer to the weights W of\n  y = Wx as y = Wx + Wz for some noise z with mean mu(z) and var(z). 
As a\n  result, W**2 var(z) to the variance of y, which has the same effect as a\n  regularizer on L2 with lambda = var(z), as presented in Hinton\"s Coursera\n  Lecture 9c.\n\n  Remember that E[dL/dy] = E[dL/dx] once we add stochastic sampling.\n\n  Attributes:\n    alpha: allows one to specify multiplicative factor for number generation\n      of \"auto\" or \"auto_po2\".\n    temperature: amplifier factor for sigmoid function, making stochastic\n      less stochastic as it moves away from 0.\n    use_real_sigmoid: use real sigmoid for probability.\n\n  Returns:\n    Computation of round with stochastic sampling with straight through\n    gradient.\n  \"\"\"\n\n  def __init__(self, alpha=None, temperature=6.0, use_real_sigmoid=True):\n    super().__init__()\n    self.alpha = alpha\n    self.bits = 1\n    self.temperature = temperature\n    self.use_real_sigmoid = use_real_sigmoid\n    self.default_alpha = 1.0\n    self.scale = None\n\n  def __str__(self):\n    flags = []\n    if self.alpha is not None:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.temperature != 6.0:\n      flags.append(\"temperature=\" + str(self.temperature))\n    if not self.use_real_sigmoid:\n      flags.append(\"use_real_sigmoid=\" + str(int(self.use_real_sigmoid)))\n    return \"bernoulli(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    if isinstance(self.alpha, six.string_types):\n      assert self.alpha in [\"auto\", \"auto_po2\"]\n\n    if isinstance(self.alpha, six.string_types):\n      len_axis = len(x.shape)\n\n      if len_axis > 1:\n        if K.image_data_format() == \"channels_last\":\n          axis = list(range(len_axis - 1))\n        else:\n          axis = list(range(1, len_axis))\n      else:\n        axis = [0]\n\n      std = K.std(x, axis=axis, keepdims=True) + K.epsilon()\n    else:\n      std = 1.0\n\n    if 
self.use_real_sigmoid:\n      p = tf.keras.backend.sigmoid(self.temperature * x / std)\n    else:\n      p = _sigmoid(self.temperature * x/std)\n    r = tf.random.uniform(tf.shape(x))\n    q = tf.sign(p - r)\n    q += (1.0 - tf.abs(q))\n    q = (q + 1.0) / 2.0\n\n    q_non_stochastic = tf.sign(x)\n    q_non_stochastic += (1.0 - tf.abs(q_non_stochastic))\n    q_non_stochastic = (q_non_stochastic + 1.0) / 2.0\n\n    # if we use non stochastic binary to compute alpha,\n    # this function seems to behave better\n    scale = _get_least_squares_scale(self.alpha, x, q_non_stochastic)\n    self.scale = scale\n    return x + tf.stop_gradient(-x + scale * q)\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n\n  def max(self):\n    \"\"\"Get the maximum value bernoulli class can represent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return 1.0\n    else:\n      return max(1.0, self.alpha)\n\n  def min(self):\n    \"\"\"Get the minimum value bernoulli class can represent.\"\"\"\n    return 0.0\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\"alpha\": self.alpha}\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass ternary(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes an activation function returning -alpha, 0 or +alpha.\n\n  Right now we assume two type of behavior. For parameters, we should\n  have alpha, threshold and stochastic rounding on. For activations,\n  alpha and threshold should be floating point numbers, and stochastic\n  rounding should be off.\n\n  Attributes:\n    x: tensor to perform sign opertion with stochastic sampling.\n    bits: number of bits to perform quantization.\n    alpha: ternary is -alpha or +alpha. Alpha can be \"auto\" or \"auto_po2\".\n    threshold: threshold to apply \"dropout\" or dead band (0 value). 
If \"auto\"\n      is specified, we will compute it per output layer.\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n\n  Returns:\n    Computation of sign within the threshold.\n  \"\"\"\n\n  def __init__(self, alpha=None, threshold=None, use_stochastic_rounding=False,\n               number_of_unrolls=5):\n    super().__init__()\n    self.bits = 2\n    self.alpha = alpha\n    self.threshold = threshold\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.default_alpha = 1.0\n    self.default_threshold = 0.33\n    self.number_of_unrolls = number_of_unrolls\n    self.scale = None\n\n  def __str__(self):\n    flags = []\n    if self.alpha is not None:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.threshold is not None:\n      flags.append(\"threshold=\" + str(self.threshold))\n    if self.use_stochastic_rounding:\n      flags.append(\n          \"use_stochastic_rounding=\" + str(int(self.use_stochastic_rounding)))\n    if self.number_of_unrolls != 5:\n      flags.append(\n          \"number_of_unrolls=\" + str(int(self.number_of_unrolls)))\n    return \"ternary(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    if isinstance(self.alpha, six.string_types):\n      # parameters\n      assert self.alpha in [\"auto\", \"auto_po2\"]\n      assert self.threshold is None\n    else:\n      # activations\n      assert not self.use_stochastic_rounding\n      assert not isinstance(self.threshold, six.string_types)\n\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      scale = 1.0\n    elif isinstance(self.alpha, np.ndarray):\n      scale = self.alpha\n    else:\n      scale = float(self.alpha)\n\n    # This is an approximiation from https://arxiv.org/abs/1605.04711\n    # We consider channels_last only for now.\n    if isinstance(self.alpha, six.string_types):\n      # It 
is for parameters\n      # first, compute which asix corresponds to the channels.\n      # TODO(b/237833510): support channels_first\n      try:\n        len_axis = len(x.shape.as_list())\n      except AttributeError:\n        len_axis = len(list(x.shape))\n\n      if len_axis == 1:\n        axis = None\n      elif K.image_data_format() == \"channels_last\":\n        axis = list(range(len_axis - 1))\n      else:\n        axis = list(range(1, len_axis))\n\n      # This approximation is exact if x ~ U[-m, m]. For x ~ N(0, m)\n      # we need to iterate a few times before we can coverge\n      m = K.max(tf.abs(x), axis=axis, keepdims=True)\n      scale = 2 * m / 3.0\n      if \"po2\" in self.alpha:\n        scale = K.pow(2.0,\n                      tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0)))\n\n      for _ in range(self.number_of_unrolls):\n        thres = scale / 2.0\n        # once we scale the number precision == 0.33 works\n        # well for Uniform and Normal distribution of input\n        v = scale * _round_through(\n            x / scale,\n            use_stochastic_rounding=self.use_stochastic_rounding,\n            precision=1. 
/ 3.)\n        q = K.cast(tf.abs(v) >= thres, K.floatx()) * tf.sign(x)\n        scale = _get_least_squares_scale(self.alpha, x, q)\n    else:\n      if self.threshold is None:\n        thres = self.default_threshold\n      else:\n        thres = self.threshold\n      q = K.cast(tf.abs(x) >= thres, K.floatx()) * tf.sign(x)\n\n    # ternary ranges from -1 to +1, so we use tanh(x) to be a differentiable\n    # version of that.\n    if self.alpha is None:\n      x = K.tanh(x)\n\n    self.scale = scale\n    return x + tf.stop_gradient(-x + scale * q)\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n\n  def max(self):\n    \"\"\"Get the maximum value that ternary can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return 1.0\n    else:\n      return max(1.0, self.alpha)\n\n  def min(self):\n    \"\"\"Get the minimum value that ternary can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return -1.0\n    else:\n      return -max(1.0, self.alpha)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"alpha\": self.alpha,\n        \"threshold\": self.threshold,\n        \"use_stochastic_rounding\": self.use_stochastic_rounding,\n        \"number_of_unrolls\": self.number_of_unrolls\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass stochastic_ternary(ternary):  # pylint: disable=invalid-name\n  \"\"\"Computes a stochastic activation function returning -alpha, 0 or +alpha.\n\n  Computes straight-through approximation using random sampling to make\n  E[dL/dy] = E[dL/dx], and computing the sign function. 
See explanation above.\n\n  Attributes:\n    x: tensor to perform sign opertion with stochastic sampling.\n    bits: number of bits to perform quantization.\n    alpha: ternary is -alpha or +alpha, or \"auto\" or \"auto_po2\".\n    threshold: (1-threshold) specifies the spread of the +1 and -1 values.\n    temperature: amplifier factor for sigmoid function, making stochastic less\n      stochastic as it moves away from 0.\n    use_real_sigmoid: use real sigmoid for probability.\n    number_of_unrolls: number of times we iterate between scale and threshold.\n\n  Returns:\n    Computation of sign with stochastic sampling with straight through gradient.\n  \"\"\"\n\n  def __init__(\n      self,\n      alpha=None,\n      threshold=None,\n      temperature=8.0,\n      use_real_sigmoid=True,\n      number_of_unrolls=5,\n  ):\n    super().__init__(\n        alpha=alpha, threshold=threshold, number_of_unrolls=number_of_unrolls\n    )\n\n    self.bits = 2\n    self.alpha = alpha\n    self.threshold = threshold\n    assert threshold != 1.0\n    self.default_alpha = 1.0\n    self.default_threshold = 0.33\n    self.temperature = temperature\n    self.use_real_sigmoid = use_real_sigmoid\n    self.number_of_unrolls = number_of_unrolls\n    self.scale = None\n\n  def __str__(self):\n    flags = []\n    if self.alpha is not None:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.threshold is not None:\n      flags.append(\"threshold=\" + str(self.threshold))\n    if self.temperature != 8.0:\n      flags.append(\"temperature=\" + str(self.temperature))\n    if not self.use_real_sigmoid:\n      flags.append(\"use_real_sigmoid=0\")\n    if self.number_of_unrolls != 5:\n      flags.append(\"number_of_unrolls=\" + str(self.number_of_unrolls))\n    return \"stochastic_ternary(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    def 
stochastic_output():\n      # right now we only accept alpha = \"auto\" or \"auto_po2\"\n\n      assert isinstance(self.alpha, six.string_types)\n      assert self.alpha in [\"auto\", \"auto_po2\"]\n\n      if self.alpha is None:\n        scale = self.default_alpha\n      elif isinstance(self.alpha, six.string_types):\n        scale = 1.0\n        assert self.alpha in [\"auto\", \"auto_po2\"]\n      else:\n        assert self.alpha >= 0.0\n        scale = float(self.alpha)\n\n      len_axis = len(x.shape)\n      if len_axis > 1:\n        if K.image_data_format() == \"channels_last\":\n          axis = list(range(len_axis - 1))\n        else:\n          axis = list(range(1, len_axis))\n      else:\n        axis = [0]\n\n      x_std = K.std(x, axis=axis, keepdims=True)\n\n      m = K.max(tf.abs(x), axis=axis, keepdims=True)\n      scale = 2.0 * m / 3.0\n      if self.alpha == \"auto_po2\":\n        scale = K.pow(\n            2.0, tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0))\n        )\n      for _ in range(self.number_of_unrolls):\n        T = scale / 2.0\n        q_ns = K.cast(tf.abs(x) >= T, K.floatx()) * K.sign(x)\n        scale = _get_least_squares_scale(self.alpha, x, q_ns)\n\n      x_norm = x / (x_std + K.epsilon())\n      T = scale / (2.0 * (x_std + K.epsilon()))\n\n      if self.use_real_sigmoid:\n        p0 = tf.keras.backend.sigmoid(self.temperature * (x_norm - T))\n        p1 = tf.keras.backend.sigmoid(self.temperature * (x_norm + T))\n      else:\n        p0 = _sigmoid(self.temperature * (x_norm - T))\n        p1 = _sigmoid(self.temperature * (x_norm + T))\n      r0 = tf.random.uniform(tf.shape(p0))\n      r1 = tf.random.uniform(tf.shape(p1))\n      q0 = tf.sign(p0 - r0)\n      q0 += 1.0 - tf.abs(q0)\n      q1 = tf.sign(p1 - r1)\n      q1 += 1.0 - tf.abs(q1)\n\n      q = (q0 + q1) / 2.0\n      self.scale = scale\n      return x + tf.stop_gradient(-x + scale * q)\n\n    output = tf_utils.smart_cond(\n        K.learning_phase(), 
stochastic_output, lambda: ternary.__call__(self, x)\n    )\n    return output\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n\n  def max(self):\n    \"\"\"Get the maximum value that stochastic_ternary can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return 1.0\n    else:\n      return max(1.0, self.alpha)\n\n  def min(self):\n    \"\"\"Get the minimum value that stochastic_ternary can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return -1.0\n    else:\n      return -max(1.0, self.alpha)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"alpha\": self.alpha,\n        \"threshold\": self.threshold,\n        \"temperature\": self.temperature,\n        \"use_real_sigmoid\": self.use_real_sigmoid,\n        \"number_of_unrolls\": self.number_of_unrolls,\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass binary(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes the sign(x) returning a value between -alpha and alpha.\n\n  Although we cannot guarantee E[dL/dy] = E[dL/dx] if we do not use the\n  stochastic sampling, we still use the ST approximation.\n\n  Modified from original binary to match QNN implementation.\n\n  The binary qunatizer supports multiple-scales per tensor where:\n  - alpha: It can be set to \"auto\" or \"auto_po2\" to enable auto-scaling. \"auto\"\n           allows arbitrary scale while \"auto_po2\" allows power-of-two scales\n           only. 
It can also be set to a fixed value or None (i.e., no scaling).\n  - scale_axis: It determines the axis/axes to calculate the auto-scale at.\n  - elements_per_scale: It enables fine-grained scaling where it determines\n    the number of elements across scale axis/axes that should be grouped into\n    one scale.\n\n  Examples:\n\n  1. Input shape = [1, 8, 8, 16] alpha=\"auto\", scale_axis=None,\n     elements_per_scale=None --> Number of separate scales = 16\n\n  2. Input shape = [1, 8, 8, 16] alpha=\"auto\", scale_axis=1,\n     elements_per_scale=None --> Number of separate scales = 8\n\n  3. Input shape = [1, 8, 8, 16] alpha=\"auto\", scale_axis=1,\n     elements_per_scale=2 --> Number of separate scales = 4\n\n  4. Input shape = [1, 8, 8, 16] alpha=\"auto\", scale_axis=[2, 3],\n     elements_per_scale=2 --> Number of separate scales = 4*8 = 32\n\n  5. Input shape = [1, 8, 8, 16] alpha=\"auto\", scale_axis=[2, 3],\n     elements_per_scale=[2, 4] --> Number of separate scales = 4*4 = 16\n\n  Attributes:\n    x: tensor to perform sign_through.\n    bits: number of bits to perform quantization.\n    use_01: if True, return {0,1} instead of {-1,+1}.\n    alpha: binary is -alpha or +alpha, or \"auto\", \"auto_po2\" to compute\n      automatically.\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n    elements_per_scale: if set to an int or List[int], we create multiple scales\n      per axis across scale_axis, where 'elements_per_scale' represents the\n      number of elements/values associated with every separate scale value.\n    scale_axis: int or List[int] which axis/axes to calculate scale from.\n    min_po2_exponent: if set while using \"auto_po2\", it represents the minimum\n      allowed power of two exponent.\n    max_po2_exponent: if set while using \"auto_po2\", it represents the maximum\n      allowed power of two exponent.\n\n  Returns:\n    Computation of sign operation with straight through gradient.\n  \"\"\"\n\n  def 
__init__(self, use_01=False, alpha=None, use_stochastic_rounding=False,\n               scale_axis=None, elements_per_scale=None, min_po2_exponent=None,\n               max_po2_exponent=None):\n    super().__init__()\n    self.use_01 = use_01\n    self.bits = 1\n    self.alpha = alpha\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.default_alpha = 1.0\n    self.scale = None\n    self.scale_axis = scale_axis\n    self.elements_per_scale = elements_per_scale\n    self.min_po2_exponent = min_po2_exponent\n    self.max_po2_exponent = max_po2_exponent\n\n  def __str__(self):\n    def list_to_str(l):\n      return \",\".join([str(x) for x in l])\n\n    flags = []\n    if self.use_01:\n      flags.append(\"use_01=\" + str(int(self.use_01)))\n    if self.alpha is not None:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.elements_per_scale is not None:\n      if isinstance(self.elements_per_scale, list):\n        flags.append(\"elements_per_scale=[\" +\n                     list_to_str(self.elements_per_scale) + \"]\")\n      else:\n        flags.append(\"elements_per_scale=\" + str(self.elements_per_scale))\n    if self.scale_axis is not None:\n      if isinstance(self.scale_axis, list):\n        flags.append(\"scale_axis=[\" + list_to_str(self.scale_axis) + \"]\")\n      else:\n        flags.append(\"scale_axis=\" + str(self.scale_axis))\n    if self.min_po2_exponent is not None:\n      flags.append(\"min_po2_exponent=\" + str(self.min_po2_exponent))\n    if self.max_po2_exponent is not None:\n      flags.append(\"max_po2_exponent=\" + str(self.max_po2_exponent))\n    if self.use_stochastic_rounding:\n      flags.append(\n          \"use_stochastic_rounding=\" + str(self.use_stochastic_rounding))\n    return \"binary(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    if isinstance(self.alpha, six.string_types):\n 
     assert self.alpha in [\"auto\", \"auto_po2\"]\n    if self.alpha is None:\n      scale = self.default_alpha\n    elif isinstance(self.alpha, six.string_types):\n      scale = 1.0\n    elif isinstance(self.alpha, np.ndarray):\n      scale = self.alpha\n    else:\n      scale = float(self.alpha)\n\n    if self.use_stochastic_rounding:\n      try:\n        len_axis = len(x.shape.as_list())\n      except AttributeError:\n        len_axis = len(list(x.shape))\n      if len_axis == 1:\n        axis = None\n      elif K.image_data_format() == \"channels_last\":\n        axis = list(range(len_axis - 1))\n      else:\n        axis = list(range(1, len_axis))\n\n      # if stochastic_round is through, we need to scale\n      # number so that the precision is small enough.\n      # This is especially important if range of x is very\n      # small, which occurs during initialization of weights.\n      m = K.max(tf.abs(x), axis=axis, keepdims=True)\n      m = tf.where(m > 1.0, tf.ones_like(m), m)\n      f = 2 * m\n\n      x = tf_utils.smart_cond(\n          K.learning_phase(),\n          lambda: f * _round_through(\n              x / f, use_stochastic_rounding=True, precision=0.125),\n          lambda: x)\n\n    k_sign = tf.sign(x)\n    if self.use_stochastic_rounding:\n      # in inference, we use a biased \"1\" for stochastic rounding right now\n      k_sign += (1.0 - tf.abs(k_sign)) * tf_utils.smart_cond(\n          K.learning_phase(),\n          lambda: 2.0 * tf.round(tf.random.uniform(tf.shape(x))) - 1.0,\n          lambda: tf.ones_like(tf.shape(x), dtype=K.floatx()))\n      # if something still remains, just make it positive for now.\n    k_sign += (1.0 - tf.abs(k_sign))\n    if self.use_01:\n      k_sign = (k_sign + 1.0) / 2.0\n\n    # approximate binary by tanh(x) as it has limited range between -1 and +1.\n    if self.alpha is None:\n      x = K.tanh(x)\n\n    self.scale = _get_least_squares_scale(\n        self.alpha,\n        x,\n        k_sign,\n        
elements_per_scale=self.elements_per_scale,\n        scale_axis=self.scale_axis,\n        min_po2_exponent=self.min_po2_exponent,\n        max_po2_exponent=self.max_po2_exponent,\n    )\n    return x + tf.stop_gradient(-x + self.scale * k_sign)\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n\n  def max(self):\n    \"\"\"Get maximum value that binary class can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return 1.0\n    else:\n      return max(1.0, self.alpha)\n\n  def min(self):\n    \"\"\"Get minimum value that binary class can respresent.\"\"\"\n    if self.use_01:\n      return 0.0\n    elif self.alpha is None or isinstance(self.alpha, six.string_types):\n      return -1.0\n    else:\n      return -max(1.0, self.alpha)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"use_01\": self.use_01,\n        \"alpha\": self.alpha,\n        \"use_stochastic_rounding\": self.use_stochastic_rounding\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass stochastic_binary(binary):  # pylint: disable=invalid-name\n  \"\"\"Computes a stochastic activation function returning -alpha or +alpha.\n\n  Computes straight-through approximation using random sampling to make\n  E[dL/dy] = E[dL/dx], and computing the sign function. 
See explanation above.\n\n  Attributes:\n    x: tensor to perform sign opertion with stochastic sampling.\n    alpha: binary is -alpha or +alpha, or \"auto\" or \"auto_po2\".\n    bits: number of bits to perform quantization.\n    temperature: amplifier factor for sigmoid function, making stochastic\n      behavior less stochastic as it moves away from 0.\n    use_real_sigmoid: use real sigmoid from tensorflow for probablity.\n\n  Returns:\n    Computation of sign with stochastic sampling with straight through gradient.\n  \"\"\"\n\n  def __init__(self, alpha=None, temperature=6.0, use_real_sigmoid=True):\n    super().__init__(alpha=alpha)\n    self.alpha = alpha\n    self.bits = 1\n    self.temperature = temperature\n    self.use_real_sigmoid = use_real_sigmoid\n    self.default_alpha = 1.0\n    self.scale = None\n\n  def __str__(self):\n    flags = []\n    if self.alpha is not None:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.temperature != 6.0:\n      flags.append(\"temperature=\" + str(self.temperature))\n    if not self.use_real_sigmoid:\n      flags.append(\"use_real_sigmoid=\" + str(int(self.use_real_sigmoid)))\n    return \"stochastic_binary(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    def stochastic_output():\n      if isinstance(self.alpha, six.string_types):\n        assert self.alpha in [\"auto\", \"auto_po2\"]\n        len_axis = len(x.shape)\n        if len_axis > 1:\n          if K.image_data_format() == \"channels_last\":\n            axis = list(range(len_axis - 1))\n          else:\n            axis = list(range(1, len_axis))\n        else:\n          axis = [0]\n        std = K.std(x, axis=axis, keepdims=True) + K.epsilon()\n      else:\n        std = 1.0\n\n      if self.use_real_sigmoid:\n        p = tf.keras.backend.sigmoid(self.temperature * x / std)\n      else:\n        p = 
_sigmoid(self.temperature * x / std)\n\n      r = tf.random.uniform(tf.shape(x))\n      q = tf.sign(p - r)\n      q += 1.0 - tf.abs(q)\n      q_non_stochastic = tf.sign(x)\n      q_non_stochastic += 1.0 - tf.abs(q_non_stochastic)\n      scale = _get_least_squares_scale(self.alpha, x, q_non_stochastic)\n      self.scale = scale\n      return x + tf.stop_gradient(-x + scale * q)\n\n    output = tf_utils.smart_cond(\n        K.learning_phase(), stochastic_output, lambda: binary.__call__(self, x)\n    )\n    return output\n\n  def _set_trainable_parameter(self):\n    if self.alpha is None:\n      self.alpha = \"auto_po2\"\n\n  def max(self):\n    \"\"\"Get the maximum value that stochastic_binary can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return 1.0\n    else:\n      return max(1.0, self.alpha)\n\n  def min(self):\n    \"\"\"Get the minimum value that stochastic_binary can respresent.\"\"\"\n    if self.alpha is None or isinstance(self.alpha, six.string_types):\n      return -1.0\n    else:\n      return -max(1.0, self.alpha)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"alpha\": self.alpha,\n        \"temperature\": self.temperature,\n        \"use_real_sigmoid\": self.use_real_sigmoid,\n    }\n    return config\n\n\n@tf.function(jit_compile=True)\ndef fast_relu_quantize(p, m_i, factor):\n  return m_i * tf.clip_by_value(tf.round(p) * factor, 0.0, 1.0 - factor)\n\n\n@quantizer_registry.register_quantizer\nclass quantized_relu(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes a quantized relu to a number of bits.\n\n  Modified from:\n\n  [https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow]\n\n  Assume h(x) = +1 with p = sigmoid(x), -1 otherwise, the expected value of\n  h(x) is:\n\n  E[h(x)] = +1 P(p <= sigmoid(x)) - 1 P(p > sigmoid(x))\n          = +1 P(p <= sigmoid(x)) - 1 ( 1 - 
P(p <= sigmoid(x)) )\n          = 2 P(p <= sigmoid(x)) - 1\n          = 2 sigmoid(x) - 1, if p is sampled from a uniform distribution U[0,1]\n\n  If use_sigmoid is 0, we just keep the positive numbers up to\n  2**integer * (1 - 2**(-bits)) instead of normalizing them, which is easier\n  to implement in hardware.\n\n  Attributes:\n    bits: number of bits to perform quantization.\n    integer: number of bits to the left of the decimal point.\n    use_sigmoid: if true, we apply sigmoid to input to normalize it.\n    negative_slope: slope when activation < 0, needs to be power of 2.\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n    relu_upper_bound: A float representing an upper bound of the unquantized\n      relu. If None, we apply relu without the upper bound when\n      \"is_quantized_clip\" is set to false (true by default).\n      Note: The quantized relu uses the quantization parameters (bits and\n      integer) to upper bound. So it is important to set relu_upper_bound\n      appropriately to the quantization parameters. \"is_quantized_clip\"\n      has precedence over \"relu_upper_bound\" for backward compatibility.\n    is_quantized_clip: A boolean representing whether the inputs are clipped to\n      the maximum value represented by the quantization parameters. This\n      parameter is deprecated, and the default is set to True for backwards\n      compatibility. Users are encouraged to use \"relu_upper_bound\" instead.\n    qnoise_factor: float. a scalar from 0 to 1 that represents the level of\n      quantization noise to add. This controls the amount of the quantization\n      noise to add to the outputs by changing the weighted sum of\n      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.\n    var_name: String or None. A variable name shared between the tf.Variables\n      created in the build function. If None, it is generated automatically.\n    use_ste: Bool. 
Whether to use \"straight-through estimator\" (STE) method or\n        not.\n    use_variables: Bool. Whether to make the quantizer variables to be dynamic\n      tf.Variables or not.\n\n  Returns:\n    Function that performs relu + quantization to bits >= 0.\n  \"\"\"\n\n  def __init__(self,\n               bits=8,\n               integer=0,\n               use_sigmoid=0,\n               negative_slope=0.0,\n               use_stochastic_rounding=False,\n               relu_upper_bound=None,\n               is_quantized_clip=True,\n               qnoise_factor=1.0,\n               var_name=None,\n               use_ste=True,\n               use_variables=False,\n               enable_fast_inference=False):\n    super().__init__()\n    self.bits = bits\n    self.integer = integer\n    self.use_sigmoid = use_sigmoid\n    self.negative_slope = negative_slope\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.relu_upper_bound = relu_upper_bound\n    self.is_quantized_clip = is_quantized_clip\n    self.qnoise_factor = qnoise_factor\n    self.use_ste = use_ste\n    assert negative_slope >= 0.0\n    if negative_slope != 0.0:\n      assert np.mod(np.log2(negative_slope), 1) == 0\n    self.var_name = var_name\n    self.use_variables = use_variables\n    self.enable_fast_inference = enable_fast_inference\n\n  def __str__(self):\n    # Converts Tensors to printable strings by converting to a numpy array and\n    # then using regex to remove brackets when there is only one integer bit\n    integer_bits = re.sub(\n        r\"\\[(\\d)\\]\", r\"\\g<1>\",\n        str(self.integer.numpy() if isinstance(self.integer, tf.Variable\n                                              ) else self.integer))\n\n    flags = [str(self.bits), integer_bits]\n    if self.use_sigmoid or self.use_stochastic_rounding:\n      flags.append(str(int(self.use_sigmoid)))\n    if self.negative_slope:\n      flags.append(str(self.negative_slope))\n    if self.use_stochastic_rounding:\n     
 flags.append(str(int(self.use_stochastic_rounding)))\n    return \"quantized_relu(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    if self.enable_fast_inference:\n      # This is the fast inference version of the quantizer.\n      m_i = 1 << self.integer\n      p = x * (2 ** (self.bits - self.integer))\n      factor = 2 ** -self.bits\n      return fast_relu_quantize(p, m_i, factor)\n\n    if not self.built:\n      self.build(var_name=self.var_name, use_variables=self.use_variables)\n\n    non_sign_bits = self.bits - (self.negative_slope != 0.0)\n    x = K.cast(x, dtype=\"float32\")\n    m = K.cast(K.pow(2, non_sign_bits), dtype=\"float32\")\n    m_i = K.cast(K.pow(2, self.integer), dtype=\"float32\")\n\n    # is_quantized_clip has precedence over relu_upper_bound for backward\n    # compatibility.\n    m_f = K.cast(\n        K.pow(\n            tf.constant(2., tf.float32),\n            K.cast(self.integer, dtype=\"float32\") - non_sign_bits),\n        dtype=\"float32\")\n    if self.is_quantized_clip:\n      x_u = tf.where(x <= m_i - m_f, K.relu(x, alpha=self.negative_slope),\n                     tf.ones_like(x) * (m_i - m_f))\n    elif self.relu_upper_bound is not None:\n      x_u = tf.where(x <= self.relu_upper_bound,\n                     K.relu(x, alpha=self.negative_slope),\n                     tf.ones_like(x) * self.relu_upper_bound)\n    else:\n      x_u = K.relu(x, alpha=self.negative_slope)\n\n    if self.use_sigmoid:\n      p = _sigmoid(x / m_i) * m\n      xq = m_i * tf.keras.backend.clip(\n          2.0 * (_round_through(p, self.use_stochastic_rounding) / m) - 1.0,\n          0.0, 1.0 - 1.0 / m)\n      if self.negative_slope > 0:\n        neg_factor = 1 / (self.negative_slope * m)\n        xq = xq + m_i * self.negative_slope * tf.keras.backend.clip(\n            2.0 * (_round_through(p * self.negative_slope,\n                                  self.use_stochastic_rounding) * neg_factor) -\n            1.0, -1.0, 0.0)\n    else:\n      p 
= x * m / m_i\n      xq = m_i * tf.keras.backend.clip(\n          _round_through(p, self.use_stochastic_rounding) / m, 0.0,\n          1.0 - 1.0 / m)\n      if self.negative_slope > 0:\n        neg_factor = 1 / (self.negative_slope * m)\n        xq = xq + m_i * self.negative_slope * (\n            tf.keras.backend.clip(\n                _round_through(p * self.negative_slope,\n                               self.use_stochastic_rounding) * neg_factor, -1.0,\n                0.0))\n\n    if self.relu_upper_bound and not self.is_quantized_clip:\n      xq = tf.where(xq <= self.relu_upper_bound, xq,\n                    tf.ones_like(xq) * self.relu_upper_bound)\n\n    if self.use_ste:\n      return x_u + tf.stop_gradient(self.qnoise_factor * (-x_u + xq))\n    else:\n      return (1 - self.qnoise_factor) * x_u + tf.stop_gradient(\n          self.qnoise_factor * xq)\n\n  def max(self):\n    \"\"\"Get the maximum value that quantized_relu can represent.\"\"\"\n    unsigned_bits = self.bits - (self.negative_slope != 0.0)\n\n    if unsigned_bits > 0:\n      return max(\n          1.0,\n          np.array(\n              K.pow(2.0, K.cast(self.integer, dtype=\"float32\")),\n              dtype=\"float32\"))\n    else:\n      return 1.0\n\n  def min(self):\n    \"\"\"Get the minimum value that quantized_relu can represent.\"\"\"\n    if self.negative_slope == 0.0:\n      return 0.0\n\n    unsigned_bits = self.bits - 1\n    if unsigned_bits > 0:\n      return min(\n          -0.0, -self.negative_slope * np.array(\n              K.pow(2.0, K.cast(self.integer, dtype=\"float32\")),\n              dtype=\"float32\"))\n    else:\n      return -1.0\n\n  def range(self):\n    \"\"\"Returns a list of all values that quantized_relu can represent\n\n      ordered by their binary representation ascending.\n    \"\"\"\n    assert self.use_sigmoid == 0  # current unsupported\n    assert self.negative_slope == 0  # # unsupported unsupported\n    x = np.asarray(range(2**self.bits))\n    
return x * np.array(\n        K.pow(2.0, -self.bits + K.cast(self.integer, dtype=\"float32\")),\n        dtype=\"float32\")\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"bits\":\n            self.bits,\n        \"integer\":\n            self.integer.numpy() if isinstance(self.integer, tf.Variable) else\n            self.integer,\n        \"use_sigmoid\":\n            self.use_sigmoid,\n        \"negative_slope\":\n            self.negative_slope,\n        \"use_stochastic_rounding\":\n            self.use_stochastic_rounding,\n        \"relu_upper_bound\":\n            self.relu_upper_bound,\n        \"qnoise_factor\":\n            self.qnoise_factor.numpy() if isinstance(\n                self.qnoise_factor, tf.Variable) else self.qnoise_factor\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass quantized_ulaw(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes a u-law quantization.\n\n  Attributes:\n    bits: number of bits to perform quantization.\n    integer: number of bits to the left of the decimal point.\n    symmetric: if true, we will have the same number of values for positive\n      and negative numbers.\n    u: parameter of u-law\n\n  Returns:\n    Function that performs ulaw + quantization to bits in the range -1.0 to 1.0.\n  \"\"\"\n\n  def __init__(self, bits=8, integer=0, symmetric=0, u=255.0):\n    super().__init__()\n    self.bits = bits\n    self.integer = integer\n    self.symmetric = symmetric\n    self.u = u\n\n  def __str__(self):\n    flags = [str(self.bits), str(self.integer)]\n    if self.symmetric or self.u != 255.0:\n      flags.append(str(int(self.symmetric)))\n    if self.u != 255.0:\n      flags.append(str(self.u))\n    return \"quantized_ulaw(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    non_sign_bits = self.bits - 1\n    m = pow(2, non_sign_bits)\n    m_i = pow(2, 
self.integer)\n    p = _sigmoid(x / m_i) * m\n    rp = 2.0 * (_round_through(p) / m) - 1.0\n    u_law_p = tf.sign(rp) * tf.keras.backend.log(\n        1 + self.u * tf.abs(rp)) / tf.keras.backend.log(1 + self.u)\n    xq = m_i * tf.keras.backend.clip(u_law_p, -1.0 +\n                                     (1.0 * self.symmetric) / m, 1.0 - 1.0 / m)\n    return xq\n\n  def max(self):\n    \"\"\"Get the maximum value that quantized_ulaw can represent.\"\"\"\n    unsigned_bits = self.bits - 1\n\n    if unsigned_bits > 0:\n      return max(1.0, np.power(2.0, self.integer))\n    else:\n      return 1.0\n\n  def min(self):\n    \"\"\"Get the minimum value that quantized_ulaw can represent.\"\"\"\n    unsigned_bits = self.bits - 1\n\n    if unsigned_bits > 0:\n      return -max(1.0, np.power(2.0, self.integer))\n    else:\n      return -1.0\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"bits\": self.bits,\n        \"integer\": self.integer,\n        \"symmetric\": self.symmetric,\n        \"u\": self.u\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass quantized_tanh(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes a quantized tanh to a number of bits.\n\n  Modified from:\n\n  [https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow]\n\n  Attributes:\n    bits: number of bits to perform quantization.\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n    symmetric: if true, we will have the same number of values for positive\n      and negative numbers.\n    use_real_tanh: if true, use the tanh function from Keras backend,\n      if false, use tanh that is defined as 2 * sigmoid(x) - 1\n\n  Returns:\n    Function that performs tanh + quantization to bits in the range -1.0 to 1.0.\n  \"\"\"\n\n  def __init__(self, bits=8, use_stochastic_rounding=False,\n               symmetric=False, 
use_real_tanh=False):\n    super().__init__()\n    self.bits = bits\n    self.symmetric = symmetric\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.use_real_tanh = use_real_tanh\n\n  def __str__(self):\n    flags = [str(self.bits)]\n    if self.use_stochastic_rounding:\n      flags.append(str(int(self.use_stochastic_rounding)))\n    if self.symmetric:\n      flags.append(str(int(self.symmetric)))\n    if self.use_real_tanh:\n      flags.append(str(int(self.use_real_tanh)))\n    return \"quantized_tanh(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    non_sign_bits = self.bits - 1\n    x = K.cast_to_floatx(x)\n    m = K.cast_to_floatx(K.pow(2, non_sign_bits))\n    p = K.tanh(x) if self.use_real_tanh else 2.0 * _sigmoid(x) - 1.0\n    return tf.keras.backend.clip(\n                                 (_round_through(p * m, self.use_stochastic_rounding) / m),\n                                 -1.0 + (1.0 * self.symmetric) / m,\n                                 1.0 - 1.0 / m)\n\n  def max(self):\n    \"\"\"Get the maximum value that quantized_tanh can represent.\"\"\"\n    return 1.0 - 1.0 / pow(2, self.bits - 1)\n\n  def min(self):\n    \"\"\"Get the minimum value that quantized_tanh can represent.\"\"\"\n    return -1.0 + (1.0 * self.symmetric) / pow(2, self.bits - 1)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"bits\": self.bits,\n        \"symmetric\": self.symmetric,\n        \"use_stochastic_rounding\": self.use_stochastic_rounding,\n        \"use_real_tanh\": self.use_real_tanh\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass quantized_sigmoid(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Computes a quantized sigmoid to a number of bits.\n\n  Attributes:\n    bits: number of bits to perform quantization.\n    symmetric: if true, we will have the same number of values for positive\n      and 
negative numbers.\n    use_real_sigmoid: if true, will use the sigmoid from Keras backend\n    use_stochastic_rounding: if true, we perform stochastic rounding.\n\n  Returns:\n    Function that performs sigmoid + quantization to bits in the range 0.0 to 1.0.\n  \"\"\"\n\n  def __init__(self, bits=8, symmetric=False,\n               use_real_sigmoid=False,\n               use_stochastic_rounding=False):\n    super().__init__()\n    self.bits = bits\n    self.symmetric = symmetric\n    self.use_real_sigmoid = use_real_sigmoid\n    self.use_stochastic_rounding = use_stochastic_rounding\n\n  def __str__(self):\n    flags = [str(self.bits)]\n    if self.symmetric:\n      flags.append(str(int(self.symmetric)))\n    if self.use_real_sigmoid:\n      flags.append(str(int(self.use_real_sigmoid)))\n    if self.use_stochastic_rounding:\n      flags.append(str(int(self.use_stochastic_rounding)))\n    return \"quantized_sigmoid(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    x = K.cast_to_floatx(x)\n    m = K.cast_to_floatx(K.pow(2, self.bits))\n\n    p = K.sigmoid(x) if self.use_real_sigmoid else _sigmoid(x)\n\n    return tf.keras.backend.clip((_round_through(p*m, self.use_stochastic_rounding) / m),\n                                 (1.0 * self.symmetric) / m,\n                                 1.0 - 1.0 / m)\n\n  def max(self):\n    \"\"\"Get the maximum value that quantized_sigmoid can represent.\"\"\"\n    return 1.0 - 1.0 / pow(2, self.bits)\n\n  def min(self):\n    \"\"\"Get the minimum value that quantized_sigmoid can represent.\"\"\"\n    return (1.0 * self.symmetric) / pow(2, self.bits)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    config = {\n        \"bits\": self.bits,\n        \"symmetric\": self.symmetric,\n        \"use_real_sigmoid\": self.use_real_sigmoid,\n        \"use_stochastic_rounding\": self.use_stochastic_rounding\n    }\n    return config\n\n\ndef _clip_power_of_two(x_abs,\n   
                    min_exp,\n                       max_exp,\n                       max_value,\n                       quadratic_approximation=False,\n                       use_stochastic_rounding=False,\n                       log2_rounding=\"rnd\"):\n  \"\"\"Clips a tensor using power-of-two quantizer.\n\n\n  Args:\n    x_abs: A tensor object. Its elements should be non-negative.\n    min_exp: An integer representing the smallest exponent.\n    max_exp: An integer representing the largest exponent.\n    max_value: A float or None. If it is None, we clip the value to max_value.\n    quadratic_approximation: An boolean representing whether the quadratic\n      approximation is applied.\n    use_stochastic_rounding: An boolean representing whether the stochastic\n      rounding method is applied.\n    log2_rounding: log2 rounding mode. \"rnd\" and \"floor\" currently\n      supported, corresponding to tf.round and tf.floor respectively.\n\n  Returns:\n    A tensor object, the values are clipped by min_exp and max_exp.\n  \"\"\"\n\n  # if quadratic_approximation is True, round to the exponent for sqrt(x),\n  # so that the return value can be divided by two without remainder.\n  log2 = np.log(2.0)\n\n  # When the elements of x_abs are small than the keras epsilon,\n  # we just overwrite x_abs with eps\n  eps = tf.keras.backend.epsilon()\n  x_filter = tf.where(x_abs < eps, eps, x_abs)\n  if max_value is not None:\n    # If the elements of x_filter has value larger than x_value, clip it.\n    x_filter = tf.where(x_filter >= max_value,\n                        tf.ones_like(x_filter) * max_value, x_filter)\n\n  def power_of_two_clip(x_abs, min_exp, max_exp, quadratic_approximation,\n                        use_stochastic_rounding, log2_rounding):\n    assert log2_rounding in [\"rnd\", \"floor\"]\n\n    if quadratic_approximation:\n      q_factor = 2.0\n      x_input = tf.sqrt(x_abs)\n    else:\n      q_factor = 1.0\n      x_input = x_abs\n\n    if log2_rounding == 
\"floor\":\n      x_log2 = _floor_through(tf.keras.backend.log(x_input) / log2)\n    elif use_stochastic_rounding:\n      x_log2 = tf_utils.smart_cond(\n          K.learning_phase(),\n          lambda: stochastic_round_po2(x_input),\n          lambda: _round_through(tf.keras.backend.log(x_input) / log2))\n    else:\n      x_log2 = _round_through(tf.keras.backend.log(x_input) / log2)\n\n    x_clipped = q_factor * tf.keras.backend.clip(x_log2, min_exp, max_exp)\n    return x_clipped\n\n  x_clipped = tf.where(\n      x_abs < eps,\n      tf.ones_like(x_abs) * min_exp,\n      power_of_two_clip(x_filter, min_exp, max_exp, quadratic_approximation,\n                        use_stochastic_rounding, log2_rounding))\n\n  return x_clipped\n\n\ndef _need_exponent_sign_bit_check(max_value):\n  \"\"\"Checks whether the sign bit of exponent is needed.\n\n  This is used by quantized_po2 and quantized_relu_po2.\n\n  Args:\n    max_value: the maximum value allowed.\n\n  Returns:\n    An integer. 1: sign_bit is needed. 
0: sign_bit is not needed.\n  \"\"\"\n\n  if max_value is not None:\n    if max_value < 0:\n      raise ValueError(\"po2 max_value should be non-negative.\")\n    if max_value > 1:\n      # if max_value is larger than 1,\n      #   the exponent could be positive and negative.\n      #   e.g., log(max_value) > 0 when max_value > 1\n      need_exponent_sign_bit = 1\n    else:\n      need_exponent_sign_bit = 0\n  else:\n    # max_value is not specified, so we cannot decide the range.\n    # Then we need to put sign_bit for exponent to be safe\n    need_exponent_sign_bit = 1\n  return need_exponent_sign_bit\n\n\ndef _get_min_max_exponents(non_sign_bits, need_exponent_sign_bit,\n                           quadratic_approximation):\n  \"\"\"Given a bitwidth, gets min and max exponents that it can represent.\n\n  Args:\n    non_sign_bits: An integer representing the bitwidth of the exponent.\n    need_exponent_sign_bit: An integer representing whether it needs sign bit\n      in exponent. (1: need sign bit. 0: sign bit is not needed.)\n    quadratic_approximation: A boolean representing whether the quadratic\n      approximiation method is enforced.\n\n  Returns:\n    A tuple of integers: min_exp, max_exp\n  \"\"\"\n  effect_bits = non_sign_bits - need_exponent_sign_bit\n  min_exp = -2**(effect_bits)\n  max_exp = 2**(effect_bits) - 1\n  if quadratic_approximation:\n    max_exp = 2 * (max_exp // 2)\n  return min_exp, max_exp\n\n\n@quantizer_registry.register_quantizer\nclass quantized_po2(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Quantizes to the closest power of 2.\n\n  Attributes:\n    bits: An integer, the bits allocated for the exponent, its sign and the sign\n      of x.\n    max_value: An float or None. 
If None, no max_value is specified.\n      Otherwise, the maximum value of quantized_po2 <= max_value\n    use_stochastic_rounding: A boolean, default is False, if True, it uses\n      stochastic rounding and forces the mean of x to be x statstically.\n    quadratic_approximation: A boolean, default is False if True, it forces the\n      exponent to be even number that closted to x.\n    log2_rounding: A string, log2 rounding mode. \"rnd\" and \"floor\" currently\n      supported, corresponding to tf.round and tf.floor respectively.\n    qnoise_factor: float. a scalar from 0 to 1 that represents the level of\n      quantization noise to add. This controls the amount of the quantization\n      noise to add to the outputs by changing the weighted sum of\n      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.\n    var_name: String or None. A variable name shared between the tf.Variables\n      created in the build function. If None, it is generated automatically.\n    use_ste: Bool. Whether to use \"straight-through estimator\" (STE) method or\n        not.\n    use_variables: Bool. 
Whether to make the quantizer variables to be dynamic\n      tf.Variables or not.\n  \"\"\"\n\n  def __init__(self,\n               bits=8,\n               max_value=None,\n               use_stochastic_rounding=False,\n               quadratic_approximation=False,\n               log2_rounding=\"rnd\",\n               qnoise_factor=1.0,\n               var_name=None,\n               use_ste=True,\n               use_variables=False):\n    super().__init__()\n    self.bits = bits\n    self.max_value = max_value\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.log2_rounding = log2_rounding\n    # if True, round to the exponent for sqrt(x),\n    # so that the return value can be divided by two without remainder.\n    self.quadratic_approximation = quadratic_approximation\n    need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value)\n    non_sign_bits = self.bits - 1\n    self._min_exp, self._max_exp = _get_min_max_exponents(\n        non_sign_bits, need_exponent_sign_bit, self.quadratic_approximation)\n    # qnoise_factor related attributes\n    self.qnoise_factor = qnoise_factor\n    self.use_ste = use_ste\n    self.var_name = var_name\n    self.use_variables = use_variables\n\n  def __str__(self):\n    flags = [str(self.bits)]\n    if self.max_value is not None or self.use_stochastic_rounding:\n      flags.append(str(int(self.max_value)))\n    if self.use_stochastic_rounding:\n      flags.append(str(int(self.use_stochastic_rounding)))\n    if self.quadratic_approximation:\n      flags.append(\n          \"quadratic_approximation=\" + str(int(self.quadratic_approximation)))\n    return \"quantized_po2(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    if not self.built:\n      self.build(var_name=self.var_name, use_variables=self.use_variables)\n\n    x_sign = tf.sign(x)\n    x_sign += (1.0 - tf.abs(x_sign))\n    x_abs = tf.abs(x)\n    x_clipped = _clip_power_of_two(x_abs, self._min_exp, self._max_exp,\n                
                   self.max_value,\n                                   self.quadratic_approximation,\n                                   self.use_stochastic_rounding,\n                                   self.log2_rounding)\n    xq = x_sign * pow(2.0, x_clipped)\n\n    if self.use_ste:\n      return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))\n    else:\n      return (1 - self.qnoise_factor) * x + tf.stop_gradient(\n          self.qnoise_factor * xq)\n\n  def max(self):\n    \"\"\"Get the maximum value that quantized_po2 can represent.\"\"\"\n    if self.max_value:\n      return max(1.0, self.max_value)\n    else:\n      return max(1.0, 2**self._max_exp)\n\n  def min(self):\n    \"\"\"Get the minimum value that quantized_po2 can represent.\"\"\"\n    if self.max_value:\n      return -max(1.0, self.max_value)\n    else:\n      return -max(1.0, 2**self._max_exp)\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    \"\"\"Gets configugration of the quantizer.\n\n    Returns:\n      A dict mapping quantization configuration, including\n        bits: bitwidth for exponents.\n        max_value: the maximum value of this quantized_po2 can represent.\n        use_stochastic_rounding:\n          if True, stochastic rounding is used.\n        quadratic_approximation:\n          if True, the exponent is enforced to be even number, which is\n          the closest one to x.\n        log2_rounding:\n          A string, Log2 rounding mode\n    \"\"\"\n    config = {\n        \"bits\":\n            self.bits,\n        \"max_value\":\n            self.max_value,\n        \"use_stochastic_rounding\":\n            self.use_stochastic_rounding,\n        \"quadratic_approximation\":\n            self.quadratic_approximation,\n        \"qnoise_factor\":\n            self.qnoise_factor.numpy() if isinstance(\n                self.qnoise_factor, tf.Variable) else self.qnoise_factor,\n        \"log2_rounding\":\n            
self.log2_rounding\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass quantized_relu_po2(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name\n  \"\"\"Quantizes x to the closest power of 2 when x > 0\n\n  Attributes:\n    bits: An integer, the bits allocated for the exponent and its sign.\n    max_value: default is None, or a non-negative value to put a constraint for\n      the max value.\n    negative_slope: slope when activation < 0, needs to be power of 2.\n    use_stochastic_rounding: A boolean, default is False, if True, it uses\n      stochastic rounding and forces the mean of x to be x statstically.\n    quadratic_approximation: A boolean, default is False if True, it forces the\n      exponent to be even number that is closest to x.\n    log2_rounding: A string, log2 rounding mode. \"rnd\" and \"floor\" currently\n      supported, corresponding to tf.round and tf.floor respectively.\n    qnoise_factor: float. a scalar from 0 to 1 that represents the level of\n      quantization noise to add. This controls the amount of the quantization\n      noise to add to the outputs by changing the weighted sum of\n      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.\n    var_name: String or None. A variable name shared between the tf.Variables\n      created in the build function. If None, it is generated automatically.\n    use_ste: Bool. Whether to use \"straight-through estimator\" (STE) method or\n        not.\n    use_variables: Bool. 
Whether to make the quantizer variables to be dynamic\n      tf.Variables or not.\n  \"\"\"\n\n  def __init__(self,\n               bits=8,\n               max_value=None,\n               negative_slope=0,\n               use_stochastic_rounding=False,\n               quadratic_approximation=False,\n               log2_rounding=\"rnd\",\n               qnoise_factor=1.0,\n               var_name=None,\n               use_ste=True,\n               use_variables=False):\n    super().__init__()\n    self.bits = bits\n    self.max_value = max_value\n    self.negative_slope = negative_slope\n    self.use_stochastic_rounding = use_stochastic_rounding\n    self.log2_rounding = log2_rounding\n    # if True, round to the exponent for sqrt(x),\n    # so that the return value can be divided by two without remainder.\n    self.quadratic_approximation = quadratic_approximation\n    need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value)\n    self._min_exp = -2**(self.bits - need_exponent_sign_bit)\n    self._max_exp = 2**(self.bits - need_exponent_sign_bit) - 1\n    if self.quadratic_approximation:\n      self._max_exp = 2 * (self._max_exp // 2)\n\n    assert negative_slope >= 0.0\n    if negative_slope != 0:\n      assert np.mod(np.log2(negative_slope), 1) == 0\n    # qnoise_factor related attributes\n    self.qnoise_factor = qnoise_factor\n    self.use_ste = use_ste\n    self.var_name = var_name\n    self.use_variables = use_variables\n\n  def __str__(self):\n    flags = [str(self.bits)]\n    if self.max_value is not None or self.use_stochastic_rounding:\n      flags.append(str(int(self.max_value)))\n    if self.negative_slope:\n      flags.append(str(self.negative_slope))\n    if self.use_stochastic_rounding:\n      flags.append(str(int(self.use_stochastic_rounding)))\n    if self.quadratic_approximation:\n      flags.append(\n          \"quadratic_approximation=\" + str(int(self.quadratic_approximation)))\n    return \"quantized_relu_po2(\" + 
\",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    if not self.built:\n      self.build(var_name=self.var_name, use_variables=self.use_variables)\n\n    x_original = x\n\n    if self.max_value is None:\n      x = K.relu(x, self.negative_slope)\n    else:\n      x = tf.where(\n          x <= self.max_value,\n          K.relu(x, self.negative_slope),\n          tf.ones_like(x) * self.max_value)\n\n    x_pos_clipped = _clip_power_of_two(\n        K.relu(x_original),\n        self._min_exp, self._max_exp,\n        self.max_value,\n        self.quadratic_approximation,\n        self.use_stochastic_rounding,\n        self.log2_rounding)\n\n    x_neg_clipped = _clip_power_of_two(\n        K.relu(-x_original) * self.negative_slope,\n        self._min_exp, self._max_exp,\n        self.max_value,\n        self.quadratic_approximation,\n        self.use_stochastic_rounding,\n        self.log2_rounding)\n\n    xq = tf.where(\n        tf.logical_or(x_original >= 0.0, self.negative_slope == 0.0),\n        pow(2.0, x_pos_clipped), -pow(2.0, x_neg_clipped))\n\n    if self.use_ste:\n      return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))\n    else:\n      return (1 - self.qnoise_factor) * x + tf.stop_gradient(\n          self.qnoise_factor * xq)\n\n  def max(self):\n    \"\"\"Get the maximum value that quantized_relu_po2 can represent.\"\"\"\n    if self.max_value:\n      return max(1.0, self.max_value)\n    else:\n      return max(1.0, 2**self._max_exp)\n\n  def min(self):\n    \"\"\"Get the minimum value that quantized_relu_po2 can represent.\"\"\"\n    if self.negative_slope == 0.0:\n      return 2**self._min_exp\n\n    unsigned_bits = self.bits - 1\n    if unsigned_bits > 0:\n      return min(2**self._min_exp, - self.negative_slope * np.power(2.0, unsigned_bits))\n    else:\n      return 2**self._min_exp\n\n  @classmethod\n  def from_config(cls, config):\n    return cls(**config)\n\n  def get_config(self):\n    \"\"\"Gets configugration of the quantizer.\n\n   
 Returns:\n      A dict mapping quantization configuration, including\n        bits: bitwidth for exponents.\n        max_value: the maximum value of this quantized_relu_po2 can represent.\n        use_stochastic_rounding:\n          if True, stochastic rounding is used.\n        quadratic_approximation:\n          if True, the exponent is enforced to be even number, which is\n          the closest one to x.\n        log2_rounding:\n          A string, Log2 rounding mode\n\n    \"\"\"\n\n    config = {\n        \"bits\":\n            self.bits,\n        \"max_value\":\n            self.max_value,\n        \"negative_slope\":\n            self.negative_slope,\n        \"use_stochastic_rounding\":\n            self.use_stochastic_rounding,\n        \"quadratic_approximation\":\n            self.quadratic_approximation,\n        \"qnoise_factor\":\n            self.qnoise_factor.numpy() if isinstance(\n                self.qnoise_factor, tf.Variable) else self.qnoise_factor,\n        \"log2_rounding\":\n            self.log2_rounding\n    }\n    return config\n\n\n@quantizer_registry.register_quantizer\nclass quantized_hswish(quantized_bits):  # pylint: disable=invalid-name\n  \"\"\"Computes a quantized hard swish to a number of bits.\n\n  # TODO(mschoenb97): Update to inherit from quantized_linear.\n\n  Equation of h-swisth function in mobilenet v3:\n  hswish(x) = x * ReluY(x + relu_shift) / Y\n  Y is relu_upper_bound\n\n  Attributes:\n    bits: number of bits to perform quantization, also known as word length.\n    integer: number of integer bits.\n    symmetric: if True,  the quantization is in symmetric mode, which puts\n      restricted range for the quantizer. Otherwise, it is in asymmetric mode,\n      which uses the full range.\n    alpha: a tensor or None, the scaling factor per channel. If None, the\n      scaling factor is 1 for all channels.\n    use_stochastic_rounding: if true, we perform stochastic rounding. 
This\n      parameter is passed on to the underlying quantizer quantized_bits which is\n      used to quantize h_swish.\n    scale_axis: which axis to calculate scale from\n    qnoise_factor: float. a scalar from 0 to 1 that represents the level of\n      quantization noise to add. This controls the amount of the quantization\n      noise to add to the outputs by changing the weighted sum of (1 -\n      qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.\n    var_name: String or None. A variable name shared between the tf.Variables\n      created in the build function. If None, it is generated automatically.\n    use_ste: Bool. Whether to use \"straight-through estimator\" (STE) method or\n      not.\n    use_variables: Bool. Whether to make the quantizer variables to be dynamic\n      tf.Variables or not.\n    relu_shift: integer type, representing the shift amount of the unquantized\n      relu.\n    relu_upper_bound: integer type, representing an upper bound of the\n      unquantized relu. If None, we apply relu without the upper bound when\n      \"is_quantized_clip\" is set to false (true by default).\n      Note: The quantized relu uses the quantization parameters (bits and\n        integer) to upper bound. So it is important to set relu_upper_bound\n        appropriately to the quantization parameters. 
\"is_quantized_clip\" has\n        precedence over \"relu_upper_bound\" for backward compatibility.\n  \"\"\"\n\n  def __init__(\n      self,\n      bits=8,\n      integer=0,\n      symmetric=0,\n      alpha=None,\n      use_stochastic_rounding=False,\n      scale_axis=None,\n      qnoise_factor=1.0,\n      var_name=None,\n      use_variables=False,\n      relu_shift: int = 3,\n      relu_upper_bound: int = 6,\n  ):\n    super().__init__(\n        bits=bits,\n        integer=integer,\n        symmetric=symmetric,\n        keep_negative=True,\n        alpha=alpha,\n        use_stochastic_rounding=use_stochastic_rounding,\n        scale_axis=scale_axis,\n        qnoise_factor=qnoise_factor,\n        var_name=var_name,\n        use_variables=use_variables,\n    )\n\n    self.relu_shift = relu_shift\n    self.relu_upper_bound = relu_upper_bound\n\n  def __str__(self):\n    \"\"\"Converts Tensors to printable strings.\"\"\"\n\n    integer_bits = re.sub(\n        r\"\\[(\\d)\\]\",\n        r\"\\g<1>\",\n        str(\n            self.integer.numpy()\n            if isinstance(self.integer, tf.Variable)\n            else self.integer\n        ),\n    )\n    assert isinstance(integer_bits, int)\n\n    flags = [\n        str(self.bits),\n        integer_bits,\n        str(int(self.symmetric)),\n        \"relu_shift=\" + str(self.relu_shift),\n        \"relu_upper_bound=\" + str(self.relu_upper_bound),\n    ]\n\n    if not self.keep_negative:\n      flags.append(\"keep_negative=False\")\n    if self.alpha:\n      alpha = str(self.alpha)\n      if isinstance(self.alpha, six.string_types):\n        alpha = \"'\" + alpha + \"'\"\n      flags.append(\"alpha=\" + alpha)\n    if self.use_stochastic_rounding:\n      flags.append(\n          \"use_stochastic_rounding=\" + str(int(self.use_stochastic_rounding))\n      )\n    return \"quantized_hswish(\" + \",\".join(flags) + \")\"\n\n  def __call__(self, x):\n    assert self.relu_upper_bound > 0, (\n        \"relu_upper_bound must be 
a positive value, \"\n        f\"found {self.relu_upper_bound} instead\"\n    )\n    assert (\n        self.relu_shift > 0\n    ), f\"relu_shift must be a positive value, found {self.relu_shift} instead\"\n    x = K.cast_to_floatx(x)\n    shift_x = x + self.relu_shift\n    relu_x = tf.where(\n        shift_x <= self.relu_upper_bound,\n        K.relu(shift_x, alpha=False),\n        tf.ones_like(shift_x) * self.relu_upper_bound,\n    )\n\n    hswish_x = tf.math.multiply(x, relu_x) / self.relu_upper_bound\n    return super(quantized_hswish, self).__call__(hswish_x)\n\n  def min(self):\n    \"\"\"Gets the minimum value that quantized_hswish can represent.\"\"\"\n\n    # get the minimum value that the number of bits can represent\n    min_quant = super(quantized_hswish, self).min()\n    # In the negative end, the hswish function becomes\n    # x * (x + relu_shift) / relu_upper_bound\n    # the min value of this parabolic function is\n    # - relu_shift^2 / (4 * relu_upper_bound)\n    denom = 4 * self.relu_upper_bound\n    min_parabolic = -self.relu_shift * self.relu_shift / denom\n\n    if min_quant >= min_parabolic:\n      return min_quant\n\n    # get the quantized value of min_parabolic\n    return super(quantized_hswish, self).call(min_parabolic)\n\n  def get_config(self):\n    \"\"\"Add relu_shift and relu_upper_bound to the config file.\"\"\"\n\n    base_config = super(quantized_hswish, self).get_config()\n\n    config = {\n        \"relu_shift\": self.relu_shift,\n        \"relu_upper_bound\": self.relu_upper_bound,\n    }\n\n    out_config = dict(list(base_config.items()) + list(config.items()))\n    return out_config\n\n\n# TODO(akshayap): Update to use registry for quantizers instead of globals().\ndef get_quantizer(identifier):\n  \"\"\"Gets the quantizer.\n\n  Args:\n    identifier: An quantizer, which could be dict, string, or callable function.\n\n  Returns:\n    A quantizer class or quantization function from this file. 
For example,\n      Quantizer classes: quantized_bits, quantized_po2, quantized_relu_po2,\n      binary, stochastic_binary, ternary, stochastic_ternary, etc.\n\n      Quantization functions: binary_sigmoid, hard_sigmoid, soft_sigmoid, etc.\n\n  Raises:\n    ValueError: An error occurred when quantizer cannot be interpreted.\n  \"\"\"\n\n  if identifier is None:\n    return None\n  if isinstance(identifier, dict):\n    return deserialize_keras_object(\n        identifier, module_objects=globals(), printable_module_name=\"quantizer\")\n  elif isinstance(identifier, six.string_types):\n    return safe_eval(identifier, globals())\n  elif callable(identifier):\n    return identifier\n  else:\n    raise ValueError(\"Could not interpret quantizer identifier: \" +\n                     str(identifier))\n\n\ndef get_quantized_initializer(w_initializer, w_range):\n  \"\"\"Gets the initializer and scales it by the range.\"\"\"\n\n  if isinstance(w_initializer, six.string_types):\n\n    if w_initializer == \"he_normal\":\n      return initializers.VarianceScaling(\n          scale=2 * w_range, mode=\"fan_in\", distribution=\"normal\", seed=None)\n    if w_initializer == \"he_uniform\":\n      return initializers.VarianceScaling(\n          scale=2 * w_range, mode=\"fan_in\", distribution=\"uniform\", seed=None)\n    elif w_initializer == \"glorot_normal\":\n      return initializers.VarianceScaling(\n          scale=w_range, mode=\"fan_avg\", distribution=\"normal\", seed=None)\n    elif w_initializer == \"glorot_uniform\":\n      return initializers.VarianceScaling(\n          scale=w_range, mode=\"fan_avg\", distribution=\"uniform\", seed=None)\n    elif w_initializer == \"random_uniform\":\n      return initializers.RandomUniform(-w_range, w_range)\n\n  return w_initializer\n"
  },
  {
    "path": "qkeras/registry.py",
    "content": "# Copyright 2024 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"General purpose registy for registering classes or functions.\n\nThe registry can be used along with decorators to record any class/function.\n\nSample usage:\n  # Setup registry with decorator.\n  _REGISTRY = registry.Registry()\n  def register(cls):\n    _REGISTRY.register(cls)\n  def lookup(name):\n    return _REGISTRY.lookup(name)\n\n  # Register instances.\n  @register\n  def foo_task():\n    ...\n\n  @register\n  def bar_task():\n    ...\n\n  # Retrieve instances.\n  def my_executor():\n   ...\n   my_task = lookup(\"foo_task\")\n   ...\n\"\"\"\n\n\nclass Registry(object):\n  \"\"\"A registry class to record class representations or function objects.\"\"\"\n\n  def __init__(self):\n    \"\"\"Initializes the registry.\"\"\"\n    self._container = {}\n\n  def register(self, item, name=None):\n    \"\"\"Register an item.\n\n    Args:\n     item: Python item to be recorded.\n     name: Optional name to be used for recording item. 
If not provided,\n       item.__name__ is used.\n    \"\"\"\n    if not name:\n      name = item.__name__\n    self._container[name] = item\n\n  def lookup(self, name):\n    \"\"\"Retrieves an item from the registry.\n\n    Args:\n      name: Name of the item to lookup.\n\n    Returns:\n      Registered item from the registry.\n    \"\"\"\n    return self._container[name]\n"
  },
  {
    "path": "qkeras/safe_eval.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements a safe evaluation using globals().\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom pyparsing import delimitedList\nfrom pyparsing import Group\nfrom pyparsing import Optional\nfrom pyparsing import Regex\nfrom pyparsing import Suppress\n\nimport logging\nfrom tensorflow import keras\n\n\ndef Num(s):\n  \"\"\"Tries to convert string to either int or float.\"\"\"\n  try:\n    try:\n      return int(s)\n    except ValueError:\n      return float(s)\n  except ValueError:\n    # this should be always true. 
if it isn't int or float, it should be str\n    assert (\n        (s[0] == '\"' and s[-1] == '\"') or\n        (s[0] == \"'\" and s[-1] == \"'\")\n    )\n    s = s[1:-1]\n    return s\n\ndef Str(s):\n  return s[1:-1]\n\ndef IsNum(s):\n  try:\n    try:\n      int(s)\n      return True\n    except ValueError:\n      float(s)\n      return True\n  except ValueError:\n    return False\n\ndef IsBool(s):\n  if s in [\"True\", \"False\"]:\n    return True\n  else:\n    return False\n\ndef IsNone(s):\n  return s == \"None\"\n\ndef Bool(s):\n  return True if \"True\" in s else False\n\ndef ListofNums(s):\n  # remove list brackets\n  s = s.replace(\"[\", \"\").replace(\"]\", \"\")\n  list_s = s.split(\" \")\n  return [Num(e) for e in list_s]\n\ndef IsListofNums(s):\n  # remove list brackets\n  s = s.replace(\"[\", \"\").replace(\"]\", \"\")\n  list_s = s.split(\" \")\n  if len(list_s) > 1:\n    for e in list_s:\n      # if any of the elements is not a number return false\n      if not IsNum(e):\n        return False\n    return True\n  else:\n    return False\n\ndef GetArg(s):\n  if IsBool(s):\n    return Bool(s)\n  elif IsNum(s):\n    return Num(s)\n  elif IsNone(s):\n    return None\n  elif IsListofNums(s):\n    return ListofNums(s)\n  else:\n    return Str(s)\n\n\ndef GetParams(s):\n  \"\"\"Extracts args and kwargs from string.\"\"\"\n  # modified from https://stackoverflow.com/questions/38799223/parse-string-to-identify-kwargs-and-args  # pylint: disable=line-too-long\n\n  _lparen = Suppress(\"(\")  # pylint: disable=invalid-name\n  _rparen = Suppress(\")\")  # pylint: disable=invalid-name\n  _eq = Suppress(\"=\")  # pylint: disable=invalid-name\n\n  data = (_lparen + Optional(\n      delimitedList(\n          Group(Regex(r\"[^=,)\\s]+\") + Optional(_eq + Regex(u\"[^,)]*\")))\n          )\n      ) + _rparen)\n\n  items = data.parseString(s).asList()\n\n  # need to make sure that kwargs only happen after args are processed\n  args = [GetArg(i[0]) for i in items if len(i) 
== 1]\n  kwargs = {i[0]: GetArg(i[1]) for i in items if len(i) == 2}\n\n  # check for syntax error\n  for i in range(1, len(items)):\n    if (len(items[i]) == 1) and (len(items[i-1]) == 2):\n      raise SyntaxError((\"Error with item \" + str(i) + \" \\n\" +\n                         \"  parsing string \" + s + \"\\n\" +\n                         \"  Items: \" + str(items) + \"\\n\" +\n                         \"  Item[\" + str(i-1) +\"] :\" + str(items[i-1]) + \"\\n\" +\n                         \"  Item[\" + str(i) +\"] :\" + str(items[i]) ))\n\n  return args, kwargs\n\n\ndef safe_eval(eval_str, op_dict, *params, **kwparams):  # pylint: disable=invalid-name\n  \"\"\"Replaces eval by a safe eval mechanism.\"\"\"\n\n  function_split = eval_str.split(\"(\")\n  quantizer = op_dict.get(function_split[0], None)\n\n  if len(function_split) == 2:\n    args, kwargs = GetParams(\"(\" + function_split[1])\n  else:\n    args = []\n    kwargs = {}\n\n  args = args + list(params)\n  for k in kwparams:\n    kwargs[k] = kwparams[k]\n\n  # must be Keras activation object if None\n  if quantizer is None:\n    logging.info(\"keras dict %s\", function_split[0])\n    quantizer = keras.activations.get(function_split[0])\n\n  if len(function_split) == 2 or args or kwargs:\n    return quantizer(*args, **kwargs)\n  else:\n    if isinstance(quantizer, type):\n      # Check if quantizer is a class\n      return quantizer()\n    else:\n      # Otherwise it is a function, so just return it\n      return quantizer\n"
  },
  {
    "path": "qkeras/utils.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nimport copy\nimport json\nimport tempfile\nimport types\n\nimport numpy as np\nimport os\nimport six\nimport re\nimport networkx as nx\nimport tensorflow as tf\nimport tensorflow.keras.backend as K\n\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.models import model_from_json\n\nfrom tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper\nfrom tensorflow_model_optimization.python.core.sparsity.keras import prune_registry\nfrom tensorflow_model_optimization.python.core.sparsity.keras import prunable_layer\n\nfrom .qlayers import Clip\nfrom .qconv2d_batchnorm import QConv2DBatchnorm\nfrom .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm\nfrom .qlayers import QActivation\nfrom .qlayers import QAdaptiveActivation\nfrom .qpooling import QAveragePooling2D\nfrom .qlayers import QDense\nfrom .qlayers import QInitializer\nfrom .qconvolutional import QConv1D\nfrom .qconvolutional import QConv2D\nfrom .qconvolutional import QConv2DTranspose\nfrom .qrecurrent import QSimpleRNN\nfrom .qrecurrent import QSimpleRNNCell\nfrom .qrecurrent import QLSTM\nfrom .qrecurrent import QLSTMCell\nfrom .qrecurrent import QGRU\nfrom .qrecurrent import QGRUCell\nfrom .qrecurrent import QBidirectional\nfrom .qconvolutional import QSeparableConv1D\nfrom 
.qconvolutional import QSeparableConv2D\nfrom .qconvolutional import QDepthwiseConv2D\nfrom .qnormalization import QBatchNormalization\nfrom .qpooling import QGlobalAveragePooling2D\nfrom .qtools import qgraph\nfrom .quantizers import binary\nfrom .quantizers import bernoulli\nfrom .quantizers import get_weight_scale\nfrom .quantizers import quantized_bits\nfrom .quantizers import quantized_relu\nfrom .quantizers import quantized_ulaw\nfrom .quantizers import quantized_tanh\nfrom .quantizers import quantized_sigmoid\nfrom .quantizers import quantized_po2\nfrom .quantizers import quantized_relu_po2\nfrom .quantizers import stochastic_binary\nfrom .quantizers import stochastic_ternary\nfrom .quantizers import ternary\n# from .google_internals.experimental_quantizers import quantized_bits_learnable_scale\n# from .google_internals.experimental_quantizers import parametric_quantizer_d_xmax\nfrom .safe_eval import safe_eval\nfrom tensorflow.python.ops import math_ops\nfrom .qmac import QScaleShift\n\n\nREGISTERED_LAYERS = [\n    \"QActivation\",\n    \"QAdaptiveActivation\",\n    \"QDense\",\n    \"QConv1D\",\n    \"QConv2D\",\n    \"QSeparableConv1D\",\n    \"QSeparableConv2D\",\n    \"QDepthwiseConv2D\",\n    \"QConv2DTranspose\",\n    \"QSimpleRNN\",\n    \"QLSTM\",\n    \"QGRU\",\n    \"QBidirectional\",\n    \"QBatchNormalization\",\n    \"QConv2DBatchnorm\",\n    \"QDepthwiseConv2DBatchnorm\",\n    \"QAveragePooling2D\",\n    \"QGlobalAveragePooling2D\",\n]\n\n\ndef find_bn_fusing_layer_pair(model, custom_objects={}):\n  \"\"\"Finds layers that can be fused with the following batchnorm layers.\n\n  Args:\n    model: input model\n    custom_objects: Dict of model specific objects needed for cloning.\n\n  Returns:\n    Dict that marks all the layer pairs that need to be fused.\n\n  Note: supports sequential and non-sequential model\n  \"\"\"\n\n  fold_model = clone_model(model, custom_objects)\n  (graph, _) = qgraph.GenerateGraphFromModel(\n      fold_model, 
\"quantized_bits(8, 0, 1)\", \"quantized_bits(8, 0, 1)\")\n\n  qgraph.GraphAddSingleSourceSingleSink(graph)\n  qgraph.GraphRemoveNodeWithNodeType(graph, \"InputLayer\")\n  qgraph.GraphPropagateActivationsToEdges(graph)\n\n  # Finds the Batchnorm nodes and mark them.\n  layers_followed_by_bn = {}\n  bn_layers_to_skip = set()\n  for node_id in nx.topological_sort(graph):\n    node = graph.nodes[node_id]\n    layer = node[\"layer\"][0]\n    if layer:\n      successor_ids = list(graph.successors(node_id))\n      is_single = len(successor_ids) == 1\n      successor_layer = graph.nodes[successor_ids[0]][\"layer\"][0]\n      followed_by_bn = (successor_layer.__class__.__name__ ==\n                        \"QBatchNormalization\")\n      # TODO(lishanok): extend to QDense types\n      enable_bn_fusing = layer.__class__.__name__ in [\n          \"QConv2D\", \"QDepthwiseConv2D\"\n      ] and is_single and followed_by_bn\n\n      if enable_bn_fusing:\n        layers_followed_by_bn[layer.name] = successor_layer.name\n        bn_layers_to_skip.add(successor_layer.name)\n\n  return (layers_followed_by_bn, bn_layers_to_skip)\n\n\ndef add_bn_fusing_weights(prev_layer, bn_layer, saved_weights):\n  \"\"\"Adds additional fusing weights to saved_weights.\n\n  In hardware inference, we need to combined fuse previous layer's output with\n  the following batchnorm op.\n  z[i] = bn(y[i]) = inv[i] * y'[i] * scale[i] - bias'[i] is the final output\n  of the previous layer and bn layer, with:\n    inv[i] = gamma[i]* rsqrt(variance[i]^2+epsilon) is computed from the\n      bn layer weights\n    y'[i] is the i-th channel output from the previous layer (before scale)\n    scale[i] is the i-th channel kernel quantizer scale\n    fused_bias[i] = inv[i] * bias[i] + beta[i] - inv[i]*mean[i] where bias is\n      the bias term from the previous layer, beta and mean are the bn\n      layer weights.\n\n  Args:\n    prev_layer: QKeras layer, could be QConv2D/QDepthwiseConv2D/QDense.\n    bn_layer: The 
following QBatchNormalization layer that needs to be\n      fused with the previous layer.\n    saved_weights: Dict. The centralized weights dictionary that exports\n      relevant weights and parameters for hardware inference.\n  \"\"\"\n  bn_qs = bn_layer.quantizers\n  bn_ws = bn_layer.get_weights()\n\n  if bn_qs[4] is not None:\n    assert bn_qs[0] is None and bn_qs[3] is None, (\n        \"If using the inverse quantizer, the gamma and variance quantizers \"\n        \"should not be used in order to avoid quantizing a value twice.\")\n\n  def apply_quantizer(quantizer, input_weight):\n    if quantizer:\n      weight = tf.constant(input_weight)\n      weight = tf.keras.backend.eval(quantizer(weight))\n    else:\n      weight = input_weight\n    return weight\n\n  # Quantize respective bn layer weights\n  gamma = 1.0\n  beta = 0\n  idx = 0\n  if bn_layer.scale:\n    gamma = apply_quantizer(bn_layer.gamma_quantizer_internal, bn_ws[idx])\n    idx += 1\n  if bn_layer.center:\n    beta = apply_quantizer(bn_layer.beta_quantizer_internal, bn_ws[idx])\n    idx += 1\n  mean = apply_quantizer(bn_layer.mean_quantizer_internal, bn_ws[idx])\n  idx += 1\n  variance = apply_quantizer(bn_layer.variance_quantizer_internal, bn_ws[idx])\n\n  # Compute inv[i]\n  inv = gamma * math_ops.rsqrt(variance + bn_layer.epsilon)\n  inv = inv.numpy()\n  if bn_layer.inverse_quantizer_internal is not None:\n    quantizer = bn_layer.inverse_quantizer_internal\n    inv = tf.keras.backend.eval(quantizer(inv))\n\n  # Compute fused_bias[i]\n  if prev_layer.use_bias:\n    cur_weights = prev_layer.get_weights()\n    assert len(cur_weights) == 2, (\"Weights should have length of 2. 
Found\"\n                                   f\"{len(cur_weights)} instead.\")\n    prev_bias = cur_weights[-1]\n  else:\n    prev_bias = 0\n  b_prime = inv * prev_bias + beta - inv * mean\n\n  saved_weights[prev_layer.name][\"enable_bn_fusing\"] = True\n  saved_weights[prev_layer.name][\"fused_bn_layer_name\"] = bn_layer.name\n  saved_weights[prev_layer.name][\"bn_inv\"] = inv\n  saved_weights[prev_layer.name][\"fused_bias\"] = b_prime\n\n\n# Model utilities: before saving the weights, we want to apply the quantizers\ndef model_save_quantized_weights(model, filename=None, custom_objects={}):\n  \"\"\"Quantizes model for inference and save it.\n\n  Takes a model with weights, apply quantization function to weights and\n  returns a dictionary with quantized weights.\n\n  User should be aware that \"po2\" quantization functions cannot really\n  be quantized in meaningful way in Keras. So, in order to preserve\n  compatibility with inference flow in Keras, we do not covert \"po2\"\n  weights and biases to exponents + signs (in case of quantize_po2), but\n  return instead (-1)**sign*(2**round(log2(x))). 
In the returned dictionary,\n  we will return the pair (sign, round(log2(x))).\n\n  Special care needs to be given to quantized_bits(alpha=\"auto_po2\") as well.\n  Since in this quantizer, hardware needs the integer weights and scale for\n  hardware inference, this function will return the pair (scale,\n  integer_weights) in the returned dictionary.\n\n  Arguments:\n    model: model with weights to be quantized.\n    filename: if specified, we will save the hdf5 containing the quantized\n      weights so that we can use them for inference later on.\n    custom_objects: Dict of model specific objects needed to load/store.\n\n  Returns:\n    dictionary containing layer name and quantized weights that can be used\n    by a hardware generator.\n  \"\"\"\n\n  saved_weights = {}\n\n  # Find the conv/dense layers followed by Batchnorm layers\n  (fusing_layer_pair_dict, bn_layers_to_skip) = find_bn_fusing_layer_pair(\n      model, custom_objects\n  )\n\n  print(\"... quantizing model\")\n  for layer in model.layers:\n    if hasattr(layer, \"get_quantizers\"):\n      # weights for software inference\n      weights = []\n      signs = []\n      scales = []\n      # weights for hardware inference\n      hw_weights = []\n\n      if any(isinstance(layer, t) for t in [\n          QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]):\n        qs = layer.get_quantizers()\n        ws = layer.get_folded_weights()\n      elif any(isinstance(layer, t) for t in [QSimpleRNN, QLSTM, QGRU]):\n        qs = layer.get_quantizers()[:-1]\n        ws = layer.get_weights()\n      else:\n        qs = layer.get_quantizers()\n        ws = layer.get_weights()\n\n      has_sign = False\n      has_scale = False\n      enable_bn_fusing = False\n\n      # isinstance() might fail due to inconsistent module import path.\n      # Use __class__.__name__ instead.\n      layer_class = layer.__class__.__name__\n      if (layer_class == \"QBatchNormalization\" and\n          layer.name in bn_layers_to_skip):\n        
# Mark current bn layer to be fused with the previous layer\n        enable_bn_fusing = True\n\n      for quantizer, weight in zip(qs, ws):\n        if quantizer:\n          weight = tf.constant(weight)\n          weight = tf.keras.backend.eval(quantizer(weight))\n\n        # If quantizer is power-of-2 (quantized_po2 or quantized_relu_po2),\n        # we would like to process it here.\n        #\n        # However, we cannot, because we will lose sign information as\n        # quanized_po2 will be represented by the tuple (sign, log2(abs(w))).\n        #\n        # In addition, we will not be able to use the weights on the model\n        # any longer.\n        #\n        # So, instead of \"saving\" the weights in the model, we will return\n        # a dictionary so that the proper values can be propagated.\n\n        # Weights store the weight in the format that software inference uses.\n        weights.append(weight)\n\n        q_name = \"\"\n        if quantizer:\n          if isinstance(quantizer, six.string_types):\n            q_name = quantizer\n          elif hasattr(quantizer, \"__name__\"):\n            q_name = quantizer.__name__\n          elif hasattr(quantizer, \"name\"):\n            q_name = quantizer.name\n          elif hasattr(quantizer, \"__class__\"):\n            q_name = quantizer.__class__.__name__\n\n        if quantizer and (\"_po2\" in q_name):\n          # Quantized_relu_po2 does not have a sign.\n          if q_name == \"quantized_po2\":\n            has_sign = True\n          sign = np.sign(weight)\n          # Makes sure values are -1 or +1 only\n          sign += (1.0 - np.abs(sign))\n          # hw_weight store the weight in the format that hardware inference\n          # uses.\n          hw_weight = np.round(np.log2(np.abs(weight)))\n          signs.append(sign)\n          scales.append([])\n        elif (q_name == \"quantized_bits\" and\n              quantizer.alpha == \"auto_po2\"):\n          unsigned_bits = quantizer.bits - 
quantizer.keep_negative\n          m = K.cast_to_floatx(pow(2, unsigned_bits))\n          m_i = K.cast_to_floatx(K.pow(2, quantizer.integer))\n\n          assert hasattr(quantizer.scale, \"numpy\") or isinstance(\n              quantizer.scale, np.ndarray), (\n                  \"The auto_po2 quantizer has to be called first in order \"\n                  \"to know the values of scale.\")\n          scale = quantizer.scale if isinstance(\n              quantizer.scale, np.ndarray) else quantizer.scale.numpy()\n          scale = K.cast_to_floatx(scale)\n          # Make sure scale is power of 2 values\n          log2val = np.log2(scale)\n          diff = np.round(log2val) - log2val\n          assert np.all(diff == 0), \"scale must be power of 2 values!\"\n          # Convert fixed point weight to integer weight, just\n          hw_weight = weight * m / m_i\n          # Because hw_weight is integer weights, set scale = scale * m_i / m\n          # so that when we can multiply scale with the integer weight\n          # during hardware inference to get the fixed point weights\n          scale = scale * m_i / m\n          has_scale = True\n          scales.append(scale)\n        else:\n          hw_weight = weight\n          signs.append([])\n          scales.append([])\n        hw_weights.append(hw_weight)\n\n      # Save the weights in the format that hardware inference uses\n      saved_weights[layer.name] = {\"weights\": hw_weights,\n                                   \"enable_bn_fusing\": enable_bn_fusing}\n\n      if (isinstance(layer, QAveragePooling2D) or\n          isinstance(layer, QGlobalAveragePooling2D)):\n        if isinstance(layer, QAveragePooling2D):\n          pool_area = layer.pool_size\n          if isinstance(layer.pool_size, int):\n            pool_area = layer.pool_size * layer.pool_size\n          else:\n            pool_area = np.prod(layer.pool_size)\n        else:\n          pool_area = 
layer.compute_pooling_area(input_shape=layer.input_shape)\n        saved_weights[\n            layer.name][\"q_mult_factor\"] = layer.average_quantizer_internal(\n                1.0 / pool_area).numpy()\n        saved_weights[layer.name][\"mult_factor\"] = 1.0 / pool_area\n        saved_weights[layer.name][\"pool_area\"] = pool_area\n\n      if has_sign:\n        saved_weights[layer.name][\"signs\"] = signs\n      if has_scale:\n        saved_weights[layer.name][\"scales\"] = scales\n      if not any(isinstance(layer, t) for t in [\n          QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]):\n        # Set layer weights in the format that software inference uses\n        layer.set_weights(weights)\n      else:\n        print(layer.name, \" conv and batchnorm weights cannot be seperately\"\n              \" quantized because they will be folded before quantization.\")\n\n      # adjust weights for bn fusing if necessary\n      if layer.name in fusing_layer_pair_dict.keys():\n        print(f\"Fuse {layer.name} output with \"\n              f\"{fusing_layer_pair_dict[layer.name]} for hardware inference.\")\n        add_bn_fusing_weights(\n            prev_layer=layer,\n            bn_layer=model.get_layer(fusing_layer_pair_dict[layer.name]),\n            saved_weights=saved_weights)\n    else:\n      if layer.get_weights():\n        print(\" \", layer.name, \"has not been quantized\")\n\n  if filename:\n    model.save_weights(filename)\n\n  return saved_weights\n\n\ndef quantize_activation(layer_config, activation_bits):\n  \"\"\"Replaces activation by quantized activation functions.\"\"\"\n  str_act_bits = str(activation_bits)\n  # relu -> quantized_relu(bits)\n  # tanh -> quantized_tanh(bits)\n  # sigmoid -> quantized_sigmoid(bits)\n  # more to come later\n  if layer_config.get(\"activation\", None) is None:\n    return\n  if isinstance(layer_config[\"activation\"], six.string_types):\n    a_name = layer_config[\"activation\"]\n  elif 
isinstance(layer_config[\"activation\"], types.FunctionType):\n    a_name = layer_config[\"activation\"].__name__\n  else:\n    a_name = layer_config[\"activation\"].__class__.__name__\n\n  if a_name == \"linear\":\n    return\n  if a_name == \"relu\":\n    layer_config[\"activation\"] = \"quantized_relu(\" + str_act_bits + \")\"\n  elif a_name == \"tanh\":\n    layer_config[\"activation\"] = \"quantized_tanh(\" + str_act_bits + \")\"\n  elif a_name == \"sigmoid\":\n    layer_config[\"activation\"] = \"quantized_sigmoid(\" + str_act_bits + \")\"\n\n\ndef get_config(quantizer_config, layer, layer_class, parameter=None):\n  \"\"\"Returns search of quantizer on quantizer_config.\"\"\"\n  quantizer = quantizer_config.get(layer[\"config\"][\"name\"],\n                                   quantizer_config.get(layer_class, None))\n\n  if quantizer is not None and parameter is not None:\n    quantizer = quantizer.get(parameter, None)\n\n  return quantizer\n\n\ndef is_TFOpLambda_layer(layer):\n  return layer.__class__.__name__ == \"TFOpLambda\"\n\n\ndef get_y_from_TFOpLambda(model_cfg, layer):\n  \"\"\"Get the value of \"y\" from the TFOpLambda layer's configuration.\n  Args:\n    model_cfg: dictionary type, model.get_config() output\n    layer: a given layer instance\n\n  Return:\n    value of \"y\" for a TFOpLambda layer. 'y' here corresponds to how tensorflow\n    stores TFOpLambda layer parameter in serialization. for example,\n    TFOpLambda(func), where func is tf.multiply(input_tensor, 3). 
\"y\" would be\n    the value 3.\n  \"\"\"\n\n  for layer_config in model_cfg[\"layers\"]:\n    op_name = layer_config[\"config\"][\"name\"]\n    class_name = layer_config[\"class_name\"]\n\n    # TODO(lishanok): Extend support for other TFOpLambda types when needed\n    if op_name == layer.name and class_name == \"TFOpLambda\":\n      assert (\"tf.__operators__.add\" in op_name or \"tf.math.multiply\"\n              in op_name), \"TFOpLambda layer {} not supported!\".format(op_name)\n      return layer_config[\"inbound_nodes\"][-1][-1][\"y\"]\n\n  return None\n\n\ndef convert_to_folded_model(model):\n  \"\"\"Find conv/dense layers followed by bn layers and fold them.\n\n  Args:\n    model: input model\n\n  Returns:\n    new model without bn layers\n    list of layers being folded\n\n  Note: supports sequential and non-sequential model\n  \"\"\"\n\n  fold_model = clone_model(model)\n  model_cfg = model.get_config()\n  (graph, _) = qgraph.GenerateGraphFromModel(\n      fold_model, \"quantized_bits(8, 0, 1)\", \"quantized_bits(8, 0, 1)\")\n\n  qgraph.GraphAddSingleSourceSingleSink(graph)\n  qgraph.GraphRemoveNodeWithNodeType(graph, \"InputLayer\")\n  qgraph.GraphPropagateActivationsToEdges(graph)\n\n  # Finds the Batchnorm nodes to be deleted and mark them.\n  bn_nodes_to_delete = []\n  layers_to_fold = []\n  for node_id in nx.topological_sort(graph):\n    layer_input_tensors = []\n    node = graph.nodes[node_id]\n    layer = node[\"layer\"][0]\n    if layer:\n      successor_ids = list(graph.successors(node_id))\n      is_single = len(successor_ids) == 1\n      successor_layer = graph.nodes[successor_ids[0]][\"layer\"][0]\n      followed_by_bn = (successor_layer.__class__.__name__ ==\n                        \"BatchNormalization\")\n      # TODO(lishanok): extend to QDense types\n      is_foldable = layer.__class__.__name__ in [\n          \"Conv2D\", \"DepthwiseConv2D\"\n      ] and is_single and followed_by_bn\n\n      if is_foldable:\n        # Removes the 
batchnorm node from the graph.\n        bn_nodes_to_delete.append(successor_ids[0])\n        layers_to_fold.append(layer.name)\n\n  # Deletes the marked nodes.\n  for node_id in bn_nodes_to_delete:\n    qgraph.GraphRemoveNode(graph, node_id)\n\n  # Modifies model according to the graph.\n  model_outputs = []\n  x = model_inputs = fold_model.inputs\n\n  for node_id in nx.topological_sort(graph):\n    layer_input_tensors = []\n    node = graph.nodes[node_id]\n\n    layer = node[\"layer\"][0]\n    if layer:\n      # Gets layer input tensors from graph edge.\n      for parent_node_id in graph.predecessors(node_id):\n        edge = graph.edges[(parent_node_id, node_id)]\n        input_tensor = edge[\"tensor\"]\n        layer_input_tensors.append(input_tensor)\n\n      # We call the layer to get output tensor.\n      if len(layer_input_tensors) == 1:\n        layer_input_tensors = layer_input_tensors[0].deref()\n      else:\n        layer_input_tensors = [t.deref() for t in layer_input_tensors]\n\n      if is_TFOpLambda_layer(layer):\n        # TFOpLambda layer requires one extra input: \"y\"\n        y = get_y_from_TFOpLambda(model_cfg, layer)\n        x = layer(layer_input_tensors, y)\n      else:\n        x = layer(layer_input_tensors)\n\n      # Replaces edge tensors between the predecessor and successor\n      for u, v in graph.edges(node_id):\n        # u is current layer node, v is successor layer node\n        # graph[u][v] is the edge between the two nodes\n        # Replace the tensor on this edge so that the input tensor for the\n        # successor layer can be updated accordingly.\n        graph[u][v][\"tensor\"] = x.ref()\n\n        if v == -2 and x not in model_outputs:\n          # When it is output layer, add the output tensor of this layer\n          # into model outputs.\n          model_outputs.append(x)\n\n  new_model = Model(inputs=model_inputs, outputs=model_outputs)\n\n  return new_model, layers_to_fold\n\n\ndef model_quantize(model,\n             
      quantizer_config,\n                   activation_bits,\n                   custom_objects=None,\n                   transfer_weights=False,\n                   prefer_qadaptiveactivation=False,\n                   enable_bn_folding=False):\n  \"\"\"Creates a quantized model from non-quantized model.\n\n  The quantized model translation is based on json interface of Keras,\n  which requires a custom_objects dictionary for \"string\" types.\n\n  Because of the way json works, we pass \"string\" objects for the\n  quantization mechanisms and we perform an eval(\"string\") which\n  technically is not safe, but it will do the job.\n\n  The quantizer_config is a dictionary with the following form.\n  {\n    Dense_layer_name: {\n        \"kernel_quantizer\": \"quantizer string\",\n        \"bias_quantizer\": \"quantizer_string\"\n    },\n\n    Conv2D_layer_name: {\n        \"kernel_quantizer\": \"quantizer string\",\n        \"bias_quantizer\": \"quantizer_string\"\n    },\n\n    Activation_layer_name: \"quantizer string\",\n\n    \"QActivation\": { \"relu\": \"quantizer_string\" },\n\n    \"QConv2D\": {\n        \"kernel_quantizer\": \"quantizer string\",\n        \"bias_quantizer\": \"quantizer_string\"\n    },\n\n    \"QBatchNormalization\": {}\n  }\n\n  In the case of \"QBidirectional\", we can follow the same form as above.\n  The specified configuration will be used for both forward and backwards\n  layer.\n  {\n    \"Bidirectional\" : {\n        \"kernel_quantizer\" : \"quantizer string\",\n        \"bias_quantizer\" : \"quantizer string\",\n        \"recurrent_quantizer\" : \"quantizer string\"\n    }\n  }\n\n  In the case of \"QActivation\", we can modify only certain types of\n  activations, for example, a \"relu\". 
In this case we represent the
  activation name by a dictionary, or we can modify all activations,
  without representing them as a set.

  We right now require a default case in case we cannot find the layer name.
  This simplifies the dictionary because, in the simplest case, we can just
  say:

  {
    "default": {
        "kernel": "quantized_bits(4)",
        "bias": "quantized_bits(4)"
    }
  }

  and this will quantize all layers' weights and bias to be created with
  4 bits.

  Arguments:
    model: model to be quantized
    quantizer_config: dictionary (as above) with quantized parameters
    activation_bits: number of bits for quantized_relu, quantized_tanh,
      quantized_sigmoid
    custom_objects: dictionary following keras recommendations for json
      translation.
    transfer_weights: if true, weights are to be transferred from model to
      qmodel.
    prefer_qadaptiveactivation: Bool. If true, try to use QAdaptiveActivation
      over QActivation whenever possible
    enable_bn_folding: Bool. If true, fold conv/dense layers with
      following batch normalization layers whenever possible. 
use\n      QConv2DBatchnorm for example, to replace conv2d layers\n\n  Returns:\n    qmodel with quantized operations and custom_objects.\n  \"\"\"\n\n  if enable_bn_folding:\n    # Removes bn layers from the model and find a list of layers to fold.\n    model, layers_to_fold = convert_to_folded_model(model)\n    if len(layers_to_fold) == 0:\n      # If no layers to fold, no need to perform folding.\n      enable_bn_folding = False\n\n  if not custom_objects:\n    custom_objects = {}\n\n  # Let's make a deep copy to make sure our objects are not shared elsewhere.\n  jm = copy.deepcopy(json.loads(model.to_json()))\n  custom_objects = copy.deepcopy(custom_objects)\n  config = jm[\"config\"]\n  layers = config[\"layers\"]\n\n  def quantize_rnn(layer, quantizer_config):\n    q_name = \"Q\" + layer[\"class_name\"]\n    # Needs to add kernel, recurrent bias quantizers.\n    kernel_quantizer = get_config(\n        quantizer_config, layer, q_name, \"kernel_quantizer\")\n    recurrent_quantizer = get_config(\n        quantizer_config, layer, q_name, \"recurrent_quantizer\")\n    if layer[\"config\"]['use_bias']:\n      bias_quantizer = get_config(\n          quantizer_config, layer, q_name, \"bias_quantizer\")\n    else:\n      bias_quantizer = None\n    state_quantizer = get_config(\n            quantizer_config, layer, q_name, \"state_quantizer\")\n\n    # This is to avoid unwanted transformations.\n    if kernel_quantizer is None:\n      return\n\n    layer[\"config\"][\"kernel_quantizer\"] = kernel_quantizer\n    layer[\"config\"][\"recurrent_quantizer\"] = recurrent_quantizer\n    layer[\"config\"][\"bias_quantizer\"] = bias_quantizer\n    layer[\"config\"][\"state_quantizer\"] = state_quantizer\n\n    # If activation is present, add activation here.\n    activation = get_config(\n        quantizer_config, layer, q_name, \"activation_quantizer\")\n    if activation:\n      layer[\"config\"][\"activation\"] = activation\n    else:\n      
quantize_activation(layer[\"config\"], activation_bits)\n\n    # If recurrent activation is present, add activation here.\n    if layer[\"class_name\"] in [\"LSTM\", \"GRU\"]:\n      recurrent_activation = get_config(\n          quantizer_config, layer, q_name, \"recurrent_activation_quantizer\")\n      if recurrent_activation:\n        layer[\"config\"][\"recurrent_activation\"] = recurrent_activation\n    layer[\"class_name\"] = q_name\n\n    registered_name = layer.pop(\"registered_name\", None)\n    if registered_name:\n      layer[\"registered_name\"] = q_name\n\n  for layer in layers:\n    layer_config = layer[\"config\"]\n\n    # Dense becomes QDense, Conv1D becomes QConv1D etc\n    # Activation converts activation functions.\n\n    if layer[\"class_name\"] in [\n      \"Dense\", \"Conv1D\", \"Conv2D\", \"Conv2DTranspose\",\n      \"SeparableConv1D\", \"SeparableConv2D\"\n    ]:\n      if (layer[\"class_name\"] in [\"Dense\", \"Conv2D\"] and enable_bn_folding and\n          layer[\"name\"] in layers_to_fold):\n        # Only fold if current layer is followed by BN layer.\n        q_name = \"Q\" + layer[\"class_name\"] + \"Batchnorm\"\n        layer_config[\"use_bias\"] = True  # Folded layers require a bias\n\n        # Sets ema_freeze_delay and folding_mode specific to\n        # QDepthwiseConv2DBatchnorm layer config.\n        folding_mode = get_config(\n            quantizer_config, layer, q_name, \"folding_mode\")\n        layer_config[\"folding_mode\"] = (\n            folding_mode if folding_mode else \"ema_stats_folding\")\n        ema_freeze_delay = get_config(\n            quantizer_config, layer, q_name, \"ema_freeze_delay\")\n        layer_config[\"ema_freeze_delay\"] = (\n            ema_freeze_delay if ema_freeze_delay else None)\n      else:\n        q_name = \"Q\" + layer[\"class_name\"]\n      # Needs to add kernel/bias quantizers.\n      kernel_quantizer = get_config(\n          quantizer_config, layer, q_name, \"kernel_quantizer\")\n\n      
if layer_config[\"use_bias\"]:\n        bias_quantizer = get_config(\n            quantizer_config, layer, q_name, \"bias_quantizer\")\n      else:\n        bias_quantizer = None\n\n      if (kernel_quantizer is None and\n          q_name == \"Q\" + layer[\"class_name\"] + \"Batchnorm\"):\n        # Tries none-folded layer quantizer as a back up.\n        kernel_quantizer = get_config(\n            quantizer_config, layer, \"Q\" + layer[\"class_name\"],\n            \"kernel_quantizer\")\n        bias_quantizer = get_config(\n            quantizer_config, layer, \"Q\" + layer[\"class_name\"],\n            \"bias_quantizer\")\n\n      # This is to avoid unwanted transformations.\n      if kernel_quantizer is None:\n        continue\n\n      layer[\"class_name\"] = q_name\n\n      layer_config[\"kernel_quantizer\"] = kernel_quantizer\n      layer_config[\"bias_quantizer\"] = bias_quantizer\n\n      # If activation is present, add activation here.\n      quantizer = get_config(\n          quantizer_config, layer, q_name, \"activation_quantizer\")\n\n      if quantizer:\n        layer_config[\"activation\"] = quantizer\n      else:\n        quantize_activation(layer_config, activation_bits)\n\n    elif layer[\"class_name\"] == \"DepthwiseConv2D\":\n      if enable_bn_folding and layer[\"name\"] in layers_to_fold:\n        q_name = \"QDepthwiseConv2DBatchnorm\"\n        layer_config[\"use_bias\"] = True  # Folded layers require a bias\n\n        # Sets ema_freeze_delay and folding_mode specific to\n        # QDepthwiseConv2DBatchnorm layers.\n        folding_mode = get_config(\n            quantizer_config, layer, q_name, \"folding_mode\")\n        layer_config[\"folding_mode\"] = (\n            folding_mode if folding_mode else \"ema_stats_folding\")\n        ema_freeze_delay = get_config(\n            quantizer_config, layer, q_name, \"ema_freeze_delay\")\n        layer_config[\"ema_freeze_delay\"] = (\n            ema_freeze_delay if ema_freeze_delay else None)\n\n   
   else:\n        q_name = \"QDepthwiseConv2D\"\n\n      # Needs to add kernel/bias quantizers.\n      depthwise_quantizer = get_config(quantizer_config, layer, q_name,\n                                       \"depthwise_quantizer\")\n\n      if layer_config[\"use_bias\"]:\n        bias_quantizer = get_config(quantizer_config, layer, q_name,\n                                    \"bias_quantizer\")\n      else:\n        bias_quantizer = None\n\n      if depthwise_quantizer is None and q_name == \"QDepthwiseConv2DBatchnorm\":\n        # Tries none-folded layer quantizer as a back up.\n        depthwise_quantizer = get_config(\n            quantizer_config, layer, \"QDepthwiseConv2D\", \"depthwise_quantizer\")\n        bias_quantizer = get_config(\n            quantizer_config, layer, \"QDepthwiseConv2D\", \"bias_quantizer\")\n\n      # This is to avoid unwanted transformations.\n      if depthwise_quantizer is None:\n        continue\n\n      layer[\"class_name\"] = q_name\n\n      layer_config[\"depthwise_quantizer\"] = depthwise_quantizer\n      layer_config[\"bias_quantizer\"] = bias_quantizer\n      # If activation is present, add activation here.\n      quantizer = get_config(quantizer_config, layer, q_name,\n                             \"activation_quantizer\",)\n\n      if quantizer:\n        layer_config[\"activation\"] = quantizer\n      else:\n        quantize_activation(layer_config, activation_bits)\n\n    elif layer[\"class_name\"] in [\"SimpleRNN\", \"LSTM\", \"GRU\"]:\n      quantize_rnn(layer, quantizer_config)\n\n    elif layer[\"class_name\"] == \"Bidirectional\":\n      forward_layer_quantizer_config = {\n          layer_config[\"layer\"][\"config\"][\"name\"]:\n              get_config(quantizer_config, layer, \"QBidirectional\")\n      }\n      quantize_rnn(layer[\"config\"][\"layer\"], forward_layer_quantizer_config)\n      if \"backward_layer\" in layer_config:\n        backward_layer_quantizer_config = {\n            
layer_config[\"backward_layer\"][\"config\"][\"name\"]:\n                get_config(quantizer_config, layer, \"QBidirectional\")\n        }\n        quantize_rnn(layer[\"config\"][\"backward_layer\"],\n                     backward_layer_quantizer_config)\n      layer[\"class_name\"] = \"QBidirectional\"\n\n    elif layer[\"class_name\"] == \"Activation\":\n      if prefer_qadaptiveactivation:  # Try to find QAdaptiveActivation first\n        quantizer = get_config(quantizer_config, layer, \"QAdaptiveActivation\")\n        is_qadaptiveactivation = True\n        if quantizer is None:  # Try QActivation as a backup\n          quantizer = get_config(quantizer_config, layer, \"QActivation\")\n          is_qadaptiveactivation = False\n      else:  # Tries to find QActivation first.\n        quantizer = get_config(quantizer_config, layer, \"QActivation\")\n        is_qadaptiveactivation = False\n        if quantizer is None:  # Try QAdaptiveActivation as a backup\n          quantizer = get_config(quantizer_config, layer, \"QAdaptiveActivation\")\n          is_qadaptiveactivation = True\n\n      # This is to avoid softmax from quantizing in autoq.\n      if quantizer is None:\n        continue\n\n      # If quantizer exists in dictionary related to this name,\n      # use it, otherwise, use normal transformations.\n\n      if not isinstance(quantizer, dict) or quantizer.get(\n          layer_config[\"activation\"], None):\n        # Only change activation layer if we will use a quantized activation.\n\n        layer[\"class_name\"] = (\"QAdaptiveActivation\" if is_qadaptiveactivation\n                               else \"QActivation\")\n        if isinstance(quantizer, dict):\n          quantizer = quantizer[layer_config[\"activation\"]]\n        if quantizer:\n          if is_qadaptiveactivation:\n            assert quantizer.find(\",\") < 0, \\\n                \"Only integer bits should be defined for QAdaptiveActivation\"\n            layer_config[\"total_bits\"] = 
int(re.sub(r\"[^\\d]\", \"\", quantizer))\n            quantizer = re.sub(r\"\\(.*\", \"\", quantizer)  # remove params\n          layer_config[\"activation\"] = quantizer\n        else:\n          quantize_activation(layer_config, activation_bits)\n\n    # We have to do this because of other instances of ReLU.\n    elif layer[\"class_name\"] in [\"ReLU\", \"relu\", \"LeakyReLU\"]:\n\n      quantizer = get_config(quantizer_config, layer, \"QActivation\")\n      # This is to avoid unwanted transformations.\n      if quantizer is None:\n        continue\n\n      if layer[\"class_name\"] == \"LeakyReLU\":\n        negative_slope = layer[\"config\"][\"alpha\"]\n      elif layer[\"class_name\"] == \"relu\":\n        max_value = layer[\"config\"][\"max_value\"]\n        negative_slope = layer[\"config\"][\"alpha\"]\n        threshold = layer[\"config\"][\"threshold\"]\n      else:  # ReLU from mobilenet\n        max_value = layer[\"config\"][\"max_value\"]\n        negative_slope = layer[\"config\"][\"negative_slope\"]\n        threshold = layer[\"config\"][\"threshold\"]\n\n      if negative_slope > 0:\n        q_name = \"leakyrelu\"\n      else:\n        q_name = \"relu\"\n\n      # If quantizer exists in dictionary related to this name,\n      # use it, otherwise, use normal transformations.\n\n      if not isinstance(quantizer, dict) or quantizer.get(q_name, None):\n        # Only change activation layer if we will use a quantized activation.\n\n        layer[\"class_name\"] = \"QActivation\"\n\n        # Remove relu specific configurations\n        # remember that quantized relu's are always upper bounded.\n\n        if layer[\"class_name\"] == \"LeakyReLU\":\n          del layer[\"config\"][\"alpha\"]\n        elif layer[\"class_name\"] == \"relu\":\n          del layer[\"config\"][\"max_value\"]\n          del layer[\"config\"][\"alpha\"]\n          del layer[\"config\"][\"threshold\"]\n        else:  # ReLU from mobilenet\n          del 
layer[\"config\"][\"max_value\"]\n          del layer[\"config\"][\"negative_slope\"]\n          del layer[\"config\"][\"threshold\"]\n\n        if isinstance(quantizer, dict):\n          quantizer = quantizer[q_name]\n        if quantizer:\n          layer[\"config\"][\"activation\"] = quantizer\n        else:\n          quantize_activation(layer[\"config\"], activation_bits)\n\n    elif layer[\"class_name\"] == \"BatchNormalization\":\n      # We will assume at least QBatchNormalization or\n      # layer name is in dictionary to enable conversion\n      # otherwise we will just skip it.\n      if (\n          layer_config[\"name\"] not in quantizer_config and\n          \"QBatchNormalization\" not in quantizer_config\n      ):\n        continue\n\n      layer[\"class_name\"] = \"QBatchNormalization\"\n      # Needs to add kernel/bias quantizers.\n      gamma_quantizer = get_config(\n          quantizer_config, layer, \"QBatchNormalization\",\n          \"gamma_quantizer\")\n      beta_quantizer = get_config(\n          quantizer_config, layer, \"QBatchNormalization\",\n          \"beta_quantizer\")\n      mean_quantizer = get_config(\n          quantizer_config, layer, \"QBatchNormalization\",\n          \"mean_quantizer\")\n      variance_quantizer = get_config(\n          quantizer_config, layer, \"QBatchNormalization\",\n          \"variance_quantizer\")\n\n      layer_config[\"gamma_quantizer\"] = gamma_quantizer\n      layer_config[\"beta_quantizer\"] = beta_quantizer\n      layer_config[\"mean_quantizer\"] = mean_quantizer\n      layer_config[\"variance_quantizer\"] = variance_quantizer\n\n    elif layer[\"class_name\"] in [\"AveragePooling2D\", \"GlobalAveragePooling2D\"]:\n      q_name = \"Q\" + layer[\"class_name\"]\n      # Adds the average quanizer to config.\n      average_quantizer = get_config(\n          quantizer_config, layer, q_name, \"average_quantizer\")\n\n      # This is to avoid unwanted transformations.\n      if average_quantizer is 
None:\n        continue\n\n      layer[\"class_name\"] = q_name\n\n      layer_config[\"average_quantizer\"] = average_quantizer\n\n      # Adds activation to config.\n      quantizer = get_config(\n          quantizer_config, layer, q_name, \"activation_quantizer\")\n\n      if quantizer:\n        layer_config[\"activation\"] = quantizer\n      else:\n        quantize_activation(layer_config, activation_bits)\n\n    registered_name = layer.pop(\"registered_name\", None)\n    if registered_name:\n      layer[\"registered_name\"] = q_name or registered_name\n\n  # We need to keep a dictionary of custom objects as our quantized library\n  # is not recognized by keras.\n\n  qmodel = quantized_model_from_json(json.dumps(jm), custom_objects)\n\n  # If transfer_weights is true, we load the weights from model to qmodel.\n\n  if transfer_weights and not enable_bn_folding:\n    for layer, qlayer in zip(model.layers, qmodel.layers):\n      if layer.get_weights():\n        qlayer.set_weights(copy.deepcopy(layer.get_weights()))\n\n  return qmodel\n\n\ndef _add_supported_quantized_objects(custom_objects):\n  \"\"\"Map all the quantized objects.\"\"\"\n  custom_objects[\"QInitializer\"] = QInitializer\n  custom_objects[\"QDense\"] = QDense\n  custom_objects[\"QConv1D\"] = QConv1D\n  custom_objects[\"QConv2D\"] = QConv2D\n  custom_objects[\"QConv2DTranspose\"] = QConv2DTranspose\n  custom_objects[\"QSimpleRNNCell\"] = QSimpleRNNCell\n  custom_objects[\"QSimpleRNN\"] = QSimpleRNN\n  custom_objects[\"QLSTMCell\"] = QLSTMCell\n  custom_objects[\"QLSTM\"] = QLSTM\n  custom_objects[\"QGRUCell\"] = QGRUCell\n  custom_objects[\"QGRU\"] = QGRU\n  custom_objects[\"QBidirectional\"] = QBidirectional\n  custom_objects[\"QDepthwiseConv2D\"] = QDepthwiseConv2D\n  custom_objects[\"QSeparableConv1D\"] = QSeparableConv1D\n  custom_objects[\"QSeparableConv2D\"] = QSeparableConv2D\n  custom_objects[\"QActivation\"] = QActivation\n  custom_objects[\"QAdaptiveActivation\"] = QAdaptiveActivation\n  
custom_objects[\"QBatchNormalization\"] = QBatchNormalization\n  custom_objects[\"Clip\"] = Clip\n  custom_objects[\"quantized_bits\"] = quantized_bits\n  custom_objects[\"bernoulli\"] = bernoulli\n  custom_objects[\"stochastic_ternary\"] = stochastic_ternary\n  custom_objects[\"ternary\"] = ternary\n  custom_objects[\"stochastic_binary\"] = stochastic_binary\n  custom_objects[\"binary\"] = binary\n  custom_objects[\"quantized_relu\"] = quantized_relu\n  custom_objects[\"quantized_ulaw\"] = quantized_ulaw\n  custom_objects[\"quantized_tanh\"] = quantized_tanh\n  custom_objects[\"quantized_sigmoid\"] = quantized_sigmoid\n  custom_objects[\"quantized_po2\"] = quantized_po2\n  custom_objects[\"quantized_relu_po2\"] = quantized_relu_po2\n  # custom_objects[\"quantized_bits_learnable_scale\"] = quantized_bits_learnable_scale\n\n  custom_objects[\"QConv2DBatchnorm\"] = QConv2DBatchnorm\n  custom_objects[\"QDepthwiseConv2DBatchnorm\"] = QDepthwiseConv2DBatchnorm\n\n  custom_objects[\"QAveragePooling2D\"] = QAveragePooling2D\n  custom_objects[\"QGlobalAveragePooling2D\"] = QGlobalAveragePooling2D\n  custom_objects[\"QScaleShift\"] = QScaleShift\n\n\ndef clone_model(model, custom_objects=None):\n  \"\"\"Clones model with custom_objects.\"\"\"\n  if not custom_objects:\n    custom_objects = {}\n\n  # Makes a deep copy to make sure our objects are not shared elsewhere.\n  custom_objects = copy.deepcopy(custom_objects)\n\n  _add_supported_quantized_objects(custom_objects)\n\n  json_string = model.to_json()\n  qmodel = quantized_model_from_json(json_string, custom_objects=custom_objects)\n  qmodel.set_weights(model.get_weights())\n\n  return qmodel\n\n\ndef quantized_model_from_json(json_string, custom_objects=None):\n  if not custom_objects:\n    custom_objects = {}\n\n  # Makes a deep copy to make sure our objects are not shared elsewhere.\n  custom_objects = copy.deepcopy(custom_objects)\n\n  _add_supported_quantized_objects(custom_objects)\n\n  qmodel = 
model_from_json(json_string, custom_objects=custom_objects)\n\n  return qmodel\n\n\ndef load_qmodel(filepath, custom_objects=None, compile=True):\n  \"\"\"Loads quantized model from Keras's model.save() h5 file.\n\n  Arguments:\n      filepath: one of the following:\n          - string, path to the saved model\n          - h5py.File or h5py.Group object from which to load the model\n          - any file-like object implementing the method `read` that returns\n          `bytes` data (e.g. `io.BytesIO`) that represents a valid h5py file\n          image.\n      custom_objects: Optional dictionary mapping names (strings) to custom\n          classes or functions to be considered during deserialization.\n      compile: Boolean, whether to compile the model after loading.\n\n  Returns:\n      A Keras model instance. If an optimizer was found as part of the saved\n      model, the model is already compiled. Otherwise, the model is uncompiled\n      and a warning will be displayed. When `compile` is set to False, the\n      compilation is omitted without any warning.\n  \"\"\"\n\n  if not custom_objects:\n    custom_objects = {}\n\n  # Makes a deep copy to make sure our objects are not shared elsewhere.\n  custom_objects = copy.deepcopy(custom_objects)\n\n  _add_supported_quantized_objects(custom_objects)\n\n  qmodel = tf.keras.models.load_model(filepath, custom_objects=custom_objects,\n                                      compile=compile)\n  return qmodel\n\n\ndef print_model_sparsity(model):\n  \"\"\"Prints sparsity for the pruned layers in the model.\"\"\"\n\n  def _get_sparsity(weights):\n    return 1.0 - np.count_nonzero(weights) / float(weights.size)\n\n  print(\"Model Sparsity Summary ({})\".format(model.name))\n  print(\"--\")\n  for layer in model.layers:\n    if isinstance(layer, pruning_wrapper.PruneLowMagnitude):\n      prunable_weights = layer.layer.get_prunable_weights()\n    elif isinstance(layer, prunable_layer.PrunableLayer):\n      prunable_weights = 
layer.get_prunable_weights()\n    elif prune_registry.PruneRegistry.supports(layer):\n      weight_names = prune_registry.PruneRegistry._weight_names(layer)\n      prunable_weights = [getattr(layer, weight) for weight in weight_names]\n    else:\n      prunable_weights = None\n    if prunable_weights:\n      print(\"{}: {}\".format(\n          layer.name, \", \".join([\n              \"({}, {})\".format(weight.name,\n                  str(_get_sparsity(K.get_value(weight))))\n              for weight in prunable_weights\n          ])))\n  print(\"\\n\")\n\n\ndef get_model_sparsity(model, per_layer=False, allow_list=None):\n  \"\"\"Calculates the sparsity of the model's weights and biases.\n\n  Quantizes the model weights using model_save_quantized_weights (but does not\n    save the quantized weights) before calculating the proportion of weights and\n    biases set to zero.\n\n  Arguments:\n      model: The model to use to calculate sparsity. Assumes that this is a\n          QKeras model with trained weights.\n      per_layer: If to return a per-layer breakdown of sparsity\n      allow_list: A list of layer class names that sparsity will be calculated\n        for. If set to None, a default list will be used.\n\n  Returns:\n      A float value representing the proportion of weights and biases set to\n      zero in the quantized model. 
If per_layer is True, it also returns a\n      per-layer breakdown of model sparsity formatted as a list of tuples in the\n      form (<layer name>, <sparsity proportion>)\n  \"\"\"\n  # Checks if to use a default list of allowed layers to calculate sparsity.\n  if allow_list is None:\n    allow_list = [\n        \"QDense\", \"Dense\", \"QConv1D\", \"Conv1D\", \"QConv2D\", \"Conv2D\",\n        \"QDepthwiseConv2D\", \"DepthwiseConv2D\",\n        \"QSeparableConv1D\", \"SeparableConv1D\",\n        \"QSeparableConv2D\", \"SeparableConv2D\", \"QOctaveConv2D\",\n        \"QSimpleRNN\", \"RNN\", \"QLSTM\", \"QGRU\",\n        \"QConv2DTranspose\", \"Conv2DTranspose\",\n        \"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\",\n    ]\n\n  # Quantizes the model weights for a more accurate sparsity calculation.\n  model_save_quantized_weights(model)\n\n  # Calculates the sparsity layer by layer.\n  layer_sparsity = []\n  total_sparsity = 0.\n  all_weights = []\n  for layer in model.layers:\n    if hasattr(layer, \"quantizers\") and layer.__class__.__name__ in allow_list:\n      if layer.__class__.__name__ in [\n          \"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"]:\n        weights_to_examine = layer.get_folded_weights()\n      else:\n        weights_to_examine = layer.get_weights()\n\n      layer_weights = []\n      for weight in weights_to_examine:\n        try:\n          weight_numpy = weight.ravel()\n        except AttributeError:\n          # In case of EagerTensor.\n          weight_numpy = weight.numpy().ravel()\n        layer_weights.append(weight_numpy)\n        all_weights.append(weight_numpy)\n      layer_weights = np.concatenate(layer_weights)\n      layer_sparsity.append((layer.name, np.mean(layer_weights == 0)))\n\n  if len(all_weights) > 0:\n    # Average the sparsity for the entire model.\n    all_weights = np.concatenate(all_weights)\n    total_sparsity = np.mean(all_weights == 0)\n  if per_layer:\n    return (total_sparsity, layer_sparsity)\n  
else:\n    return total_sparsity\n\n\ndef quantized_model_debug(model, X_test, plot=False, plt_instance=None):\n  \"\"\"Debugs and plots model weights and activations.\n\n  Args:\n    model: The QKeras model to debug\n    X_test: The sample data to use to give to model.predict\n    plot: Bool. If to plot the results.\n    plt_instance: A matplotlib.pyplot instance used to plot in an IPython\n      environment.\n  \"\"\"\n  assert (plt_instance and plot) or not plot, (\n      \"plt_instance is required if plt is True\")\n\n  outputs = []\n  output_names = []\n\n  for layer in model.layers:\n    if layer.__class__.__name__ in REGISTERED_LAYERS:\n      output_names.append(layer.name)\n      outputs.append(layer.output)\n\n  model_debug = Model(inputs=model.inputs, outputs=outputs)\n\n  y_pred = model_debug.predict(X_test)\n\n  print(\"{:30} {: 8.4f} {: 8.4f}\".format(\n      \"input\", np.min(X_test), np.max(X_test)))\n\n  for n, p in zip(output_names, y_pred):\n    layer = model.get_layer(n)\n    if (layer.__class__.__name__ in \"QActivation\" or\n        layer.__class__.__name__ in \"QAdaptiveActivation\"):\n      alpha = get_weight_scale(layer.activation, p)\n    else:\n      alpha = 1.0\n    print(\n        \"{:30} {: 8.4f} {: 8.4f}\".format(n, np.min(p / alpha),\n                                         np.max(p / alpha)),\n        end=\"\")\n    if alpha != 1.0:\n      print(\" a[{: 8.4f} {:8.4f}]\".format(np.min(alpha), np.max(alpha)))\n    if plot and layer.__class__.__name__ in [\n        \"QConv1D\", \"QConv2D\", \"QConv2DTranspose\", \"QDense\", \"QActivation\",\n        \"QAdaptiveActivation\", \"QSimpleRNN\", \"QLSTM\", \"QGRU\", \"QBidirectional\",\n        \"QSeparableConv1D\", \"QSeparableConv2D\"\n    ]:\n      plt_instance.hist(p.flatten(), bins=25)\n      plt_instance.title(layer.name + \"(output)\")\n      plt_instance.show()\n    alpha = None\n\n    if layer.__class__.__name__ not in [\n        \"QConv2DBatchnorm\", 
\"QDepthwiseConv2DBatchnorm\"]:\n      weights_to_examine = layer.get_weights()\n    else:\n      weights_to_examine = layer.get_folded_weights()\n\n    for i, weights in enumerate(weights_to_examine):\n      if hasattr(layer, \"get_quantizers\") and layer.get_quantizers()[i]:\n        weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))\n        if i == 0 and layer.__class__.__name__ in [\n            \"QConv1D\", \"QConv2D\", \"QConv2DTranspose\", \"QDense\",\n            \"QSimpleRNN\", \"QLSTM\", \"QGRU\",\n            \"QSeparableConv1D\", \"QSeparableConv2D\",\n            \"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"\n        ]:\n          alpha = get_weight_scale(layer.get_quantizers()[i], weights)\n          # if alpha is 0, let's remove all weights.\n          alpha_mask = (alpha == 0.0)\n          weights = np.where(alpha_mask, weights * alpha, weights / alpha)\n          if plot:\n            plt_instance.hist(weights.flatten(), bins=25)\n            plt_instance.title(layer.name + \"(weights)\")\n            plt_instance.show()\n      print(\" ({: 8.4f} {: 8.4f})\".format(np.min(weights), np.max(weights)),\n            end=\"\")\n    if alpha is not None and isinstance(alpha, np.ndarray):\n      print(\" a({: 10.6f} {: 10.6f})\".format(\n          np.min(alpha), np.max(alpha)), end=\"\")\n    print(\"\")\n\n\ndef quantized_model_dump(model,\n                         x_test,\n                         output_dir=None,\n                         layers_to_dump=[]):\n  \"\"\"Dumps tensors of target layers to binary files.\n\n  Arguments:\n    model: QKeras model object.\n    x_test: numpy type, test tensors to generate output tensors.\n    output_dir: a string for the directory to hold binary data.\n    layers_to_dump: a list of string, specified layers by layer\n      customized name.\n  \"\"\"\n  outputs = []\n  y_names = []\n\n  if not output_dir:\n    with tempfile.TemporaryDirectory() as output_dir:\n      print(\"temp dir\", 
output_dir)\n\n  if not os.path.exists(output_dir):\n    os.makedirs(output_dir)\n    print(\"create dir\", output_dir)\n\n  for layer in model.layers:\n    if not layers_to_dump or layer.name in layers_to_dump:\n      y_names.append(layer.name)\n      outputs.append(layer.output)\n\n  # Gather the tensor outputs from specified layers at layers_to_dump.\n  model_debug = Model(inputs=model.inputs, outputs=outputs)\n  y_pred = model_debug.predict(x_test)\n\n  # Dumps tensors to files.\n  for name, tensor_data in zip(y_names, y_pred):\n    filename = os.path.join(output_dir, name + \".bin\")\n    print(\"writing the layer output tensor to \", filename)\n    with open(filename, \"w\") as fid:\n      tensor_data.astype(np.float32).tofile(fid)\n\n\ndef clone_model_and_freeze_auto_po2_scale(\n    orig_model, orig_model_path=None, quantize_model_weights=False):\n  \"\"\"Clone model and freeze the scale value of auto_po2 type quantizers.\n\n  Args:\n    orig_model: original model which will be used to clone the new model.\n      If set to None, the function will load the original model\n      from orig_model_path argument.\n    orig_model_path: The path to the original model file.\n      If set to None, the function will load the original model from the\n      orig_model argument.\n    quantize_model_weights: Bool to quantize weights to HW format.\n      If set to False, the model weights will be in float format.\n      If set to True, the model weights will be in HW format and the function\n        will also check if the hw weights extracted from the new model matches\n        the original model.\n\n  Returns:\n    A tuple of the new model and the new model's hw weights.\n\n  Note:\n    + When using this function to retrain model with fixed scale value.\n      Set quantize_model_weights to False in this case.\n    + This function only supports a collection of common layers that will use\n      auto_po2 quantizers. 
For less common layers, it will raise errors and we\n      will add more support case by case.\n\n  Example usage:\n    model, _ = clone_model_and_freeze_auto_po2_scale(\n        orig_model_path=\"path/to/model\",\n        quantize_model_weights=False)\n  \"\"\"\n\n  def _create_bn_layer(layer_cfg, bn_inv_quantizer):\n    # Clone batch normalization layer with the new inverse quantizer.\n    if bn_inv_quantizer is not None:\n      layer_cfg[\"inverse_quantizer\"][\"config\"] = bn_inv_quantizer.get_config()\n    return QBatchNormalization(**layer_cfg)\n\n  def _create_qconv2d_layer(layer_cfg, kernel_quantizer):\n    # Clone QConv2D layer with the new kernel quantizers.\n    if kernel_quantizer is not None:\n      layer_cfg[\"kernel_quantizer\"][\"config\"] = kernel_quantizer.get_config()\n    return QConv2D(**layer_cfg)\n\n  def _create_qdepthwise_conv2d_layer(layer_cfg, depthwise_quantizer):\n    # Clone QDepthwiseConv2D layer with the new depthwise_quantizer quantizer.\n    if depthwise_quantizer is not None:\n      layer_cfg[\"depthwise_quantizer\"][\n          \"config\"] = depthwise_quantizer.get_config()\n    return QDepthwiseConv2D(**layer_cfg)\n\n  def _create_qdense_layer(layer_cfg, kernel_quantizer):\n    # Clone QDense layer with the new kernel quantizer.\n    if kernel_quantizer is not None:\n      layer_cfg[\"kernel_quantizer\"][\"config\"] = kernel_quantizer.get_config()\n    return QDense(**layer_cfg)\n\n  def _create_other_layer(orig_layer):\n    # Clone other layers.\n    config = orig_layer.get_config()\n    return orig_layer.__class__.from_config(config)\n\n  def _create_quantized_bits_with_post_training_scale(q):\n    # Create a new quantized_bits instance with the fixed scale value.\n    if q is not None:\n      q_cfg = q.get_config()\n      q_cfg[\"post_training_scale\"] = q.scale.numpy()\n      q = quantized_bits(**q_cfg)\n    return q\n\n  def _find_auto_po2_quantizer(layer):\n    # Find the auto_po2 quantizer in the layer. 
Note that we allow at\n    # most one auto_po2 quantizer in each layer due to the limitation of\n    # the current HW implementation.\n    num_auto_po2_quantizers = 0\n    auto_po2_quantizer = None\n    if hasattr(layer, \"quantizers\"):\n      for q in layer.quantizers:\n        if hasattr(q, \"alpha\") and q.alpha == \"auto_po2\":\n          num_auto_po2_quantizers += 1\n          auto_po2_quantizer = q\n    if num_auto_po2_quantizers > 1:\n      raise ValueError(\n          f\"{layer.name} has more than one auto_po2 quantizer. \"\n          \"Please check if this is expected.\")\n    else:\n      return auto_po2_quantizer\n\n  def _check_hw_weights_equal(hw_weights_1, hw_weights_2):\n    # Check if the hw weights extracted from the new model matches the\n    # original model.\n    for layer_name in hw_weights_2.keys():\n      for key in hw_weights_2[layer_name].keys():\n\n        val1 = hw_weights_2[layer_name][key]\n        val2 = hw_weights_1[layer_name][key]\n        if isinstance(val1, list):\n          for (v1, v2) in zip(val1, val2):\n            if not np.all(v1 == v2):\n              raise ValueError(\n                  f\"{layer_name}/{key}: No Match! v1={v1}, v2={v2}\")\n        else:\n          if not np.all(val1 == val2):\n            raise ValueError(\n                f\"{layer_name}/{key}: No Match! 
val1={val1}, val2={val2}\")\n\n  # Load the original model with float weights.\n  # Note: weights will be quantized later in silicon flow by calling\n  # model_save_quantized_weights.\n  if orig_model is not None and orig_model_path is not None:\n    raise ValueError(\n        \"Only one of orig_model and orig_model_path can be set.\")\n  elif orig_model is None and orig_model_path is None:\n    raise ValueError(\n        \"One of orig_model and orig_model_path must be set.\")\n  elif orig_model_path is not None:\n    orig_model = load_qmodel(orig_model_path, compile=False)\n\n  # Quantize model weights and compute quantizer scale values.\n  quantized_model = tf.keras.models.clone_model(orig_model)\n  quantized_model.set_weights(orig_model.get_weights())\n  # In silicon flow, weight binary files are generated from hw weights.\n  orig_hw_weights = model_save_quantized_weights(\n      quantized_model)\n\n  # Create a new model with fixed scale quantizers.\n  x = inputs = tf.keras.Input(\n      shape=orig_model.input_shape[1:], name=orig_model.layers[0].name)\n  for layer in quantized_model.layers[1:]:\n    layer_class = layer.__class__.__name__\n    auto_po2_quantizer = _find_auto_po2_quantizer(layer)\n    auto_po2_quantizer_with_frozen_scale = (\n        _create_quantized_bits_with_post_training_scale(auto_po2_quantizer))\n    layer_cfg = layer.get_config()\n\n    # To be compatible with different python versions, we do not use\n    # match-case style here.\n    if layer_class == \"QConv2D\":\n      x = _create_qconv2d_layer(layer_cfg,\n                                auto_po2_quantizer_with_frozen_scale)(x)\n    elif layer_class == \"QDepthwiseConv2D\":\n      x = _create_qdepthwise_conv2d_layer(\n          layer_cfg, auto_po2_quantizer_with_frozen_scale)(x)\n    elif layer_class == \"QBatchNormalization\":\n      x = _create_bn_layer(layer_cfg,\n                           auto_po2_quantizer_with_frozen_scale)(x)\n    elif layer_class == \"QDense\":\n      x = 
_create_qdense_layer(layer_cfg,\n                               auto_po2_quantizer_with_frozen_scale)(x)\n    else:\n      x = _create_other_layer(layer)(x)\n\n  new_model = tf.keras.Model(inputs, x)\n  # Set the weights of the new model to the original model (float weights).\n  new_model.set_weights(orig_model.get_weights())\n\n  # Check if the new model still has auto_po2 quantizer.\n  # This function only supports a collection of common layers that will use\n  # auto_po2 quantizers. For less common layers, we need to add extra support\n  # in the future.\n  for layer in new_model.layers:\n    q = _find_auto_po2_quantizer(layer)\n    if q is not None and q.post_training_scale is None:\n      raise ValueError(\n          f\"{layer.name} in the new model still has auto_po2 quantizer with \"\n          \"adaptive scales. Please check if this is expected!\")\n\n  new_hw_weights = None\n  if quantize_model_weights:\n    new_hw_weights = model_save_quantized_weights(new_model)\n    # Check if the hw weights extracted from the new model matches the original\n    # model.\n    _check_hw_weights_equal(orig_hw_weights, new_hw_weights)\n\n  return new_model, new_hw_weights\n"
  },
  {
    "path": "requirements.txt",
    "content": "tensorflow>=2.5.0rc0\nnumpy>=1.16.5\npyparser\npandas>=1.1.0\nmatplotlib>=3.3.0\nscipy>=1.4.1\nsetuptools>=41.0.0\nargparse>=1.4.0\npyasn1<0.5.0,>=0.4.6\nrequests<3,>=2.21.0\npyparsing\npytest>=4.6.9\ntensorflow-model-optimization>=0.2.1\nnetworkx>=2.1\n# prompt_toolkit is required by IPython.\n# IPython is required by keras-tuner.\n# Later prompt_toolkit version requires Python 3.6.2,\n# which is not supported. cl/380856863\nprompt_toolkit<=3.0.18\nkeras-tuner==1.0.3\nscikit-learn>=0.23.1\ntqdm>=4.48.0\n"
  },
  {
    "path": "setup.cfg",
    "content": "[metadata]\nname = qkeras\nversion = 0.9.0\nauthor = Google\nauthor_email = qkeras-team@google.com\ndescription = A quantization extension to Keras that provides drop-in layer replacements\nlong_description = file: README.md\nlong_description_content_type = text/markdown\nurl = https://github.com/google/qkeras\nclassifiers =\n    Programming Language :: Python :: 3\n    License :: OSI Approved :: Apache Software License\n    Operating System :: OS Independent\n\n[options]\npackages = find:\npython_requires = >=3.7\n\n[options.packages.find]\nwhere = qkeras\n\n[aliases]\ntest=pytest"
  },
  {
    "path": "setup.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Setup script for qkeras.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport io\nimport setuptools\n\nwith io.open(\"README.md\", \"r\", encoding=\"utf8\") as fh:\n  long_description = fh.read()\n\nsetuptools.setup(\n    name=\"QKeras\",\n    version=\"0.9.0\",\n    author=\"QKeras Team\",\n    author_email=\"qkeras-team@google.com\",\n    maintainer=\"Shan Li\",\n    maintainer_email=\"lishanok@google.com\",\n    packages=setuptools.find_packages(),\n    scripts=[],\n    url=\"\",\n    license=\"Apache v.2.0\",\n    description=\"Quantization package for Keras\",\n    long_description=long_description,\n    install_requires=[\n        \"numpy>=1.16.0\",\n        \"scipy>=1.4.1\",\n        \"pyparser\",\n        \"setuptools>=41.0.0\",\n        \"tensorflow-model-optimization>=0.2.1\",\n        \"networkx>=2.1\",\n        \"keras-tuner>=1.0.1\",\n        \"scikit-learn>=0.23.1\",\n        \"tqdm>=4.48.0\"\n    ],\n    setup_requires=[\n        \"pytest-runner\",\n    ],\n    tests_require=[\n        \"pytest\",\n    ],\n)\n"
  },
  {
    "path": "tests/automatic_conversion_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport pytest\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import *\n\nfrom qkeras import *\nfrom qkeras.utils import model_quantize\n\n\ndef create_network():\n  xi = Input((28,28,1))\n  x = Conv2D(32, (3, 3))(xi)\n  x = Activation(\"relu\", name='relu_act')(x)\n  x = Conv2D(32, (3, 3), activation=\"relu\")(x)\n  x = Activation(\"softmax\")(x)\n  x = QConv2D(32, (3, 3), activation=\"quantized_relu(4)\")(x)\n  return Model(inputs=xi, outputs=x)\n\ndef create_network_with_bn():\n  xi = Input((28,28,1))\n  x = Conv2D(32, (3, 3))(xi)\n  x = BatchNormalization(axis=-1)(x)\n  x = Activation(\"relu\", name='relu_act')(x)\n  x = Conv2D(32, (3, 3), activation=\"relu\")(x)\n  x = Activation(\"softmax\")(x)\n  x = DepthwiseConv2D((3, 3))(x)\n  x = BatchNormalization(axis=-1)(x)\n  return Model(inputs=xi, outputs=x)\n\ndef create_network_sequential():\n  model = Sequential([\n    Conv2D(32, (3, 3), input_shape=(28,28,1)),\n    Activation('relu'),\n    Conv2D(32, (3, 3), activation=\"relu\"),\n    Activation('softmax'),\n    QConv2D(32, (3, 3), activation=\"quantized_relu(4)\")\n  ])\n  return model\n\ndef test_linear_activation():\n  m = create_network()\n\n  
assert m.layers[1].activation.__name__ == \"linear\", \"test failed\"\n\n\ndef test_linear_activation_conversion():\n  m = create_network()\n\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\",\n          \"activation_quantizer\": \"binary\"\n      }\n  }\n  qq = model_quantize(m, d, 4)\n\n  assert str(qq.layers[1].activation) == \"binary()\"\n\n\ndef test_no_activation_conversion_to_quantized():\n  m = create_network()\n  d = {\"QConv2D\": {\"kernel_quantizer\": \"binary\", \"bias_quantizer\": \"binary\"}}\n  qq = model_quantize(m, d, 4)\n  assert qq.layers[2].__class__.__name__ == \"Activation\"\n  assert qq.layers[4].__class__.__name__ == \"Activation\"\n\n\ndef test_automatic_conversion_from_relu_to_qr():\n  m = create_network()\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      }}\n  qq = model_quantize(m, d, 4)\n  assert str(qq.layers[3].activation) == \"quantized_relu(4,0)\"\n\n\ndef test_conversion_from_relu_activation_to_qr_qactivation():\n  m = create_network()\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QActivation\": {\n          \"relu\": \"ternary\"\n      }\n  }\n  qq = model_quantize(m, d, 4)\n  assert qq.layers[2].__class__.__name__ == \"QActivation\"\n  assert str(qq.layers[2].quantizer) == \"ternary()\"\n  assert qq.layers[4].__class__.__name__ == \"Activation\"\n\n\ndef test_conversion_from_relu_activation_to_qadaptiveactivation():\n  m = create_network()\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QAdaptiveActivation\": {\n          \"relu\": \"quantized_relu(8)\"\n      }\n  }\n  qq = model_quantize(m, d, 4)\n  assert qq.layers[2].__class__.__name__ == \"QAdaptiveActivation\"\n  assert 
str(qq.layers[2].quantizer).startswith(\"quantized_relu(8,\")\n  assert qq.layers[4].__class__.__name__ == \"Activation\"\n\n\ndef test_conversion_qadaptiveactivation_with_preference():\n  m = create_network()\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"relu_act\": {\n          \"relu\": \"quantized_relu(8)\"\n      }\n  }\n\n  # Test with QActivation preference\n  qq1 = model_quantize(m, d, 4, prefer_qadaptiveactivation=False)\n  assert qq1.layers[2].__class__.__name__ == \"QActivation\"\n  assert str(qq1.layers[2].quantizer).startswith(\"quantized_relu(8,\")\n  assert qq1.layers[4].__class__.__name__ == \"Activation\"\n\n  # Test with QAdaptiveActivation preference\n  qq2 = model_quantize(m, d, 4, prefer_qadaptiveactivation=True)\n  assert qq2.layers[2].__class__.__name__ == \"QAdaptiveActivation\"\n  assert str(qq2.layers[2].quantizer).startswith(\"quantized_relu(8,\")\n  assert qq2.layers[4].__class__.__name__ == \"Activation\"\n\n\ndef test_sequential_model_conversion():\n  m = create_network_sequential()\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      }}\n  qq = model_quantize(m, d, 4)\n  assert str(qq.layers[2].activation) == \"quantized_relu(4,0)\"\n\n\ndef test_folded_layer_conversion():\n  # create a sequential model with conv2d layer and activation layers\n  m1 = create_network()\n\n  # create a sequential model with conv2d layer followed by bn layer\n  m2 = create_network_with_bn()\n\n  # quantization config\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QDepthwiseConv2D\": {\n          \"depthwise_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QConv2DBatchnorm\": {\n          \"kernel_quantizer\": \"ternary\",\n          \"bias_quantizer\": \"ternary\",\n     
 },\n      \"QDepthwiseConv2DBatchnorm\": {\n          \"depthwise_quantizer\": \"ternary\",\n          \"bias_quantizer\": \"ternary\",\n      },\n      \"relu_act\": {\n          \"relu\": \"quantized_relu(8)\"\n      }\n  }\n\n  # test when model has no layer to fold\n  # desired behavior: un-folded layers\n  qq1 = model_quantize(m1, d, 4, enable_bn_folding=True)\n  assert qq1.layers[1].__class__.__name__ == \"QConv2D\"\n  assert str(qq1.layers[1].quantizers[0]).startswith(\"binary\")\n\n  # test when the 1st conv2d layers needs to fold but the 2nd conv2d layer\n  # does not (not followed by bn layer)\n  # desired behavior: 1st conv2d is folded, 2nd conv2d unfolded\n  # also test the depthwiseconv2d layer should fold\n  qq2 = model_quantize(m2, d, 4, enable_bn_folding=True)\n  assert qq2.layers[1].__class__.__name__ == \"QConv2DBatchnorm\"\n  assert str(qq2.layers[1].quantizers[0]).startswith(\"ternary\")\n  assert qq2.layers[3].__class__.__name__ == \"QConv2D\"\n  assert str(qq2.layers[3].quantizers[0]).startswith(\"binary\")\n  assert qq2.layers[5].__class__.__name__ == \"QDepthwiseConv2DBatchnorm\"\n  assert str(qq2.layers[5].quantizers[0]).startswith(\"ternary\")\n\n  # test when there are layers to fold but folding is disabled\n  # desired behavior: all conv2d/depthwise2d layers are not folded\n  qq3 = model_quantize(m2, d, 4, enable_bn_folding=False)\n  assert qq3.layers[1].__class__.__name__ == \"QConv2D\"\n  assert str(qq3.layers[1].quantizers[0]).startswith(\"binary\")\n  assert qq3.layers[2].__class__.__name__ == \"BatchNormalization\"\n  assert str(qq3.layers[3].quantizer).startswith(\"quantized_relu\")\n  assert qq3.layers[6].__class__.__name__ == \"QDepthwiseConv2D\"\n  assert str(qq3.layers[6].quantizers[0]).startswith(\"binary\")\n\n  # test when QConv2DBatchnorm quantizer, e.g., is not given in config\n  # desired behavior: quantizers for QConv2DBatchnorm layer fall back to QConv2D\n  #   quantizers\n  d = {\n      \"QConv2D\": {\n          
\"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QDepthwiseConv2D\": {\n          \"depthwise_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"relu_act\": {\n          \"relu\": \"quantized_relu(8)\"\n      }\n  }\n  qq4 = model_quantize(m2, d, 4, enable_bn_folding=True)\n  assert qq4.layers[1].__class__.__name__ == \"QConv2DBatchnorm\"\n  assert str(qq4.layers[1].quantizers[0]).startswith(\"binary\")\n  assert qq4.layers[3].__class__.__name__ == \"QConv2D\"\n  assert str(qq4.layers[3].quantizers[0]).startswith(\"binary\")\n  assert qq4.layers[5].__class__.__name__ == \"QDepthwiseConv2DBatchnorm\"\n  assert str(qq4.layers[5].quantizers[0]).startswith(\"binary\")\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/autoqkeras_test.py",
    "content": "# ==============================================================================\n# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\nimport tempfile\nimport numpy as np\nimport pytest\nfrom sklearn.datasets import load_iris\nfrom sklearn.preprocessing import MinMaxScaler\nimport tensorflow.compat.v2 as tf\ntf.enable_v2_behavior()\n\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import BatchNormalization\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.layers import Dropout\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.optimizers import Adam\nfrom tensorflow.keras.utils import to_categorical\n\nfrom qkeras.autoqkeras import AutoQKerasScheduler\n\n\ndef dense_model():\n  \"\"\"Creates test dense model.\"\"\"\n\n  x = x_in = Input((4,), name=\"input\")\n  x = Dense(20, name=\"dense_0\")(x)\n  x = BatchNormalization(name=\"bn0\")(x)\n  x = Activation(\"relu\", name=\"relu_0\")(x)\n  x = Dense(40, name=\"dense_1\")(x)\n  x = BatchNormalization(name=\"bn1\")(x)\n  x = Activation(\"relu\", name=\"relu_1\")(x)\n  x = Dense(20, name=\"dense_2\")(x)\n  x = BatchNormalization(name=\"bn2\")(x)\n  x = Activation(\"relu\", name=\"relu_2\")(x)\n  x = Dense(3, name=\"dense\")(x)\n  x = Activation(\"softmax\", name=\"softmax\")(x)\n\n  model 
= Model(inputs=x_in, outputs=x)\n  return model\n\n\ndef test_autoqkeras():\n  \"\"\"Tests AutoQKeras scheduler.\"\"\"\n  np.random.seed(42)\n  tf.random.set_seed(42)\n\n  x_train, y_train = load_iris(return_X_y=True)\n\n  scaler = MinMaxScaler(feature_range=(-0.5, 0.5))\n  scaler.fit(x_train)\n  x_train = scaler.transform(x_train)\n\n  nb_classes = np.max(y_train) + 1\n  y_train = to_categorical(y_train, nb_classes)\n\n  quantization_config = {\n      \"kernel\": {\n          \"stochastic_ternary\": 2,\n          \"quantized_bits(8,0,1,alpha=1.0)\": 8\n      },\n      \"bias\": {\n          \"quantized_bits(4,0,1)\": 4\n      },\n      \"activation\": {\n          \"quantized_relu(4,1)\": 4\n      },\n      \"linear\": {\n          \"binary\": 1\n      }\n  }\n\n  goal = {\n      \"type\": \"energy\",\n      \"params\": {\n          \"delta_p\": 8.0,\n          \"delta_n\": 8.0,\n          \"rate\": 2.0,\n          \"stress\": 1.0,\n          \"process\": \"horowitz\",\n          \"parameters_on_memory\": [\"sram\", \"sram\"],\n          \"activations_on_memory\": [\"sram\", \"sram\"],\n          \"rd_wr_on_io\": [False, False],\n          \"min_sram_size\": [0, 0],\n          \"reference_internal\": \"int8\",\n          \"reference_accumulator\": \"int32\"\n      }\n  }\n\n  model = dense_model()\n  model.summary()\n  optimizer = Adam(lr=0.01)\n  model.compile(optimizer=optimizer, loss=\"categorical_crossentropy\",\n                metrics=[\"acc\"])\n\n  limit = {\n      \"dense_0\": [[\"stochastic_ternary\"], 8, 4],\n      \"dense\": [[\"quantized_bits(8,0,1,alpha=1.0)\"], 8, 4],\n      \"BatchNormalization\": [],\n      \"Activation\": [4]\n  }\n\n  run_config = {\n      \"output_dir\": tempfile.mkdtemp(),\n      \"goal\": goal,\n      \"quantization_config\": quantization_config,\n      \"learning_rate_optimizer\": False,\n      \"transfer_weights\": False,\n      \"mode\": \"random\",\n      \"seed\": 42,\n      \"limit\": limit,\n      \"tune_filters\": 
\"layer\",\n      \"tune_filters_exceptions\": \"^dense$\",\n      \"max_trials\": 1,\n\n      \"blocks\": [\n          \"^.*0$\",\n          \"^dense$\"\n      ],\n      \"schedule_block\": \"cost\"\n  }\n\n  autoqk = AutoQKerasScheduler(model, metrics=[\"acc\"], **run_config)\n  autoqk.fit(x_train, y_train, validation_split=0.1, batch_size=150, epochs=4)\n\n  qmodel = autoqk.get_best_model()\n\n  optimizer = Adam(lr=0.01)\n  qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\",\n                 metrics=[\"acc\"])\n  history = qmodel.fit(x_train, y_train, epochs=5, batch_size=150,\n                       validation_split=0.1)\n\n  quantized_acc = history.history[\"acc\"][-1]\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n\n"
  },
  {
    "path": "tests/bn_folding_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests layers from folded_layers.py.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nfrom numpy.testing import assert_allclose\nfrom numpy.testing import assert_equal\nfrom numpy.testing import assert_raises\nimport tempfile\nimport tensorflow as tf\nfrom tensorflow.keras import layers\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.backend import clear_session\nfrom tensorflow.keras.utils import to_categorical\nfrom tensorflow.keras import metrics\n\nfrom qkeras import QConv2DBatchnorm\nfrom qkeras import QConv2D\nfrom qkeras import QDense\nfrom qkeras import QActivation\nfrom qkeras import QDepthwiseConv2D\nfrom qkeras import QDepthwiseConv2DBatchnorm\nfrom qkeras import utils as qkeras_utils\nfrom qkeras import bn_folding_utils\n\ndef get_sgd_optimizer(learning_rate):\n  if hasattr(tf.keras.optimizers, \"legacy\"):\n    return tf.keras.optimizers.legacy.SGD(learning_rate)\n  else:\n    return tf.keras.optimizers.SGD(learning_rate)\n\n\ndef get_qconv2d_model(input_shape, kernel_size, kernel_quantizer=None):\n  num_class = 2\n\n  x = x_in = layers.Input(input_shape, name=\"input\")\n\n  x = QConv2D(\n      filters=2, kernel_size=kernel_size, strides=(4, 4),\n      
kernel_initializer=\"ones\",\n      bias_initializer=\"zeros\", use_bias=False,\n      kernel_quantizer=kernel_quantizer, bias_quantizer=None,\n      name=\"conv2d\")(x)\n\n  x = layers.BatchNormalization(\n      axis=-1,\n      momentum=0.99,\n      epsilon=0.001,\n      center=True,\n      scale=True,\n      beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\",\n      moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\",\n      beta_regularizer=None,\n      gamma_regularizer=None,\n      beta_constraint=None,\n      gamma_constraint=None,\n      renorm=False,\n      renorm_clipping=None,\n      renorm_momentum=0.99,\n      fused=None,\n      trainable=True,\n      virtual_batch_size=None,\n      adjustment=None,\n      name=\"bn\")(\n          x)\n  x = layers.Flatten(name=\"flatten\")(x)\n  x = layers.Dense(num_class, use_bias=False, kernel_initializer=\"ones\",\n                   name=\"dense\")(x)\n  x = layers.Activation(\"softmax\", name=\"softmax\")(x)\n  model = Model(inputs=[x_in], outputs=[x])\n  return model\n\n\ndef get_qconv2d_batchnorm_model(input_shape, kernel_size, folding_mode,\n                                kernel_quantizer=None):\n  num_class = 2\n\n  x = x_in = layers.Input(input_shape, name=\"input\")\n  x = QConv2DBatchnorm(\n      filters=2, kernel_size=kernel_size, strides=(4, 4),\n      kernel_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n      kernel_quantizer=kernel_quantizer, beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\", folding_mode=folding_mode,\n      name=\"foldconv2d\")(x)\n\n  x = layers.Flatten(name=\"flatten\")(x)\n  x = layers.Dense(num_class, use_bias=False, kernel_initializer=\"ones\",\n                   name=\"dense\")(x)\n  x = layers.Activation(\"softmax\", name=\"softmax\")(x)\n  model = Model(inputs=[x_in], outputs=[x])\n  return model\n\n\ndef 
get_models_with_one_layer(kernel_quantizer, folding_mode, ema_freeze_delay):\n\n  x_shape = (2, 2, 1)\n  loss_fn = tf.keras.losses.MeanSquaredError()\n  optimizer = get_sgd_optimizer(learning_rate=1e-3)\n\n  # define a model with separate conv2d and bn layers\n  x = x_in = layers.Input(x_shape, name=\"input\")\n  x = QConv2D(\n      filters=2, kernel_size=(2, 2), strides=(4, 4),\n      kernel_initializer=\"ones\",\n      bias_initializer=\"zeros\", use_bias=False,\n      kernel_quantizer=kernel_quantizer, bias_quantizer=None,\n      name=\"conv2d\")(x)\n  x = layers.BatchNormalization(\n      axis=-1,\n      momentum=0.99,\n      epsilon=0.001,\n      center=True,\n      scale=True,\n      beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\",\n      moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\",\n      beta_regularizer=None,\n      gamma_regularizer=None,\n      beta_constraint=None,\n      gamma_constraint=None,\n      renorm=False,\n      renorm_clipping=None,\n      renorm_momentum=0.99,\n      fused=None,\n      trainable=True,\n      virtual_batch_size=None,\n      adjustment=None,\n      name=\"bn\")(x)\n  unfold_model = Model(inputs=[x_in], outputs=[x])\n  unfold_model.compile(loss=loss_fn, optimizer=optimizer, metrics=\"acc\")\n\n  x = x_in = layers.Input(x_shape, name=\"input\")\n  x = QConv2DBatchnorm(\n      filters=2, kernel_size=(2, 2), strides=(4, 4),\n      kernel_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n      kernel_quantizer=kernel_quantizer, beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\", folding_mode=folding_mode,\n      ema_freeze_delay=ema_freeze_delay,\n      name=\"foldconv2d\")(x)\n  fold_model = Model(inputs=[x_in], outputs=[x])\n  fold_model.compile(loss=loss_fn, optimizer=optimizer, metrics=\"acc\")\n\n  return (unfold_model, fold_model)\n\n\ndef get_debug_model(model):\n  
layer_output_list = []\n  for layer in model.layers:\n    if layer.__class__.__name__ not in [\"Flatten\", \"InputLayer\"]:\n      layer_output_list.append(layer.output)\n\n  debug_model = Model(inputs=model.inputs, outputs=layer_output_list)\n  return debug_model\n\n\ndef generate_dataset(train_size=10,\n                     batch_size=5,\n                     input_shape=(3, 3, 1),\n                     num_class=2,\n                     output_shape=None):\n  \"\"\"create tf.data.Dataset with shape: (N,) + input_shape.\"\"\"\n\n  x_train = np.random.randint(\n      4, size=(train_size, input_shape[0], input_shape[1], input_shape[2]))\n  x_train = np.random.rand(\n      train_size, input_shape[0], input_shape[1], input_shape[2])\n\n  if output_shape:\n    y_train = np.random.random_sample((train_size,) + output_shape)\n  else:\n    y_train = np.random.randint(num_class, size=train_size)\n    y_train = to_categorical(y_train, num_class)\n\n  train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n  train_ds = train_ds.batch(batch_size)\n  return train_ds\n\n\ndef run_training(model, epochs, loss_fn, loss_metric, optimizer,\n                 train_ds, do_print=False):\n\n  # Iterate over epochs.\n  for epoch in range(epochs):\n    if do_print:\n      print(\"- epoch {} -\".format(epoch))\n\n    # Iterate over the batches of the dataset.\n    for step, (x_batch_train, y_batch_train) in enumerate(train_ds):\n      if do_print:\n        print(\"\\n   - step {} -\".format(step))\n      with tf.GradientTape() as tape:\n        predictions = model(x_batch_train, training=True)\n\n        if epoch == epochs - 1:\n          if do_print:\n            print(\"y_pred:\", predictions)\n            print(\"y:\", y_batch_train)\n          output_predictions = predictions\n\n        # Compute loss\n        loss = loss_fn(y_batch_train, predictions)\n\n        grads = tape.gradient(loss, model.trainable_weights)\n        if do_print:\n          if epoch == epochs - 1:\n 
           # print(\"old trainable:\", model.trainable_weights)\n            print(\"grads:\", grads)\n        optimizer.apply_gradients(zip(grads, model.trainable_weights))\n\n        if do_print:\n          if epoch == epochs - 1:\n            # print(\"new trainable:\", model.trainable_weights)\n            print(\"loss:\", loss)\n        loss_metric(loss)\n        if do_print:\n          if epoch == epochs - 1:\n            print(\"mean loss = %.4f\" % (loss_metric.result()))\n\n  return output_predictions\n\n\ndef test_unfold_model():\n  \"\"\"Test if unfold_model works properly.\n\n  Convert a folded model to a normal model. The kernel/bias weight in\n  the normal model should be the same as the folded kernel/bias in the folded\n  model. Test if the function can convert both sequential and non-sequential\n  models properly.\n  \"\"\"\n\n  x_shape = (2, 2, 1)\n  kernel_quantizer = \"quantized_bits(4, 0, 1)\"\n  folding_mode = \"batch_stats_folding\"\n  ema_freeze_delay = 10\n  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])\n  gamma = np.array([2., 1.])\n  beta = np.array([0., 1.])\n  moving_mean = np.array([1., 1.])\n  moving_variance = np.array([1., 2.])\n  iteration = np.array(-1)\n\n  def _get_sequantial_folded_model(x_shape):\n    x = x_in = layers.Input(x_shape, name=\"input\")\n    x = QConv2DBatchnorm(\n        filters=2, kernel_size=(2, 2), strides=(2, 2),\n        kernel_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n        kernel_quantizer=kernel_quantizer, beta_initializer=\"zeros\",\n        gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n        moving_variance_initializer=\"ones\", folding_mode=folding_mode,\n        ema_freeze_delay=ema_freeze_delay,\n        name=\"foldconv2d\")(x)\n    x = QDepthwiseConv2DBatchnorm(\n        kernel_size=(2, 2),\n        strides=(1, 1),\n        use_bias=False,\n        depthwise_quantizer=kernel_quantizer,\n        folding_mode=folding_mode,\n       
 ema_freeze_delay=ema_freeze_delay,\n        name=\"folddepthwiseconv2d\")(x)\n    model = Model(inputs=[x_in], outputs=[x])\n    model.layers[1].set_weights([\n        kernel, gamma, beta, iteration, moving_mean, moving_variance\n    ])\n\n    return model\n\n  def _get_nonseq_folded_model(x_shape):\n    x = x_in = layers.Input(x_shape, name=\"input\")\n    x1 = layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),\n                       name=\"conv2d_1\")(x)\n    x2 = layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),\n                       name=\"conv2d_2\")(x)\n    x = layers.Maximum()([x1, x2])\n    x = QConv2DBatchnorm(\n        filters=2, kernel_size=(2, 2), strides=(4, 4),\n        kernel_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n        kernel_quantizer=kernel_quantizer, beta_initializer=\"zeros\",\n        gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n        moving_variance_initializer=\"ones\", folding_mode=folding_mode,\n        ema_freeze_delay=ema_freeze_delay,\n        name=\"foldconv2d\")(x)\n    x = layers.Flatten(name=\"flatten\")(x)\n    x = layers.Dense(2, use_bias=False, kernel_initializer=\"ones\",\n                     name=\"dense\")(x)\n    model = Model(inputs=[x_in], outputs=[x])\n    model.layers[4].set_weights([\n        kernel, gamma, beta, iteration, moving_mean, moving_variance\n    ])\n    return model\n\n  seq_model = _get_sequantial_folded_model((4, 4, 1))\n  nonseq_model = _get_nonseq_folded_model(x_shape)\n\n  for model in [nonseq_model, seq_model]:\n\n    # preparing data for testing if model prediction matches\n\n    output_shape = model.output_shape[1:]\n    input_shape = model.input_shape[1:]\n    train_ds = generate_dataset(train_size=10, batch_size=5,\n                                input_shape=input_shape,\n                                output_shape=output_shape)\n\n    # convert model with folded layers to a model with corresponding QConv2D\n    # or 
QDepthwiseConv2D layers\n    cvt_model = bn_folding_utils.unfold_model(model)\n\n    for layer_type in [\"QConv2DBatchnorm\", \"QDepthwiseConv2DBatchnorm\"]:\n      weight1 = None\n      weight2 = None\n      for layer in model.layers:\n        if layer.__class__.__name__ == layer_type:\n          weight1 = layer.get_folded_weights()\n          break\n\n      for layer in cvt_model.layers:\n        if layer.__class__.__name__ == layer_type[:-9]:\n          weight2 = layer.get_weights()\n          break\n\n      # test if the corresponding layers have identical weights\n      if weight1 and weight2:\n        assert_equal(weight1[0], weight2[0])\n        assert_equal(weight1[1], weight2[1])\n\n    # test if the predictions of the two models are identical\n    pred1 = model.predict(train_ds)\n    pred2 = cvt_model.predict(train_ds)\n    assert_equal(pred1, pred2)\n\n\ndef test_loading():\n  \"\"\"Test to load model using different approaches.\"\"\"\n\n  loss_fn = tf.keras.losses.MeanSquaredError()\n  loss_metric = metrics.Mean()\n  optimizer = get_sgd_optimizer(learning_rate=1e-3)\n  x_shape = (2, 2, 1)\n\n  custom_objects = {}\n  qkeras_utils._add_supported_quantized_objects(custom_objects)\n\n  train_ds = generate_dataset(train_size=1, batch_size=1,\n                              input_shape=x_shape, num_class=2)\n\n  model_fold = get_qconv2d_batchnorm_model(\n      input_shape=x_shape, kernel_size=(2, 2),\n      folding_mode=\"ema_stats_folding\")\n  model_fold.compile(loss=loss_fn, optimizer=optimizer, metrics=\"acc\")\n\n  run_training(model_fold, 10, loss_fn, loss_metric, optimizer, train_ds,\n               do_print=False)\n\n  # test load model from json to ensure saving/loading model architecture works\n  model_fold.use_legacy_config = True  # Ensures old Keras serialization\n  json_string = model_fold.to_json()\n  clear_session()\n  model_from_json = qkeras_utils.quantized_model_from_json(json_string)\n  model_from_json.use_legacy_config = True\n  assert 
json_string == model_from_json.to_json()\n\n  # test reload model from hdf5 files to ensure saving/loading works\n  _, fname = tempfile.mkstemp(\".h5\")\n  model_fold.save(fname)\n  model_loaded = qkeras_utils.load_qmodel(fname)\n  weight1 = model_fold.layers[1].get_folded_weights()\n  weight2 = model_loaded.layers[1].get_folded_weights()\n  assert_equal(np.array(weight1[0]), np.array(weight2[0]))\n  assert_equal(np.array(weight1[1]), np.array(weight2[1]))\n\n  # test convert a folded model to a normal model for zpm\n  # the kernel/bias weight in the normal model should be the same as the folded\n  # kernel/bias in the folded model\n  normal_model = bn_folding_utils.unfold_model(model_fold)\n  weight2 = normal_model.layers[1].get_weights()\n\n  assert_equal(weight1[0], weight2[0])\n  assert_equal(weight1[1], weight2[1])\n\n\ndef test_same_training_and_prediction():\n  \"\"\"test if fold/unfold layer has the same training and prediction output.\"\"\"\n\n  epochs = 5\n  loss_fn = tf.keras.losses.MeanSquaredError()\n  loss_metric = metrics.Mean()\n  optimizer = get_sgd_optimizer(learning_rate=1e-3)\n\n  x_shape = (2, 2, 1)\n  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])\n  gamma = np.array([2., 1.])\n  beta = np.array([0., 1.])\n  moving_mean = np.array([1., 1.])\n  moving_variance = np.array([1., 2.])\n  iteration = np.array(-1)\n\n  train_ds = generate_dataset(train_size=10, batch_size=10, input_shape=x_shape,\n                              num_class=2)\n\n  (unfold_model, fold_model_batch) = get_models_with_one_layer(\n      kernel_quantizer=None, folding_mode=\"batch_stats_folding\",\n      ema_freeze_delay=10)\n  (_, fold_model_ema) = get_models_with_one_layer(\n      kernel_quantizer=None, folding_mode=\"ema_stats_folding\",\n      ema_freeze_delay=10)\n\n  unfold_model.layers[1].set_weights([kernel])\n  unfold_model.layers[2].set_weights(\n      [gamma, beta, moving_mean, moving_variance])\n  fold_model_batch.layers[1].set_weights([\n 
     kernel, gamma, beta, iteration, moving_mean, moving_variance\n  ])\n  fold_model_ema.layers[1].set_weights([\n      kernel, gamma, beta, iteration, moving_mean, moving_variance\n  ])\n\n  # check if prediction is the same\n  y1 = unfold_model.predict(train_ds)\n  y2_batch = fold_model_batch.predict(train_ds)\n  y2_ema = fold_model_ema.predict(train_ds)\n  assert_allclose(y1, y2_batch, rtol=1e-4)\n  assert_allclose(y1, y2_ema, rtol=1e-4)\n\n  # check if training for a number of epochs, and before bn freeze, models\n  # reached the same point\n  y1 = run_training(unfold_model, epochs, loss_fn, loss_metric, optimizer,\n                    train_ds, do_print=False)\n  y2_batch = run_training(fold_model_batch, epochs, loss_fn, loss_metric,\n                          optimizer, train_ds, do_print=False)\n  y2_ema = run_training(fold_model_ema, epochs, loss_fn, loss_metric, optimizer,\n                        train_ds, do_print=False)\n  assert_allclose(y1, y2_batch, rtol=1e-4)\n  assert_allclose(y1, y2_ema, rtol=1e-4)\n\n  # check if training for long enough (after bn freezes), unfold model and fold\n  # models should be different, but the two folding modes should be the same\n  epochs = 5\n  iteration = np.array(8)\n  (unfold_model, fold_model_batch) = get_models_with_one_layer(\n      kernel_quantizer=None, folding_mode=\"batch_stats_folding\",\n      ema_freeze_delay=10)\n  (_, fold_model_ema) = get_models_with_one_layer(\n      kernel_quantizer=None, folding_mode=\"ema_stats_folding\",\n      ema_freeze_delay=10)\n  unfold_model.layers[1].set_weights([kernel])\n  unfold_model.layers[2].set_weights(\n      [gamma, beta, moving_mean, moving_variance])\n  fold_model_batch.layers[1].set_weights([\n      kernel, gamma, beta, iteration, moving_mean, moving_variance\n  ])\n  fold_model_ema.layers[1].set_weights([\n      kernel, gamma, beta, iteration, moving_mean, moving_variance\n  ])\n  y1 = run_training(\n      unfold_model,\n      epochs,\n      loss_fn,\n      
loss_metric,\n      optimizer,\n      train_ds,\n      do_print=False)\n  y2_batch = run_training(\n      fold_model_batch,\n      epochs,\n      loss_fn,\n      loss_metric,\n      optimizer,\n      train_ds,\n      do_print=False)\n  y2_ema = run_training(\n      fold_model_ema,\n      epochs,\n      loss_fn,\n      loss_metric,\n      optimizer,\n      train_ds,\n      do_print=False)\n  assert_raises(AssertionError, assert_allclose, y1, y2_batch, rtol=1e-4)\n  assert_allclose(y2_batch, y2_ema, rtol=1e-4)\n\n  # test QDepthwiseConv2DBatchnorm layers\n  def _get_models(x_shape, num_class, depthwise_quantizer, folding_mode,\n                  ema_freeze_delay):\n    x = x_in = layers.Input(x_shape, name=\"input\")\n    x = QDepthwiseConv2DBatchnorm(\n        kernel_size=(2, 2), strides=(2, 2), depth_multiplier=1,\n        depthwise_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n        depthwise_quantizer=depthwise_quantizer, beta_initializer=\"zeros\",\n        gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n        moving_variance_initializer=\"ones\", folding_mode=folding_mode,\n        ema_freeze_delay=ema_freeze_delay,\n        name=\"fold_depthwiseconv2d\")(x)\n    x = layers.Flatten(name=\"flatten\")(x)\n    x = layers.Dense(num_class, use_bias=False, kernel_initializer=\"ones\",\n                     name=\"dense\")(x)\n    x = layers.Activation(\"softmax\", name=\"softmax\")(x)\n    fold_model = Model(inputs=[x_in], outputs=[x])\n\n    x = x_in = layers.Input(x_shape, name=\"input\")\n    x = QDepthwiseConv2D(\n        kernel_size=(2, 2), strides=(2, 2), depth_multiplier=1,\n        depthwise_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n        depthwise_quantizer=depthwise_quantizer,\n        name=\"depthwiseconv2d\")(x)\n    x = layers.BatchNormalization(\n        beta_initializer=\"zeros\",\n        gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n        
moving_variance_initializer=\"ones\",\n        name=\"bn\")(x)\n    x = layers.Flatten(name=\"flatten\")(x)\n    x = layers.Dense(num_class, use_bias=False, kernel_initializer=\"ones\",\n                     name=\"dense\")(x)\n    x = layers.Activation(\"softmax\", name=\"softmax\")(x)\n    model = Model(inputs=[x_in], outputs=[x])\n\n    return (model, fold_model)\n\n  input_shape = (4, 4, 1)\n  num_class = 2\n  depthwise_quantizer = None\n  folding_mode = \"ema_stats_folding\"\n  ema_freeze_delay = 10\n\n  # weights\n  depthwise_kernel = np.array([[[[1.]], [[0.]]], [[[0.]], [[1.]]]])\n  gamma = np.array([2])\n  beta = np.array([0])\n  moving_mean = np.array([4.])\n  moving_variance = np.array([2.])\n  iteration = np.array(2)\n  folded_depthwise_kernel_quantized = np.array(\n      [[[[1.4138602]], [[0.]]], [[[0.]], [[1.4138602]]]])\n  folded_bias_quantized = np.array([-5.655441])\n  dense_weight = np.array([[1., 0], [0, 0], [0, 0], [0, 0]])\n\n  # generate dataset\n  train_ds = generate_dataset(train_size=3, batch_size=3,\n                              input_shape=input_shape, num_class=2)\n\n  # define models, one with folded layer and one without\n  (model, fold_model) = _get_models(\n      input_shape, num_class=num_class, depthwise_quantizer=depthwise_quantizer,\n      folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay)\n\n  # set weights\n  fold_model.layers[1].set_weights([\n      depthwise_kernel, gamma, beta, iteration, moving_mean, moving_variance])\n  fold_model.layers[3].set_weights([dense_weight])\n\n  model.layers[1].set_weights([depthwise_kernel])\n  model.layers[2].set_weights([gamma, beta, moving_mean, moving_variance])\n  model.layers[4].set_weights([dense_weight])\n\n  # perform training\n  epochs = 5\n  loss_fn = tf.keras.losses.MeanSquaredError()\n  loss_metric = metrics.Mean()\n  optimizer = get_sgd_optimizer(learning_rate=1e-3)\n\n  pred1 = run_training(\n      model, epochs, loss_fn, loss_metric, optimizer, train_ds, 
do_print=False)\n  pred2 = run_training(\n      fold_model, epochs, loss_fn, loss_metric, optimizer, train_ds,\n      do_print=False)\n\n  # before bn freezes, the two models should reach the same point\n  assert_allclose(pred1, pred2, rtol=1e-4)\n\n  # after bn freezes, the two models will not reach the same\n  iteration = np.array(12)\n  epochs = 5\n  ema_freeze_delay = 10\n  (model, fold_model) = _get_models(\n      input_shape, num_class=num_class, depthwise_quantizer=depthwise_quantizer,\n      folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay)\n  fold_model.layers[1].set_weights([\n      depthwise_kernel, gamma, beta, iteration, moving_mean, moving_variance])\n  fold_model.layers[3].set_weights([dense_weight])\n  model.layers[1].set_weights([depthwise_kernel])\n  model.layers[2].set_weights([gamma, beta, moving_mean, moving_variance])\n  model.layers[4].set_weights([dense_weight])\n  pred1 = run_training(\n      model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False)\n  pred2 = run_training(\n      fold_model, epochs, loss_fn, loss_metric, optimizer, train_ds,\n      do_print=False)\n\n  assert_raises(AssertionError, assert_allclose, pred1, pred2, rtol=1e-4)\n\n\ndef test_populate_bias_quantizer_from_accumulator():\n  \"\"\"Test populate_bias_quantizer_from_accumulator function.\n\n  Define a qkeras model with a QConv2DBatchnorm layer. Set bias quantizer in the\n  layer as None. Call populate_bias_quantizer_from_accumulator function\n  to automatically generate bias quantizer type from the MAC accumulator type.\n  Set the bias quantizer accordingly in the model.\n\n  Call populate_bias_quantizer_from_accumulator again in this model. This time\n  since bias quantizer is already set, populate_bias_quantizer_from_accumulator\n  function should not change the bias quantizer.\n  \"\"\"\n\n  x_shape = (2, 2, 1)\n\n  # get a qkeras model with QConv2DBatchnorm layer. 
Set bias quantizer in the\n  # layer as None.\n  x = x_in = layers.Input(x_shape, name=\"input\")\n  x1 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False,\n               kernel_quantizer=\"quantized_bits(4, 0, 1)\", name=\"conv2d_1\")(x)\n  x2 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False,\n               kernel_quantizer=\"quantized_bits(4, 0, 1)\", name=\"conv2d_2\")(x)\n  x = layers.Maximum()([x1, x2])\n  x = QActivation(\"quantized_relu(4, 1)\")(x)\n  x = QConv2DBatchnorm(\n      filters=2, kernel_size=(2, 2), strides=(4, 4),\n      kernel_initializer=\"ones\", bias_initializer=\"zeros\", use_bias=False,\n      kernel_quantizer=\"quantized_bits(4, 0, 1)\", bias_quantizer=None,\n      beta_initializer=\"zeros\",\n      gamma_initializer=\"ones\", moving_mean_initializer=\"zeros\",\n      moving_variance_initializer=\"ones\", folding_mode=\"batch_stats_folding\",\n      ema_freeze_delay=10,\n      name=\"foldconv2d\")(x)\n  x1 = x\n  x2 = layers.Flatten(name=\"flatten\")(x)\n  x2 = QDense(2, use_bias=False, kernel_initializer=\"ones\",\n              kernel_quantizer=\"quantized_bits(6, 2, 1)\", name=\"dense\")(x2)\n  model = Model(inputs=[x_in], outputs=[x1, x2])\n  assert_equal(model.layers[5].get_quantizers()[1], None)\n\n  # Call populate_bias_quantizer_from_accumulator function\n  # to automatically generate bias quantizer from the MAC accumulator type.\n  _ = bn_folding_utils.populate_bias_quantizer_from_accumulator(\n      model, [\"quantized_bits(8, 0, 1)\"])\n  q = model.layers[5].get_quantizers()[1]\n  assert_equal(q.__str__(), \"quantized_bits(10,3,1)\")\n\n  # Call populate_bias_quantizer_from_accumulator function again\n  # bias quantizer should not change\n  _ = bn_folding_utils.populate_bias_quantizer_from_accumulator(\n      model, [\"quantized_bits(8, 0, 1)\"])\n  q = model.layers[5].get_quantizers()[1]\n  assert_equal(q.__str__(), \"quantized_bits(10,3,1)\")\n"
  },
  {
    "path": "tests/callbacks_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for callbacks.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport pytest\nfrom numpy.testing import assert_equal\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import *\nimport tensorflow.compat.v2 as tf\n\nfrom qkeras import *\nfrom qkeras.utils import get_model_sparsity\nfrom qkeras.utils import model_quantize\nfrom qkeras.callbacks import QNoiseScheduler\n\n\ndef qconv_model():\n  x = x_in = tf.keras.layers.Input((4, 4, 1), name=\"input\")\n  x = QConv2D(\n      1,\n      2,\n      1,\n      kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name=\"qconv2d_1\")(\n          x)\n  x = QActivation(\"quantized_relu(4)\", name=\"QA_1\")(x)\n  model = keras.Model(inputs=[x_in], outputs=[x])\n  return model\n\n\ndef test_QNoiseScheduler():\n  model = qconv_model()\n  model.compile(optimizer=\"sgd\", loss=tf.keras.losses.MeanSquaredError())\n  num_data = 5\n  x_train = np.random.rand(num_data, 4, 4, 1)\n  y_train = np.random.rand(num_data, 1)\n\n  #########################\n  # Test \"step\" freq_type #\n  #########################\n\n  # The number of batch passes the finish of 4.\n  
gradual_qnoise_callback_0 = QNoiseScheduler(\n      start=2, finish=4, freq_type=\"step\", exponent=3.0)\n\n  model.fit(\n      x_train,\n      y_train,\n      batch_size=1,\n      epochs=1,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_0,\n      ],\n  )\n\n  # QConv2D has a kernel_quantizer and a bias_quantizer, and QActivation has a\n  # quantizer.\n  num_quantizers_with_qnoise_factor = 0\n  for quantizer in gradual_qnoise_callback_0.quantizers:\n    if hasattr(quantizer, \"qnoise_factor\"):\n      num_quantizers_with_qnoise_factor += 1\n  assert_equal(num_quantizers_with_qnoise_factor, 3)  # Test \"step\"\n\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_0.quantizers\n  ]\n  assert_equal(qnoise_factor, np.ones_like(qnoise_factor))\n\n\n  # The number of batch does not pass the finish of 10. Exponent 3.0\n  gradual_qnoise_callback_1 = QNoiseScheduler(\n      start=2, finish=10, freq_type=\"step\", exponent=3.0)\n\n  model.fit(\n      x_train,\n      y_train,\n      batch_size=1,\n      epochs=1,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_1,\n      ],\n  )\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_1.quantizers\n  ]\n  val = 1 - np.power((10.0 - 4.0) / (10.0 - 2.0), 3)\n  assert_equal(qnoise_factor, np.full_like(qnoise_factor, val))\n\n  # The number of batch does not pass the finish of 10. 
Exponent 2.0\n  gradual_qnoise_callback_2 = QNoiseScheduler(\n      start=2, finish=10, freq_type=\"step\", exponent=2.0)\n\n  model.fit(\n      x_train,\n      y_train,\n      batch_size=1,\n      epochs=1,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_2,\n      ],\n  )\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_2.quantizers\n  ]\n  val = 1 - np.power((10.0 - 4.0) / (10.0 - 2.0), 2)\n  assert_equal(qnoise_factor, np.full_like(qnoise_factor, val))\n\n  # The number of batch does not pass the start of 6.\n  gradual_qnoise_callback_3 = QNoiseScheduler(\n      start=6, finish=10, freq_type=\"step\", exponent=3.0)\n\n  model.fit(\n      x_train,\n      y_train,\n      batch_size=1,\n      epochs=1,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_3,\n      ],\n  )\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_3.quantizers\n  ]\n  assert_equal(qnoise_factor, np.zeros_like(qnoise_factor))\n\n\n  # The number of training iterations passes the number of batches of an epoch.\n  gradual_qnoise_callback_4 = QNoiseScheduler(\n      start=6, finish=20, freq_type=\"step\", exponent=3.0)\n  epochs = 2\n  model.fit(\n      x_train,\n      y_train,\n      batch_size=1,\n      epochs=epochs,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_4,\n      ],\n  )\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_4.quantizers\n  ]\n  val = 1 - np.power((20.0 - (epochs*num_data - 1)) / (20.0 - 6.0), 3)\n  assert_equal(qnoise_factor, np.full_like(qnoise_factor, val))\n\n  # The number of training iterations passes the number of batches of an epoch\n  # with update_freq = 2.\n  gradual_qnoise_callback_5 = QNoiseScheduler(\n      start=0,\n      finish=20,\n      freq_type=\"step\",\n      update_freq=2,\n      exponent=3.0)\n  epochs = 2\n  model.fit(\n      x_train,\n      y_train,\n      
batch_size=1,\n      epochs=epochs,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_5,\n      ],\n  )\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_5.quantizers\n  ]\n  # It updates when the number of training iterations modulo update_freq is 0.\n  val = 1 - np.power(\n      (20.0 - epochs * ((epochs * num_data - 1) // epochs)) / (20.0 - 0.0), 3)\n  assert_equal(qnoise_factor, np.full_like(qnoise_factor, val))\n\n\n  ##########################\n  # Test \"epoch\" freq_type #\n  ##########################\n  # The number of epoch does not pass the finish of 5.\n  gradual_qnoise_callback_6 = QNoiseScheduler(\n      start=1, finish=5, freq_type=\"epoch\", exponent=3.0)\n\n  model.fit(\n      x_train,\n      y_train,\n      batch_size=1,\n      epochs=3,\n      verbose=0,\n      callbacks=[\n          gradual_qnoise_callback_6,\n      ],\n  )\n  qnoise_factor = [\n      np.array(d.qnoise_factor) for d in gradual_qnoise_callback_6.quantizers\n  ]\n  val = 1 - np.power((5.0 - 2.0) / (5.0 - 1.0), 3)\n  assert_equal(qnoise_factor, np.full_like(qnoise_factor, val))\n  assert_equal(len(gradual_qnoise_callback_6.quantizers), 3)  # Test \"epoch\"\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/codebook_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test activation from qlayers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nfrom numpy.testing import assert_allclose\n\nimport pytest\n\nfrom qkeras import quantized_bits\nfrom qkeras.codebook import weight_compression\n\n\n@pytest.mark.parametrize(\n  'bits, axis, quantizer, weights, expected_result',\n  [\n    (\n      3, 3, quantized_bits(4, 0, 1, alpha='auto_po2'),\n      np.array([\n       [[ 0.14170583, -0.34360626,  0.29548156],\n        [ 0.6517242,  0.06870092, -0.21646781],\n        [ 0.12486842, -0.05406165, -0.23690471]],\n\n       [[-0.07540564,  0.2123149 ,  0.2382695 ],\n        [ 0.78434753,  0.36171672, -0.43612534],\n        [ 0.3685556,  0.41328752, -0.48990643]],\n\n      [[-0.04438099,  0.0590747 , -0.0644061 ],\n        [ 0.15280165,  0.40714318, -0.04622072],\n        [ 0.21560416, -0.22131851, -0.5365659 ]]], dtype=np.float32),\n      np.array([\n       [[ 0.125 , -0.375 ,  0.25  ],\n        [ 0.75  ,  0.125 , -0.25  ],\n        [ 0.125 ,  0.0   , -0.25  ]],\n\n       [[ 0.0   ,  0.25  ,  0.25  ],\n        [ 0.75  ,  0.375 , -0.375 ],\n        [ 0.375 ,  0.375 , -0.5   ]],\n\n       [[ 0.0   ,  0.0   ,  0.0   ],\n        [ 0.125 ,  0.375 ,  0.0   ],\n        [ 
0.25  , -0.25  , -0.5   ]]], dtype=np.float32)\n    )\n  ]\n)\ndef test_codebook_weights(bits, axis, quantizer, weights, expected_result):\n  np.random.seed(22)\n  weights = weights.reshape(weights.shape + (1,))\n  expected_result = expected_result.reshape(expected_result.shape + (1,))\n  index_table, codebook_table = weight_compression(weights,\n                                                   bits,\n                                                   axis,\n                                                   quantizer)\n  new_weights = np.zeros(weights.shape)\n  for i in range(weights.shape[axis]):\n    new_weights[:, :, :, i] = codebook_table[i][index_table[:, :, :, i]]\n\n  assert_allclose(new_weights, expected_result, rtol=1e-4)\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/leakyrelu_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test activation from qlayers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nfrom numpy.testing import assert_allclose\n\nimport pytest\nfrom tensorflow.keras import backend as K\n\nfrom qkeras import quantized_relu\nfrom qkeras import quantized_relu_po2\n\n\n@pytest.mark.parametrize(\n    'bits, integer, use_sigmoid, negative_slope, test_values, expected_values',\n    [\n        (6, 2, 0, 0.25,\n         np.array(\n             [[-3.0, -2.0, -1.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875,\n               0.054688, 6.0]],\n             dtype=K.floatx()),\n         np.array([[-0.75, -0.5, -0.25, 0.0, 2.5, 3.375, 1.5, 1.0, 0.0, 3.875]],\n             dtype=K.floatx()),\n        ),\n        (6, 2, 1, 0.125,\n         np.array([[\n             0.458069, 0.573227, 0.194336, 1.539047, 0.045883, 4.009995,\n             3.962494, 3.937500, 0.363266, 0.875198, 0.710938, 4.000000,\n             7.000000, 3.937500, 3.937592, 0.199326, 0.458008, 0.625977,\n             0.544922, 1.046875, 0.586899, 3.367188, 3.804688, 0.312500,\n             0.062500, 0.562500, 0.375000, 3.367188, 1.046875, 2.796875,\n             0.054688, 1.562500, 2.562500\n         ]], dtype=K.floatx()),\n         np.array([[\n  
           0.5  , 0.5  , 0.25 , 1.5  , 0.   , 3.875, 3.875, 3.875, 0.25 ,\n             1.   , 0.75 , 3.875, 3.875, 3.875, 3.875, 0.25 , 0.5  , 0.75 ,\n             0.5  , 1.   , 0.5  , 3.25 , 3.75 , 0.25 , 0.   , 0.5  , 0.5  ,\n             3.25 , 1.   , 2.75 , 0.   , 1.5  , 2.5\n         ]], dtype=K.floatx())),\n        (6, 2, 1, 0.125,\n         np.array([[\n             -0.458069, -0.573227, -0.194336, -1.539047, -0.045883, -4.009995,\n             -3.962494, -3.937500, -0.363266, -0.875198, -0.710938, -4.000000,\n             -7.000000, -3.937500, -3.937592, -0.199326, -0.458008, -0.625977,\n             -0.544922, -1.046875, -0.586899, -3.367188, -3.804688, -0.312500,\n             -0.062500, -0.562500, -0.375000, -3.367188, -1.046875, -2.796875,\n             -0.054688, -1.562500, -2.562500\n         ]], dtype=K.floatx()),\n         np.array([[\n              0.0,       0.0,       0.0,      -0.25,      0.0,      -0.5,\n             -0.5,      -0.5,       0.0,       0.0,       0.0,      -0.5,\n             -0.5,      -0.5,      -0.5,       0.0,       0.0,       0.0,\n              0.0,      -0.25,      0.0,      -0.5,      -0.5,       0.0,\n              0.0,       0.0,       0.0,      -0.5,      -0.25,     -0.25,\n              0.0,      -0.25,     -0.25\n         ]], dtype=K.floatx())),\n    ])\ndef test_quantized_relu(bits, integer, use_sigmoid, negative_slope, test_values,\n                        expected_values):\n  \"\"\"Test quantized_relu function.\"\"\"\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [quantized_relu(bits, integer, use_sigmoid,\n                                      negative_slope)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, negative_slope, test_values, expected_values',\n    [\n        (\n            8, 2**-4,\n            np.array([[\n                -1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01,\n                
-6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01,\n                -2.00000000e-01, -1.00000000e-01, -2.22044605e-16,  1.00000000e-01,\n                 2.00000000e-01,  3.00000000e-01,  4.00000000e-01,  5.00000000e-01,\n                 6.00000000e-01,  7.00000000e-01,  8.00000000e-01,  9.00000000e-01\n                ]], dtype=K.floatx()),\n            np.array([[\n                -0.0625   , -0.0625   , -0.0625   , -0.03125  , -0.03125  ,\n                -0.03125  , -0.03125  , -0.015625 , -0.015625 , -0.0078125,\n                 0.       ,  0.125    ,  0.25     ,  0.25     ,  0.5      ,\n                 0.5      ,  0.5      ,  0.5      ,  1.       ,  1.       \n                ]], dtype=K.floatx())\n        ),\n        (\n            3, 2**-4,\n            np.array([[\n                -1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01,\n                -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01,\n                -2.00000000e-01, -1.00000000e-01, -2.22044605e-16,  1.00000000e-01,\n                 2.00000000e-01,  3.00000000e-01,  4.00000000e-01,  5.00000000e-01,\n                 6.00000000e-01,  7.00000000e-01,  8.00000000e-01,  9.00000000e-01\n                ]], dtype=K.floatx()),\n            np.array([[\n                -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625,\n                -0.0625, -0.0625, -0.0625, -0.0625,  0.125 ,  0.25  ,  0.25  ,\n                0.5   ,  0.5   ,  0.5   ,  0.5   ,  1.    ,  1.    
\n                ]], dtype=K.floatx())\n        ),\n        (\n            6, 2**-3,\n            np.array([[\n                -3.0, -2.0, -1.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875,\n                0.054688, 6.0]], dtype=K.floatx()),\n            np.array([[\n                -5.00000000e-01, -2.50000000e-01, -1.25000000e-01,  2.32830644e-10,\n                2.00000000e+00,  4.00000000e+00,  2.00000000e+00,  1.00000000e+00,\n                6.25000000e-02,  8.00000000e+00   \n                ]], dtype=K.floatx())\n        )\n        \n    ])\ndef test_quantized_relu_po2(bits, negative_slope, test_values, expected_values):\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [quantized_relu_po2(bits, negative_slope=negative_slope)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/min_max_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests min/max values that are used for autorange.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport pytest\nfrom qkeras import *\nfrom tensorflow.keras import backend as K\n\n\ndef test_binary():\n  q = binary(alpha=1.0)\n  assert q.min() == -1.0\n  assert q.max() == 1.0\n\n  q = stochastic_binary(alpha=1.0)\n  assert q.min() == -1.0\n  assert q.max() == 1.0\n\n\ndef test_ternary():\n  q = ternary(alpha=1.0)\n  assert q.min() == -1.0\n  assert q.max() == 1.0\n\n  q = stochastic_ternary(alpha=1.0)\n  assert q.min() == -1.0\n  assert q.max() == 1.0\n\n\ndef test_quantized_bits():\n  results = {\n      (1,0): [-1.0, 1.0],\n      (2,0): [-1.0, 1.0],\n      (3,0): [-1.0, 1.0],\n      (4,0): [-1.0, 1.0],\n      (5,0): [-1.0, 1.0],\n      (6,0): [-1.0, 1.0],\n      (7,0): [-1.0, 1.0],\n      (8,0): [-1.0, 1.0],\n      (1,1): [-1.0, 1.0],\n      (2,1): [-2.0, 2.0],\n      (3,1): [-2.0, 2.0],\n      (4,1): [-2.0, 2.0],\n      (5,1): [-2.0, 2.0],\n      (6,1): [-2.0, 2.0],\n      (7,1): [-2.0, 2.0],\n      (8,1): [-2.0, 2.0],\n      (3,2): [-4.0, 4.0],\n      (4,2): [-4.0, 4.0],\n      (5,2): [-4.0, 4.0],\n      (6,2): [-4.0, 4.0],\n      (7,2): [-4.0, 4.0],\n      (8,2): [-4.0, 4.0],\n  }\n\n  for i in 
range(3):\n    for b in range(1,9):\n      if b <= i: continue\n      q = quantized_bits(b,i,1)\n      expected = results[(b,i)]\n      assert expected[0] == q.min()\n      assert expected[1] == q.max()\n\n\ndef test_po2():\n  po2 = {\n    3: [-2, 2],\n    4: [-8, 8],\n    5: [-128, 128],\n    6: [-32768, 32768]\n  }\n\n  po2_max_value = {\n      (3,1): [-1.0, 1.0],\n      (3,2): [-2, 2],\n      (3,4): [-4, 4],\n      (4,1): [-1.0, 1.0],\n      (4,2): [-2, 2],\n      (4,4): [-4, 4],\n      (4,8): [-8, 8],\n      (5,1): [-1.0, 1.0],\n      (5,2): [-2, 2],\n      (5,4): [-4, 4],\n      (5,8): [-8, 8],\n      (5,16): [-16, 16],\n      (6,1): [-1.0, 1.0],\n      (6,2): [-2, 2],\n      (6,4): [-4, 4],\n      (6,8): [-8, 8],\n      (6,16): [-16, 16],\n      (6,32): [-32, 32]\n  }\n\n  po2_quadratic = {\n    4: [-4, 4],\n    5: [-64, 64],\n    6: [-16384, 16384]\n  }\n\n  relu_po2_quadratic = {\n    4: [0.00390625, 64],\n    5: [1.52587890625e-05, 16384],\n    6: [2.3283064365386963e-10, 1073741824]\n  }\n\n  for b in range(3,7):\n    q = quantized_po2(b)\n    assert po2[b][0] == q.min()\n    assert po2[b][1] == q.max()\n    for i in range(0,b):\n      q = quantized_po2(b,2**i)\n      assert po2_max_value[(b,2**i)][0] == q.min()\n      assert po2_max_value[(b,2**i)][1] == q.max()\n\n  for b in range(4,7):\n    q = quantized_po2(b,quadratic_approximation=True)\n    assert po2_quadratic[b][0] == q.min()\n    assert po2_quadratic[b][1] == q.max()\n    q = quantized_relu_po2(b,quadratic_approximation=True)\n    assert relu_po2_quadratic[b][0] == q.min()\n    assert relu_po2_quadratic[b][1] == q.max()\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/print_qstats_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport pytest\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Conv2D\nfrom tensorflow.keras.layers import DepthwiseConv2D\nfrom tensorflow.keras.layers import BatchNormalization\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\n\nfrom qkeras.estimate import print_qstats\nfrom qkeras.utils import model_quantize\nfrom qkeras import QConv2D\nfrom qkeras.quantizers import *\n\n\ndef create_network():\n  xi = Input((28, 28, 1))\n  x = Conv2D(32, (3, 3))(xi)\n  x = Activation(\"relu\")(x)\n  x = Conv2D(32, (3, 3), activation=\"relu\")(x)\n  x = Activation(\"softmax\")(x)\n  return Model(inputs=xi, outputs=x)\n\n\ndef create_mix_network():\n\n  xi = Input((28, 28, 1))\n  x = QConv2D(32, (3, 3), kernel_quantizer=binary())(xi)\n  x = Activation(\"relu\")(x)\n  x = Conv2D(32, (3, 3))(x)\n  x = Activation(\"softmax\")(x)\n  return Model(inputs=xi, outputs=x)\n\n\ndef create_network_with_bn():\n  \"\"\"Creates a network contains both QConv2D and QDepthwiseConv2D layers.\"\"\"\n\n  xi = Input((28, 28, 1))\n  x = Conv2D(32, (3, 3))(xi)\n  x = BatchNormalization()(x)\n  x = Activation(\"relu\")(x)\n  x = 
DepthwiseConv2D((3, 3), activation=\"relu\")(x)\n  x = BatchNormalization()(x)\n  x = Activation(\"softmax\")(x)\n  return Model(inputs=xi, outputs=x)\n\n\ndef test_conversion_print_qstats():\n  # this tests if references in tensorflow are working properly.\n  m = create_network()\n  d = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QActivation\": {\n          \"relu\": \"ternary\"\n      }\n  }\n  qq = model_quantize(m, d, 4)\n  qq.summary()\n  print_qstats(qq)\n\n  # test if print_qstats works with unquantized layers\n  print_qstats(m)\n\n  # test if print_qstats works with mixture of quantized and unquantized layers\n  m1 = create_mix_network()\n  print_qstats(m1)\n\n  m2 = create_network_with_bn()\n  d2 = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"binary\",\n          \"bias_quantizer\": \"binary\"\n      },\n      \"QActivation\": {\n          \"relu\": \"ternary\"\n      },\n      \"QConv2DBatchnorm\": {\n          \"kernel_quantizer\": \"ternary\",\n          \"bias_quantizer\": \"ternary\",\n      },\n      \"QDepthwiseConv2DBatchnorm\": {\n          \"depthwise_quantizer\": \"ternary\",\n          \"bias_quantizer\": \"ternary\",\n      },\n  }\n  m2 = model_quantize(m2, d2, 4, enable_bn_folding=True)\n  m2.summary()\n  print_qstats(m2)\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qactivation_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test activation from qlayers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nfrom numpy.testing import assert_allclose, assert_array_equal\n\nimport pytest\nfrom tensorflow import keras\nfrom tensorflow.keras import backend as K\nimport tempfile\n\nfrom qkeras import set_internal_sigmoid\nfrom qkeras import binary\nfrom qkeras import hard_sigmoid\nfrom qkeras import quantized_bits\nfrom qkeras import quantized_hswish\nfrom qkeras import quantized_po2\nfrom qkeras import quantized_relu\nfrom qkeras import quantized_relu_po2\nfrom qkeras import quantized_sigmoid\nfrom qkeras import quantized_tanh\nfrom qkeras import smooth_sigmoid\nfrom qkeras import stochastic_binary\nfrom qkeras import stochastic_ternary\nfrom qkeras import ternary\nfrom qkeras.quantizers import _default_sigmoid_type\n\n\n@pytest.mark.parametrize(\n    'bits, max_value, use_stochastic_rounding, quadratic_approximation, '\n    'log2_rounding, test_values, expected_values', [\n        # bits=4 without max_value. Therefore the max exponent is 4 when\n        # quadratic approximiation is enabled. 
The max and min values from this\n        # quantization function are 16 and -16 respectively.\n        (\n            4, None, 0, 1, \"floor\",\n            np.array(\n                [[-10.0, -0.25, 0.25, 1.0, 1.99, 2.0, 5.0, 10.0, 16.0, 32.0]],\n                dtype=K.floatx()),\n            np.array(\n                [[-4.0, -0.25, 0.25, 1.0, 1.0, 1.0, 4.0, 4.0, 16.0, 16.0]],\n                dtype=K.floatx()),\n        ),\n        # bits=3. The minimum exponent is -4. Therefore, the smallest absolute\n        # value is 0.0625 in this quantization. The max absolute value is 0.5,\n        # which is specified by the second input argument.\n        (\n            3, 0.5, 0, 0, \"floor\",\n            np.array([[-7, -0.12, -0.03, 0.01, 5]], dtype=K.floatx()),\n            np.array([[-0.5, -0.0625, -0.0625, 0.0625, 0.5]], dtype=K.floatx()),\n        ),\n        (8, None, 0, 0, \"floor\",\n         np.array(\n             [[-3, -2, -1.5, -0.5, -0.033, 0.5, 0.667, 1, 1.5, 4, 10]],\n             dtype=K.floatx()),\n         np.array(\n             [[-2, -2, -1, -0.5, -0.03125, 0.5, 0.5, 1, 1, 4, 8]],\n             dtype=K.floatx()),\n        ),\n        (4, None, 0, 0, \"floor\",\n         np.array(\n             [[-16, -7, -0.12, -0.03, 0, 0.01, 5, 10]],\n             dtype=K.floatx()),\n         np.array(\n             [[-8, -4, -0.0625, -0.0625, 0.0625, 0.0625, 4, 8]],\n             dtype=K.floatx()),\n        ),\n        (3, 0.5, 0, 0, \"floor\",\n         np.array([[-7, -0.12, -0.03, 0.01, 5]], dtype=K.floatx()),\n         np.array([[-0.5, -0.0625, -0.0625, 0.0625, 0.5]], dtype=K.floatx()),\n        ),\n        (4, 4, 0, 0, \"floor\",\n         np.array([[-7, -0.12, -0.03, 0, 0.01, 5]], dtype=K.floatx()),\n         np.array([[-4, -0.0625, -0.0625, 0.0625, 0.0625, 4]],\n                  dtype=K.floatx()),\n        ),\n        (4, None, 0, 1, \"floor\",\n         np.array(\n             [[0.01, 0.03, 0.06, 0.5, 1, 2, 5, 10, 16, 32]],\n             
dtype=K.floatx()),\n         np.array(\n             [[0.00390625, 0.015625, 0.015625, 0.25, 1, 1, 4, 4, 16, 16]],\n             dtype=K.floatx()),\n        ),\n        (4, None, 0, 1, \"floor\",\n         np.array(\n             [[-32, -16, -10, -5, -2, -1, -0.5, -0.03, -0.01]],\n             dtype=K.floatx()),\n         np.array(\n             [[-16, -16, -4, -4, -1, -1, -0.25, -0.015625, -0.00390625]],\n             dtype=K.floatx()),\n        ),\n        (4, None, 0, 1, \"floor\",\n         np.array(\n             [[-32, -16, -10, -5, -2, -1, -0.5, -0.03, -0.01]],\n             dtype=K.floatx()),\n         np.array(\n             [[-16, -16, -4, -4, -1, -1, -0.25, -0.015625, -0.00390625]],\n             dtype=K.floatx()),\n        ),\n    ])\ndef disable_test_quantized_po2(\n    bits,\n    max_value,\n    use_stochastic_rounding,\n    quadratic_approximation,\n    log2_rounding,\n    test_values,\n    expected_values):\n  \"\"\"Test quantized_po2 function.\"\"\"\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [quantized_po2(\n      bits, max_value, use_stochastic_rounding,\n      quadratic_approximation, log2_rounding)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05, atol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, max_value, use_stochastic_rounding, quadratic_approximation, ' +\n    'log2_rounding, test_values, expected_values',\n    [\n        # bits=3 without max_value. Therefore the max exponent is 4 when\n        # quadratic approximation is enabled. The max value from this\n        # quantization function is 16. 
For the negative value, relu enforces it\n        # to be the minimum value of this quantization function, which is 2**-4.\n        (\n            3, None, 0, 1, \"floor\",\n            np.array(\n                [[-10.0, -0.25, 0.25, 1.0, 1.99, 2.01, 5.0, 10.0, 16.0, 32.0]],\n                dtype=K.floatx()),\n            np.array(\n                [[0.0625, 0.0625, 0.25, 1.0, 1.0, 1.0, 4.0, 4.0, 16.0, 16.0]],\n                dtype=K.floatx()),\n        ),\n        # bits=3. The minimum exponent is -4. Therefore, the smallest absolute\n        # value is 0.0625 in this quantization. The max absolute value is 4,\n        # which is specified by the second input argument.\n        (3, 4, 0, 0, \"floor\",\n         np.array([[-7.0, -0.12, -0.03, 0, 0.01, 5.0]], dtype=K.floatx()),\n         np.array([[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0]],\n                  dtype=K.floatx())\n        ),\n        (8, None, 0, 0, \"floor\",\n         np.array([[-0.033, 0.5, 0.667, 1, 1.5, 4, 10]], dtype=K.floatx()),\n         np.array([[0, 0.5, 0.5, 1, 1, 4, 8]], dtype=K.floatx()),\n        ),\n        (3, None, 0, 0, \"floor\",\n         np.array(\n             [[-16.0, -7.0, -0.12, -0.03, 0, 0.01, 5.0, 10.0]],\n             dtype=K.floatx()),\n         np.array(\n             [[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0, 8.0]],\n             dtype=K.floatx()),\n        ),\n        (2, 0.5, 0, 0, \"floor\",\n         np.array([[-7.0, -0.12, -0.03, 0.01, 5.0]], dtype=K.floatx()),\n         np.array([[0.0625, 0.0625, 0.0625, 0.0625, 0.5]], dtype=K.floatx()),\n        ),\n        (3, 4, 0, 0, \"floor\",\n         np.array(\n             [[-7.0, -0.12, -0.03, 0, 0.01, 5.0]],\n             dtype=K.floatx()),\n         np.array(\n             [[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0]],\n             dtype=K.floatx()),\n        ),\n        (3, None, 0, 1, \"floor\",\n         np.array(\n             [[0.01, 0.03, 0.06, 0.5, 1, 2, 5, 10, 16, 32]],\n             
dtype=K.floatx()),\n         np.array(\n             [[0.00390625, 0.015625, 0.015625, 0.25, 1, 1, 4, 4, 16, 16]],\n             dtype=K.floatx()),\n        ),\n    ])\ndef disable_test_quantized_relu_po2(bits, max_value, use_stochastic_rounding,\n                                    quadratic_approximation, log2_rounding,\n                                    test_values, expected_values):\n  \"\"\"Test quantized_relu_po2 function.\"\"\"\n  x = K.placeholder(ndim=2)\n  f = K.function([x],\n                 [quantized_relu_po2(bits, max_value, 0,\n                                     use_stochastic_rounding,\n                                     quadratic_approximation,\n                                     log2_rounding)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05, atol=1e-05)\n\n\ndef test_smooth_sigmoid():\n  \"\"\"Test smooth_sigmoid function.\"\"\"\n  test_values = np.array(\n      [[-3.0, -2.0, -1.0, -0.5, 0.005, 0.0, 0.005, 0.5, 1, 4, 10]],\n      dtype=K.floatx())\n\n  def ref_smooth_sigmoid(y):\n    x = 0.1875 * y + 0.5\n    z = 0.0 if x <= 0.0 else (1.0 if x >= 1.0 else x)\n    return z\n\n  sigmoid = np.vectorize(ref_smooth_sigmoid)\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [smooth_sigmoid(x)])\n  result = f([test_values])[0]\n  expected = sigmoid(test_values)\n  assert_allclose(result, expected, rtol=1e-05)\n\n\ndef test_hard_sigmoid():\n  \"\"\"Test hard_sigmoid function.\"\"\"\n  test_values = np.array(\n      [[-3.0, -2.0, -1.0, -0.5, 0.005, 0.0, 0.005, 0.5, 1, 4, 10]],\n      dtype=K.floatx())\n\n  def ref_hard_sigmoid(y):\n    x = 0.5 * y + 0.5\n    z = 0.0 if x <= 0.0 else (1.0 if x >= 1.0 else x)\n    return z\n\n  sigmoid = np.vectorize(ref_hard_sigmoid)\n\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [hard_sigmoid(x)])\n  result = f([test_values])[0]\n  expected = sigmoid(test_values)\n  assert_allclose(result, expected, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, sigmoid_type, 
use_real_sigmoid, test_values, expected_values', [\n        (\n            6,\n            \"hard\",\n            False,\n            np.array(\n                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],\n                dtype=K.floatx()),\n            np.array([[0.015625, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875]],\n                     dtype=K.floatx()),\n        ),\n        (\n            6,\n            \"smooth\",\n            False,\n            np.array(\n                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],\n                dtype=K.floatx()),\n            np.array([[0.3125, 0.359375, 0.40625, 0.453125,\n                       0.5, 0.546875, 0.59375, 0.640625]],\n                     dtype=K.floatx()),\n        ),\n        (\n            6,\n            \"real\",\n            True,\n            np.array(\n                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],\n                dtype=K.floatx()),\n            np.array([[0.265625, 0.328125, 0.375, 0.4375, 0.5,\n                       0.5625, 0.625, 0.671875]],\n                     dtype=K.floatx()),\n        ),\n    ])\ndef test_quantized_sigmoid(bits, sigmoid_type, use_real_sigmoid,\n                           test_values, expected_values):\n  \"\"\"Test quantized_sigmoid function with three different sigmoid variants.\"\"\"\n\n  set_internal_sigmoid(sigmoid_type)\n  x = K.placeholder(ndim=2)\n  f = K.function([x],\n                 [quantized_sigmoid(bits, symmetric=True,\n                                    use_real_sigmoid=use_real_sigmoid)(x)])\n  set_internal_sigmoid(_default_sigmoid_type)\n\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, sigmoid_type, use_real_sigmoid, test_values, expected_values', [\n        (\n            4,\n            \"hard\",\n            False,\n            np.array(\n                [-15, 15],\n                dtype=K.floatx()),\n            np.array([0.0625, 
0.9375],\n                     dtype=K.floatx()),\n        ),\n        (\n            4,\n            \"smooth\",\n            False,\n            np.array(\n                [-15, 15],\n                dtype=K.floatx()),\n            np.array([0.0625, 0.9375],\n                     dtype=K.floatx()),\n        ),\n        (\n            4,\n            \"real\",\n            True,\n            np.array(\n                [-15, 15],\n                dtype=K.floatx()),\n            np.array([0.0625, 0.9375],\n                     dtype=K.floatx()),\n        ),\n    ])\n\ndef test_quantized_sigmoid_limits(\n    bits, sigmoid_type, use_real_sigmoid, test_values, expected_values):\n  \"\"\"Test the min and max values of quantized_sigmoid function with three different sigmoid variants.\"\"\"\n\n  set_internal_sigmoid(sigmoid_type)\n  x = K.placeholder(ndim=2)\n  f = K.function([x],\n                 [quantized_sigmoid(bits, symmetric=True,\n                                    use_real_sigmoid=use_real_sigmoid)(x)])\n  set_internal_sigmoid(_default_sigmoid_type)\n\n  result = f([test_values])[0]\n  min_max = np.array(\n      [quantized_sigmoid(bits, symmetric=True,\n                         use_real_sigmoid=use_real_sigmoid).min(),\n       quantized_sigmoid(bits, symmetric=True,\n                         use_real_sigmoid=use_real_sigmoid).max()])\n\n  assert_allclose(result, expected_values, rtol=1e-05)\n  assert_allclose(result, min_max, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, use_real_tanh, test_values, expected_values', [\n        (\n            4,\n            False,\n            np.array(\n                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],\n                dtype=K.floatx()),\n            np.array([[-0.875, -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],\n                     dtype=K.floatx()),\n        ),\n        (\n            4,\n            True,\n            np.array(\n                [[-1., -0.75, -0.5, -0.25,  0., 0.25, 0.5, 0.75]],\n    
            dtype=K.floatx()),\n            np.array([[-0.75, -0.625, -0.5, -0.25, 0., 0.25, 0.5, 0.625]],\n                     dtype=K.floatx()),\n        )\n    ])\ndef test_quantized_tanh(bits, use_real_tanh, test_values, expected_values):\n  \"\"\"Test quantized_tanh function with three different sigmoid variants.\"\"\"\n  # store previous sigmoid type\n\n  set_internal_sigmoid('hard')\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [quantized_tanh(\n      bits, symmetric=True, use_real_tanh=use_real_tanh)(x)])\n  set_internal_sigmoid(_default_sigmoid_type)\n\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, sigmoid_type, use_real_tanh, test_values, expected_values', [\n        (\n            4,\n            \"hard\",\n            False,\n            np.array(\n                [-15, 15],\n                dtype=K.floatx()),\n            np.array([-0.875, 0.875],\n                     dtype=K.floatx()),\n        ),\n        (\n            4,\n            \"smooth\",\n            False,\n            np.array(\n                [-15, 15],\n                dtype=K.floatx()),\n            np.array([-0.875, 0.875],\n                     dtype=K.floatx()),\n        ),\n        (\n            4,\n            \"real\",\n            True,\n            np.array(\n                [-15, 15],\n                dtype=K.floatx()),\n            np.array([-0.875, 0.875],\n                     dtype=K.floatx()),\n        ),\n    ])\ndef test_quantized_tanh_limits(bits, sigmoid_type, use_real_tanh, test_values,\n                               expected_values):\n  \"\"\"Test the min and max values of quantized_tanh function with three different sigmoid variants.\"\"\"\n\n  set_internal_sigmoid(sigmoid_type)\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [quantized_tanh(\n      bits, symmetric=True, use_real_tanh=use_real_tanh)(x)])\n  set_internal_sigmoid(_default_sigmoid_type)\n\n  result = 
f([test_values])[0]\n  min_max = np.array(\n      [quantized_tanh(bits, symmetric=True, use_real_tanh=use_real_tanh).min(),\n       quantized_tanh(bits, symmetric=True, use_real_tanh=use_real_tanh).max()])\n\n  assert_allclose(result, expected_values, rtol=1e-05)\n  assert_allclose(result, min_max, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, integer, use_sigmoid, test_values, expected_values', [\n        (\n            6,\n            2,\n            0,\n            np.array(\n                [[-3.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688]],\n                dtype=K.floatx()),\n            np.array([[0.0, 0.0, 2.5625, 3.375, 1.5625, 1.0625, 0.0625]],\n                     dtype=K.floatx()),\n        ),\n        (6, 2, 1,\n         np.array([[\n             0.458069, 0.573227, 0.194336, 1.539047, 0.045883, 4.009995,\n             3.962494, 3.937500, 0.363266, 0.875198, 0.710938, 4.000000,\n             7.000000, 3.937500, 3.937592, 0.199326, 0.458008, 0.625977,\n             0.544922, 1.046875, 0.586899, 3.367188, 3.804688, 0.312500,\n             0.062500, 0.562500, 0.375000, 3.367188, 1.046875, 2.796875,\n             0.054688, 1.562500, 2.562500\n         ]], dtype=K.floatx()),\n         np.array([[\n             0.500000, 0.625000, 0.250000, 1.500000, 0.000000, 3.937500,\n             3.937500, 3.937500, 0.375000, 0.875000, 0.750000, 3.937500,\n             3.937500, 3.937500, 3.937500, 0.250000, 0.500000, 0.625000,\n             0.500000, 1.000000, 0.625000, 3.375000, 3.750000, 0.250000,\n             0.000000, 0.500000, 0.375000, 3.375000, 1.000000, 2.750000,\n             0.000000, 1.500000, 2.500000\n         ]], dtype=K.floatx())),\n    ])\ndef test_quantized_relu(bits, integer, use_sigmoid, test_values, expected_values):\n  \"\"\"Test quantized_relu function.\"\"\"\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [quantized_relu(bits, integer, use_sigmoid)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, 
expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    (\n        \"bits, integer, symmetric, keep_negative, test_values, expected_values,\"\n        \" rtol\"\n    ),\n    [\n        (\n            8,\n            100,\n            1,\n            True,\n            np.array([[1.25e+29, 3, -1.1e+30, 4.0e+32]], dtype=K.floatx()),\n            np.array([[1.23794004e+29, 0.0, -1.09929075e+30, 1.26269884e+30]],\n                     dtype=K.floatx()),\n            5.0e+27,  # Effective quantization step size\n        ),\n        (\n            6,\n            2,\n            0,\n            True,\n            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],\n                     dtype=K.floatx()),\n            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 3.875, 3.875]],\n                     dtype=K.floatx()),\n            1e-05,\n        ),\n        (\n            6,\n            2,\n            0,\n            False,\n            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],\n                     dtype=K.floatx()),\n            np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1, 3.9375, 3.9375]],\n                     dtype=K.floatx()),\n            1e-05,\n        ),\n        (\n            6,\n            2,\n            1,\n            True,\n            np.array([[-10, -4, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],\n                     dtype=K.floatx()),\n            np.array([[-3.875, -3.875, -1.0, -0.5, 0.0, 0.5, 1, 3.875, 3.875]],\n                     dtype=K.floatx()),\n            1e-05,\n        )\n    ])\ndef test_quantized_bits(bits, integer, symmetric, keep_negative, test_values,\n                        expected_values, rtol):\n  x = K.placeholder(ndim=2)\n  f = K.function([x],\n                 [quantized_bits(bits, integer, symmetric, keep_negative)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=rtol)\n\n\n@pytest.mark.parametrize(\n    \"bits, integer, expected_output, expected_scale\",\n    
[(4, 2,\n      [[0.25, 3.0, 0.09375, 0.25], [0.4375, 0.0, 0.21875, 1.5]],\n      [[0.125, 1., 0.0625, 0.5]]),\n     (4, 1, [[0.25, 3., 0.09375, 0.25], [0.4375, 0., 0.21875, 1.5]],\n      [[0.25, 2., 0.125, 1.]]),\n     (5, 2,\n      [[0.21875, 2.75, 0.09375, 0.375], [0.46875, 0.25, 0.234375, 1.375]],\n      [[0.125, 1, 0.0625, 0.5]]),\n    ])\ndef test_quantized_bits_with_auto_po2_scale(\n    bits, integer, expected_output, expected_scale):\n  # Test if quantizer with the fixed scale works properly.\n  x = np.array([[0.23, 2.76, 0.1, 0.33], [0.53, 0.16, 0.3, 1.43]])\n\n  q = quantized_bits(\n      bits=bits, integer=integer, alpha=\"auto_po2\")\n  q_out = q(x).numpy()\n  scale = q.scale.numpy()\n\n  np.testing.assert_array_equal(q_out, expected_output)\n  np.testing.assert_array_equal(scale, expected_scale)\n\n\ndef test_quantized_bits_with_post_training_scale():\n  # Test if quantizer with the fixed scale works properly.\n  np.random.seed(42)\n  array = np.random.uniform(low=0, high=10, size=(7, 64, 64, 3))\n\n  auto_po2_quantizer = quantized_bits(\n      bits=8, integer=3, alpha=\"auto_po2\")\n  qw = auto_po2_quantizer(array)\n  auto_po2_scale = auto_po2_quantizer.scale.numpy()\n  alpha_ndarray_quantizer = quantized_bits(\n      bits=8, integer=3, alpha=\"auto_po2\",\n      post_training_scale=auto_po2_scale)\n\n  # Check if the scale is the same as auto_po2 quantizer.\n  np.testing.assert_array_equal(auto_po2_scale,\n                                alpha_ndarray_quantizer.scale)\n\n  qw_ndarray = alpha_ndarray_quantizer(array)\n  # Check if the quantized values are the same as auto_po2 quantizer.\n  np.testing.assert_array_equal(qw.numpy(), qw_ndarray.numpy())\n\n\n@pytest.mark.parametrize('alpha, threshold, test_values, expected_values', [\n    (1.0, 0.33,\n     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]], dtype=K.floatx()),\n     np.array([[-1.0, -1.0, -1.0, 0, 0.0, 0.0, 1, 1, 1]], dtype=K.floatx())),\n    (10.0, 5.0,\n     np.array([[-11.0, -7.0, 
-4.0, -0.2, 0.0, 0.3, 1, 4, 10]],\n              dtype=K.floatx()),\n     np.array([[-10.0, -10.0, 0.0, 0, 0.0, 0.0, 0, 0, 10]], dtype=K.floatx())),\n])\ndef test_ternary(alpha, threshold, test_values, expected_values):\n  x = K.placeholder(ndim=2)\n  f = K.function([x],\n                 [ternary(alpha, threshold)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize('use_01, alpha, test_values, expected_values', [\n    (False, 1.0,\n     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]], dtype=K.floatx()),\n     np.array([[-1.0, -1.0, -1.0, -1.0, 1, 1, 1, 1, 1]], dtype=K.floatx())),\n    (False, 5.0,\n     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],\n              dtype=K.floatx()),\n     np.array([[-5.0, -5.0, -5.0, -5, 5.0, 5.0, 5, 5, 5]], dtype=K.floatx())),\n    (True, 5.0,\n     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],\n              dtype=K.floatx()),\n     np.array([[0, 0, 0, 0, 5, 5, 5, 5, 5]], dtype=K.floatx())),\n])\ndef test_binary(use_01, alpha, test_values, expected_values):\n  x = K.placeholder(ndim=2)\n  f = K.function([x], [binary(use_01, alpha)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize('test_values, expected_values', [\n    (np.array([[42.0] * 100000], dtype=K.floatx()), 42.0),\n    (np.array([[100.0] * 100000], dtype=K.floatx()), 100.0),\n    (np.array([[48.0] * 100000], dtype=K.floatx()), 48.0),\n    (np.array([[-141.0] * 100000], dtype=K.floatx()), -141.0),\n    (np.array([[-32.0] * 100000], dtype=K.floatx()), -32.0),\n    (np.array([[32.0] * 100000], dtype=K.floatx()), 32.0),\n    (np.array([[10031.0] * 100000], dtype=K.floatx()), 10031.0),\n    (np.array([[0.0] * 100000], dtype=K.floatx()), 0.0),\n])\ndef test_stochastic_round_quantized_po2(test_values, expected_values):\n  K.set_learning_phase(1)\n  np.random.seed(666)\n  x = K.placeholder(ndim=2)\n  q = 
quantized_po2(use_stochastic_rounding=True)\n  f = K.function([x], [q(x)])\n  res = f([test_values])[0]\n  res = np.average(res)\n  assert_allclose(res, expected_values, rtol=1e-01, atol=1e-6)\n\n\n@pytest.mark.parametrize('test_values, expected_values', [\n    (np.array([[42.0] * 100000], dtype=K.floatx()), 42.0),\n    (np.array([[-42.0] * 100000], dtype=K.floatx()), 0.0),\n    (np.array([[0.0] * 100000], dtype=K.floatx()), 0.0),\n    (np.array([[100.0] * 100000], dtype=K.floatx()), 100.0),\n    (np.array([[48.0] * 100000], dtype=K.floatx()), 48.0),\n])\ndef test_stochastic_round_quantized_relu_po2(test_values, expected_values):\n  K.set_learning_phase(1)\n  np.random.seed(666)\n  x = K.placeholder(ndim=2)\n  q = quantized_relu_po2(use_stochastic_rounding=True)\n  f = K.function([x], [q(x)])\n  res = f([test_values])[0]\n  res = np.average(res)\n  assert_allclose(res, expected_values, rtol=1e-01, atol=1e-6)\n\n\ndef test_stochastic_binary():\n  np.random.seed(42)\n  K.set_learning_phase(1)\n\n  x = np.random.uniform(-0.01, 0.01, size=10)\n  x = np.sort(x)\n  # Adding a dimension to have a common channel axis for quantization. 
This is\n  # to cope with a bug fix in \"_get_scale\" without changing the test cases.\n  x = np.expand_dims(x, axis=1)\n\n  s = stochastic_binary(alpha=\"auto_po2\")\n\n  ty = np.zeros_like(s)\n  ts = 0.0\n\n  n = 1000\n\n  for _ in range(n):\n    y = K.eval(s(K.constant(x)))\n    scale = K.eval(s.scale)[0]\n    ts = ts + scale\n    ty = ty + (y / scale)\n\n  # Perform squeezing to remove the common channel axis.\n  result = (ty/n).astype(np.float32)\n  result = np.squeeze(result)\n  scale = np.array([ts/n])\n  scale = np.squeeze(scale)\n\n  expected = np.array(\n      [-1., -1., -1., -0.852, 0.782, 0.768, 0.97, 0.978, 1.0, 1.0]\n  ).astype(np.float32)\n  expected_scale = np.array([0.003906])\n\n  assert_allclose(result, expected, atol=0.1)\n  assert_allclose(scale, expected_scale, rtol=0.1)\n\n\n@pytest.mark.parametrize('alpha, test_values, expected_values', [\n    (1.0,\n     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]], dtype=K.floatx()),\n     np.array([[-1.0, -1.0, -1.0, -1.0, 1, 1, 1, 1, 1]], dtype=K.floatx())),\n    (5.0,\n     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],\n              dtype=K.floatx()),\n     np.array([[-5.0, -5.0, -5.0, -5, 5.0, 5.0, 5, 5, 5]], dtype=K.floatx()))\n])\ndef test_stochastic_binary_inference_mode(alpha, test_values, expected_values):\n  K.set_learning_phase(0)\n  x = K.placeholder(ndim=2)\n  q = stochastic_binary(alpha)\n  f = K.function([x], [q(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bound, alpha, temperature, expected_values, expected_scale', [\n        (\n            0.01,\n            \"auto\",\n            8,\n            np.array([-0.973, -0.903, -0.759, -0.574, -0.242, 0.161, 0.508,\n                      0.723, 0.874, 0.975]).astype(np.float32),\n            np.array([0.008427, 0.007001, 0.0057, 0.004457, 0.003537, 0.003416,\n                      0.004507, 0.005536, 0.006853, 0.008282]\n                  
   ).astype(np.float32)\n            ),\n        (\n            0.01,\n            \"auto_po2\",\n            8,\n            np.array([-0.979, -0.877, -0.639, -0.586, -0.23, 0.154,\n                      0.327, 0.603, 0.83, 0.986]).astype(np.float32),\n            np.array([0.007812, 0.007812, 0.007812, 0.003906, 0.003906,\n                      0.003906, 0.007812, 0.007812, 0.007812, 0.007812]\n                     ).astype(np.float32)\n        )\n    ])\ndef test_stochastic_ternary(bound, alpha, temperature, expected_values,\n                            expected_scale):\n  np.random.seed(42)\n  K.set_learning_phase(1)\n\n  n = 1000\n\n  x = np.random.uniform(-bound, bound, size=(n, 10))\n  x = np.sort(x, axis=1)\n\n  s = stochastic_ternary(alpha=alpha, temperature=temperature)\n\n  y = K.eval(s(K.constant(x)))\n  scale = K.eval(s.scale).astype(np.float32)[0]\n\n  ty = np.zeros_like(s)\n  for i in range(n):\n    ty = ty + (y[i] / scale)\n\n  result = (ty/n).astype(np.float32)\n\n  assert_allclose(result, expected_values, atol=0.1)\n  assert_allclose(scale, expected_scale, rtol=0.1)\n\n\n@pytest.mark.parametrize('alpha, threshold, test_values, expected_values', [\n    (1.0, 0.33,\n     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]], dtype=K.floatx()),\n     np.array([[-1.0, -1.0, -1.0, 0, 0.0, 0.0, 1, 1, 1]], dtype=K.floatx())),\n    (10.0, 5.0,\n     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],\n              dtype=K.floatx()),\n     np.array([[-10.0, -10.0, 0.0, 0, 0.0, 0.0, 0, 0, 10]], dtype=K.floatx())),\n])\ndef test_stochastic_ternary_inference_mode(alpha, threshold, test_values,\n                                           expected_values):\n  K.set_learning_phase(0)\n  x = K.placeholder(ndim=2)\n  q = stochastic_ternary(alpha, threshold)\n  f = K.function([x],\n                 [q(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    # y = x * relu6(x+3)/6, the 
total world length is 6 bits with 2 integer\n    # bits. The quantization is in asymmetric mode.\n    ('bits, integer, symmetric, relu_shift, relu_upper_bound,'\n     'test_values, expected_values'), [\n         (6, 2, 0, 3, 6,\n          np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],\n                   dtype=K.floatx()),\n          np.array([[0., -0.375, -0.375, -0.25, 0., 0.25, 0.625,\n                     3.875, 3.875]], dtype=K.floatx()),\n         ),\n         (6, 4, 1, 3, 6,\n          np.array([[-10.0, -2.0, -2.3, -0.25, 0.0, 0.5, 1, 4, 10]],\n                   dtype=K.floatx()),\n          np.array([[0., -0.5, -0.5, 0., 0., 0.5, 0.5, 4., 10.]],\n                   dtype=K.floatx()),\n         ),\n         (2, 0, 0, 3, 6,\n          np.array([[-10.0, -2.0, -2.3, -0.25, 0.0, 0.5, 1, 4, 10]],\n                   dtype=K.floatx()),\n          np.array([[0., -0.5, -0.5, 0., 0., 0.5, 0.5, 0.5, 0.5]],\n                   dtype=K.floatx()),\n         ),])\ndef test_quantized_hswish(bits, integer, symmetric, relu_shift,\n                          relu_upper_bound, test_values, expected_values):\n  x = K.placeholder(ndim=2)\n  f = K.function(\n      [x], [quantized_hswish(bits, integer, symmetric, relu_shift=relu_shift,\n                             relu_upper_bound=relu_upper_bound)(x)])\n  result = f([test_values])[0]\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\ndef test_quantized_relu_fast_inference():\n  q1 = quantized_relu(10, 2, enable_fast_inference=False)\n  q2 = quantized_relu(10, 2, enable_fast_inference=True)\n  x = np.array([-2.1, 0.73, 2.36, 4.98])\n  np.testing.assert_array_equal(q1(x).numpy(), q2(x).numpy())\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qadaptiveactivation_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test the QAdaptiveActivation layer from qlayers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport itertools\n\nimport numpy as np\nimport pytest\nimport tensorflow.compat.v2 as tf\n\nfrom qkeras.qlayers import QAdaptiveActivation\nfrom qkeras.quantizers import _get_integer_bits\n\n\ndef run_qadaptiveactivation_test(input_val, kwargs):\n  \"\"\"Helper function to test QAdaptiveActivation inputs and outputs.\"\"\"\n  err = 'Failed test with {} on input {}'.format(kwargs, input_val)\n\n  # Only test inputs of shape (batch_size, width, height, channels)\n  assert len(input_val.shape) == 4, err\n\n  # Only test short term layer usage with ema_decay == 0\n  assert kwargs['ema_decay'] == 0, err\n  assert kwargs['ema_freeze_delay'] is None, err\n\n  # Prepare layer in a static TF graph\n  model = tf.keras.Sequential([QAdaptiveActivation(**kwargs)])\n  model.compile()\n\n  # Test input on untrained EMAs\n  qout = model(input_val, training=False).numpy()\n  assert np.isclose(model.layers[0].quantizer(input_val), qout).all(), err\n  assert np.isclose(model.layers[0].ema_min.numpy().flatten(), 0).all(), err\n  assert np.isclose(model.layers[0].ema_max.numpy().flatten(), 0).all(), err\n\n  # Run an unquantized 
input and train the EMA\n  unquantized_out = model(input_val, training=True).numpy()\n  assert kwargs['current_step'].numpy() == 0, err\n  if kwargs['activation'] == 'quantized_relu':\n    assert np.isclose(unquantized_out, np.maximum(input_val, 0)).all(), err\n  elif kwargs['activation'] == 'quantized_bits':\n    assert np.isclose(unquantized_out, input_val).all(), err\n  else:\n    raise ValueError('Invalid quantizer type ', kwargs['activation'])\n\n  # Check EMAs\n  if kwargs['per_channel']:\n    assert np.isclose(model.layers[0].ema_min.numpy(),\n                      np.min(input_val, axis=(0, 1, 2))).all(), err\n    assert np.isclose(model.layers[0].ema_max.numpy(),\n                      np.max(input_val, axis=(0, 1, 2))).all(), err\n  else:\n    assert np.isclose(model.layers[0].ema_min.numpy(),\n                      np.min(input_val, axis=(0, 1, 2, 3))).all(), err\n    assert np.isclose(model.layers[0].ema_max.numpy(),\n                      np.max(input_val, axis=(0, 1, 2, 3))).all(), err\n\n  # Check quantizer\n  quant = model.layers[0].quantizer\n  assert quant.__class__.__name__ == kwargs['activation'], err\n  assert quant.bits == kwargs['total_bits'], err\n  assert quant.symmetric == kwargs['symmetric'], err\n  keep_negative = None\n  if kwargs['activation'] == 'quantized_relu':\n    assert not quant.is_quantized_clip, err\n    assert quant.negative_slope == kwargs['relu_neg_slope'], err\n    assert quant.relu_upper_bound is None, err\n    keep_negative = kwargs['relu_neg_slope'] != 0\n  elif kwargs['activation'] == 'quantized_bits':\n    assert quant.keep_negative, err\n    assert quant.alpha == 1.0, err\n    keep_negative = True\n  expected_integer_bits = _get_integer_bits(model.layers[0].ema_min.numpy(),\n                                            model.layers[0].ema_max.numpy(),\n                                            kwargs['total_bits'],\n                                            kwargs['symmetric'],\n                                    
        keep_negative,\n                                            kwargs['po2_rounding']).numpy()\n  assert np.isclose(expected_integer_bits, quant.integer.numpy()).all(), err\n\n  # Skip to a step where the quantization is used\n  kwargs['current_step'].assign(tf.constant(kwargs['quantization_delay'],\n                                            tf.int64))\n\n  # Check quantized output\n  # To set qnoise_factor to 1.0 explicitly.\n  qnoise_factor = np.array(quant.qnoise_factor)\n  quant.update_qnoise_factor(1.0)\n  expected_qout = np.copy(quant(input_val))\n  # Revert qnoise_factor to its original value.\n  quant.update_qnoise_factor(qnoise_factor)\n  qout = model(input_val, training=True).numpy()\n  assert np.isclose(expected_qout, qout).all(), err\n\n  # Check testing mode\n  qout = model(input_val, training=False).numpy()\n  assert np.isclose(quant(input_val), qout).all(), err\n\n\n@pytest.mark.parametrize(\n    'momentum, ema_freeze_delay, total_steps, estimate_step_count',\n    [(0.9, 50, 100, False), (0.5, 1000, 1500, False), (0.1, 2, 100, False),\n     (0.999, 98, 100, False), (0.9, 50, 100, True), (0.5, 1000, 1500, True),\n     (0.1, 2, 100, True), (0.999, 98, 100, True)])\ndef test_qadaptiveact_ema(momentum, ema_freeze_delay, total_steps,\n                          estimate_step_count):\n  \"\"\"Test the exponential moving averages over time for QAdaptiveActivation.\"\"\"\n\n  # Initialize a QAdaptiveActivation layer just for testing the EMA\n  if estimate_step_count:\n    step = None\n  else:\n    step = tf.Variable(0, dtype=tf.int64)\n  q_act = QAdaptiveActivation(activation='quantized_bits',\n                              total_bits=8,\n                              current_step=step,\n                              quantization_delay=total_steps*2,\n                              ema_freeze_delay=ema_freeze_delay,\n                              ema_decay=momentum,\n                              per_channel=True,\n                              
po2_rounding=False)\n  model = tf.keras.Sequential([q_act])\n  model.compile()\n\n  # Simulate a number of training steps and check the EMA values\n  exp_ema_max = 0.0\n  exp_ema_min = 0.0\n  for i in range(0, total_steps):\n    vals = np.random.random((1, 2, 1)) * i  # generate random values for update\n    model(vals, training=True)  # Simulate training\n\n    # Check the steps match\n    if estimate_step_count:\n      assert np.equal(q_act.step.numpy(), i)\n\n    # Calculate expected values\n    if i <= ema_freeze_delay:\n      exp_ema_max = (exp_ema_max * momentum) + (vals.max() * (1.0 - momentum))\n      exp_ema_min = (exp_ema_min * momentum) + (vals.min() * (1.0 - momentum))\n    exp_int_bits = _get_integer_bits(exp_ema_min, exp_ema_max,\n                                     q_act.quantizer.bits,\n                                     q_act.quantizer.symmetric,\n                                     q_act.quantizer.symmetric, False)\n\n    # Check results\n    assert np.abs(exp_ema_max - q_act.ema_max.numpy()[0]) < 0.0001\n\n    assert np.isclose(exp_int_bits.numpy(), q_act.quantizer.integer.numpy())\n    if not estimate_step_count:\n      step.assign_add(1)\n\n\ndef test_qadaptiveactivation():\n  \"\"\"Test a wide variety of inputs to the QAdaptiveActivation layer.\"\"\"\n  test_options = {\n      'activation': ['quantized_bits', 'quantized_relu'],\n      'total_bits': [1, 2, 4, 8, 16],\n      'symmetric': [True, False],\n      'quantization_delay': [1],  # We will only run for one step\n      'per_channel': [True, False],\n      'po2_rounding': [True, False],\n      'relu_neg_slope': [0.0, -0.5]\n  }\n\n  for args in itertools.product(*test_options.values()):\n    args = {list(test_options.keys())[i]: args[i] for i in range(len(args))}\n    args['ema_freeze_delay'] = None  # This test does not test the EMA freeze\n    args['ema_decay'] = 0 # This test not test the EMA delay\n    for img_shape in [(1, 28, 28, 3), (1, 3, 4, 5)]:\n      for input_scale in [255, 
1]:\n        args['current_step'] = tf.Variable(0, dtype=tf.int64)\n        img = np.random.random(img_shape) * input_scale\n        run_qadaptiveactivation_test(img, args)\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qalpha_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test get_weight_scale function with auto and auto_po2 modes of quantizers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nimport logging\nfrom numpy.testing import assert_allclose\nfrom numpy.testing import assert_equal\nimport pytest\nfrom tensorflow.keras import backend as K\nfrom qkeras import binary\nfrom qkeras import get_weight_scale\nfrom qkeras import ternary\nfrom qkeras.quantizers import _get_integer_bits\n\n\n# expected value if input is uniform distribution is:\n#   - alpha = m/2.0 for binary\n#   - alpha = (m+d)/2.0 for ternary\n\n\ndef test_binary_auto():\n  \"\"\"Test binary auto scale quantizer.\"\"\"\n\n  np.random.seed(42)\n  N = 1000000\n  m_list = [1.0, 0.1, 0.01, 0.001]\n\n  for m in m_list:\n    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())\n    x = K.constant(x)\n\n    quantizer = binary(alpha=\"auto\")\n    q = K.eval(quantizer(x))\n\n    result = get_weight_scale(quantizer, q)\n    expected = m / 2.0\n    logging.info(\"expect %s\", expected)\n    logging.info(\"result %s\", result)\n    assert_allclose(result, expected, rtol=0.02)\n\n\ndef test_binary_auto_po2():\n  \"\"\"Test binary auto_po2 scale quantizer.\"\"\"\n\n  np.random.seed(42)\n  N = 
1000000\n  m_list = [1.0, 0.1, 0.01, 0.001]\n\n  for m in m_list:\n    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())\n    x = K.constant(x)\n\n    quantizer_ref = binary(alpha=\"auto\")\n    quantizer = binary(alpha=\"auto_po2\")\n\n    q_ref = K.eval(quantizer_ref(x))\n    q = K.eval(quantizer(x))\n\n    ref = get_weight_scale(quantizer_ref, q_ref)\n\n    expected = np.power(2.0, np.round(np.log2(ref)))\n    result = get_weight_scale(quantizer, q)\n\n    assert_allclose(result, expected, rtol=0.0001)\n\n\ndef test_ternary_auto():\n  \"\"\"Test ternary auto scale quantizer.\"\"\"\n\n  np.random.seed(42)\n  N = 1000000\n  m_list = [1.0, 0.1, 0.01, 0.001]\n\n  for m in m_list:\n    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())\n    x = K.constant(x)\n\n    quantizer = ternary(alpha=\"auto\")\n    q = K.eval(quantizer(x))\n\n    d = m/3.0\n    result = np.mean(get_weight_scale(quantizer, q))\n    expected = (m + d) / 2.0\n    assert_allclose(result, expected, rtol=0.02)\n\n\ndef test_ternary_auto_po2():\n  \"\"\"Test ternary auto_po2 scale quantizer.\"\"\"\n\n  np.random.seed(42)\n  N = 1000000\n  m_list = [1.0, 0.1, 0.01, 0.001]\n\n  for m in m_list:\n    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())\n    x = K.constant(x)\n\n    quantizer_ref = ternary(alpha=\"auto\")\n    quantizer = ternary(alpha=\"auto_po2\")\n\n    q_ref = K.eval(quantizer_ref(x))\n    q = K.eval(quantizer(x))\n\n    ref = get_weight_scale(quantizer_ref, q_ref)\n\n    expected = np.power(2.0, np.round(np.log2(ref)))\n    result = get_weight_scale(quantizer, q)\n\n    assert_allclose(result, expected, rtol=0.0001)\n\n\ndef test_get_integer_bits():\n  \"\"\"Test automated integer bit (po2 scale) estimator.\"\"\"\n\n  bits = 4\n  min_value = np.array([\n      -4.0, -4.0, -4.0, -4.0, 1.0, -3.0, -10.0, -16, -25, 0, 0, 0, 0.1, 0.0,\n      -1.0, 0.0, 0.0, 0.0, 0, 0, 0\n  ])\n  max_value = np.array([\n      3.5, 3.51, 3.75, 3.751, 2.0, 4.0, 5.0, 8, 0, 0, 0.1, 0.999, 0.5, 
0.8751,\n      0.9375, 0.93751, 1.875, 1.8751, 9, 11, 12\n  ])\n\n  # unsigned number (keep_negative=False) without clippling.\n  symmetric = False  # symmetric is irrelevant.\n  keep_negative = False\n  is_clipping = False\n  integer_bits = _get_integer_bits(\n      min_value=min_value,\n      max_value=max_value,\n      bits=bits,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      is_clipping=is_clipping)\n  assert_equal(\n      integer_bits,\n      np.array([2, 2, 2, 3, 2, 3, 3, 4, 0, 0, 0, 1, 0, 0, 0, 1, 1, 2, 4, 4, 4]))\n\n  # unsigned number (keep_negative=False) with clippling.\n  symmetric = False  # symmetric is irrelevant.\n  keep_negative = False\n  is_clipping = True\n  integer_bits = _get_integer_bits(\n      min_value=min_value,\n      max_value=max_value,\n      bits=bits,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      is_clipping=is_clipping)\n  assert_equal(\n      integer_bits,\n      np.array([2, 2, 2, 2, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 4]))\n\n  # signed number (keep_negative=True) non-symmetric without clippling\n  symmetric = False\n  keep_negative = True\n  is_clipping = False\n  integer_bits = _get_integer_bits(\n      min_value=min_value,\n      max_value=max_value,\n      bits=bits,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      is_clipping=is_clipping)\n  assert_equal(\n      integer_bits,\n      np.array([2, 3, 3, 3, 2, 3, 3, 3, 3, 0, 0, 1, 0, 1, 1, 1, 2, 2, 3, 3, 3]))\n\n  # signed number (keep_negative=True) non-symmetric with clippling\n  symmetric = False\n  keep_negative = True\n  is_clipping = True\n  integer_bits = _get_integer_bits(\n      min_value=min_value,\n      max_value=max_value,\n      bits=bits,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      is_clipping=is_clipping)\n  assert_equal(\n      integer_bits,\n      np.array([2, 2, 2, 2, 1, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3]))\n\n  # signed number 
(keep_negative=True) symmetric without clippling\n  symmetric = True\n  keep_negative = True\n  is_clipping = False\n  integer_bits = _get_integer_bits(\n      min_value=min_value,\n      max_value=max_value,\n      bits=bits,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      is_clipping=is_clipping)\n  assert_equal(\n      integer_bits,\n      np.array([3, 3, 3, 3, 2, 3, 3, 3, 3, 0, 0, 1, 0, 1, 1, 1, 2, 2, 3, 3, 3]))\n\n  # signed number (keep_negative=True) symmetric with clippling\n  symmetric = True\n  keep_negative = True\n  is_clipping = True\n  integer_bits = _get_integer_bits(\n      min_value=min_value,\n      max_value=max_value,\n      bits=bits,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      is_clipping=is_clipping)\n  assert_equal(\n      integer_bits,\n      np.array([2, 2, 2, 2, 1, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3]))\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qconvolutional_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test layers from qconvolutional.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport os\nimport numpy as np\nfrom numpy.testing import assert_allclose\nimport pytest\nimport tempfile\n\nimport tensorflow as tf\nfrom tensorflow.keras import backend as K\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.backend import clear_session\n\nfrom qkeras import binary\nfrom qkeras import ternary\nfrom qkeras import QActivation\nfrom qkeras import QDense\nfrom qkeras import QConv1D\nfrom qkeras import QConv2D\nfrom qkeras import QConv2DTranspose\nfrom qkeras import QSeparableConv1D\nfrom qkeras import QSeparableConv2D\nfrom qkeras import quantized_bits\nfrom qkeras import quantized_relu\nfrom qkeras.utils import model_save_quantized_weights\nfrom qkeras.utils import quantized_model_from_json\nfrom qkeras.utils import load_qmodel\nfrom qkeras import print_qstats\nfrom qkeras import extract_model_operations\n\n\ndef test_qnetwork():\n  K.set_learning_phase(1)\n  x = x_in = Input((28, 28, 1), name='input')\n  x = QSeparableConv2D(\n      32, (2, 2),\n      strides=(2, 
2),\n      depthwise_quantizer=binary(alpha=1.0),\n      pointwise_quantizer=quantized_bits(4, 0, 1, alpha=1.0),\n      activation=quantized_bits(6, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='conv2d_0_m')(\n          x)\n  x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x)\n  x = QConv2D(\n      64, (3, 3),\n      strides=(2, 2),\n      kernel_quantizer=ternary(alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='conv2d_1_m',\n      activation=quantized_relu(6, 3, 1))(\n          x)\n  x = QConv2D(\n      64, (2, 2),\n      strides=(2, 2),\n      kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='conv2d_2_m')(\n          x)\n  x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x)\n  x = Flatten(name='flatten')(x)\n  x = QDense(\n      10,\n      kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='dense')(\n          x)\n  x = Activation('softmax', name='softmax')(x)\n\n  model = Model(inputs=[x_in], outputs=[x])\n\n  # reload the model to ensure saving/loading works\n  json_string = model.to_json()\n  clear_session()\n  model = quantized_model_from_json(json_string)\n\n  # generate same output for weights\n  np.random.seed(42)\n  for layer in model.layers:\n    all_weights = []\n\n    for i, weights in enumerate(layer.get_weights()):\n      input_size = np.prod(layer.input.shape.as_list()[1:])\n      if (len(layer.get_weights()) == 3 and i > 0): # pointwise kernel and bias\n        input_size = input_size // np.prod(layer.kernel_size)\n      shape = weights.shape\n      print(shape)\n      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)\n      # he normal initialization with a scale factor of 2.0\n      all_weights.append(\n          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))\n    if all_weights:\n      layer.set_weights(all_weights)\n\n  
# apply quantizer to weights\n  model_save_quantized_weights(model)\n\n  all_weights = []\n\n  for layer in model.layers:\n    for i, weights in enumerate(layer.get_weights()):\n\n      w = np.sum(weights)\n      all_weights.append(w)\n\n  all_weights = np.array(all_weights)\n\n  # test_qnetwork_weight_quantization\n  all_weights_signature = np.array(\n      [2., -6.75, -0.625, -2., -0.25, -56., 1.125, -1.625, -1.125])\n\n  assert all_weights.size == all_weights_signature.size\n  assert np.all(all_weights == all_weights_signature)\n\n  # test_qnetwork_forward:\n  expected_output = np.array(\n      [[0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n        0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 1.e+00, 0.e+00, 0.e+00, 7.6e-06],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],\n      [0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00,\n       0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00,\n       0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00],\n      [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,\n       1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00]]).astype(np.float16)\n  inputs = 2 * np.random.rand(10, 28, 28, 1)\n  actual_output = model.predict(inputs).astype(np.float16)\n  assert_allclose(actual_output, expected_output, rtol=1e-4)\n\n\ndef test_sequential_qnetwork():\n  model = tf.keras.Sequential()\n  model.add(Input((28, 28, 1), name='input'))\n  model.add(\n      QConv2D(\n          32, (2, 2),\n          strides=(2, 2),\n          
kernel_quantizer=quantized_bits(4, 0, 1),\n          bias_quantizer=quantized_bits(4, 0, 1),\n          name='conv2d_0_m'))\n  model.add(QActivation(quantized_relu(4, 0), name='act0_m'))\n  model.add(\n      QConv2D(\n          64, (3, 3),\n          strides=(2, 2),\n          kernel_quantizer=quantized_bits(4, 0, 1),\n          bias_quantizer=quantized_bits(4, 0, 1),\n          name='conv2d_1_m'))\n  model.add(QActivation(quantized_relu(4, 0), name='act1_m'))\n  model.add(\n      QConv2D(\n          64, (2, 2),\n          strides=(2, 2),\n          kernel_quantizer=quantized_bits(4, 0, 1),\n          bias_quantizer=quantized_bits(4, 0, 1),\n          name='conv2d_2_m'))\n  model.add(QActivation(quantized_relu(4, 0), name='act2_m'))\n  model.add(Flatten())\n  model.add(\n      QDense(\n          10,\n          kernel_quantizer=quantized_bits(4, 0, 1),\n          bias_quantizer=quantized_bits(4, 0, 1),\n          name='dense'))\n  model.add(Activation('softmax', name='softmax'))\n\n  # Check that all model operation were found correctly\n  model_ops = extract_model_operations(model)\n  for layer in model_ops.keys():\n    assert model_ops[layer]['type'][0] != 'null'\n  return model\n\n\n@pytest.mark.parametrize(\"layer_cls\", [\"QConv1D\", \"QSeparableConv1D\"])\ndef test_qconv1d(layer_cls):\n  np.random.seed(33)\n  if layer_cls == \"QConv1D\":\n    x = Input((4, 4,))\n    y = QConv1D(\n      2, 1,\n      kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='qconv1d')(\n          x)\n    model = Model(inputs=x, outputs=y)\n  else:\n    x = Input((4, 4,))\n    y = QSeparableConv1D(\n      2, 2,\n      depthwise_quantizer=quantized_bits(6, 2, 1, alpha=1.0),\n      pointwise_quantizer=quantized_bits(4, 0, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='qconv1d')(\n          x)\n    model = Model(inputs=x, outputs=y)\n\n  # Extract model operations\n  model_ops = 
extract_model_operations(model)\n\n  # Check the input layer model operation was found correctly\n  assert model_ops['qconv1d']['type'][0] != 'null'\n\n  # Assertion about the number of operations for this (Separable)Conv1D layer\n  if layer_cls == \"QConv1D\":\n    assert model_ops['qconv1d']['number_of_operations'] == 32\n  else:\n    assert model_ops['qconv1d']['number_of_operations'] == 30\n\n  # Print qstats to make sure it works with Conv1D layer\n  print_qstats(model)\n\n  # reload the model to ensure saving/loading works\n  # json_string = model.to_json()\n  # clear_session()\n  # model = quantized_model_from_json(json_string)\n\n  for layer in model.layers:\n    all_weights = []\n    for i, weights in enumerate(layer.get_weights()):\n      input_size = np.prod(layer.input.shape.as_list()[1:])\n      if input_size is None:\n        input_size = 10 * 10\n      shape = weights.shape\n      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)\n      all_weights.append(\n          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))\n    if all_weights:\n      layer.set_weights(all_weights)\n  # Save the model as an h5 file using Keras's model.save()\n  fd, fname = tempfile.mkstemp('.h5')\n  model.save(fname)\n  del model  # Delete the existing model\n\n  # Return a compiled model identical to the previous one\n  model = load_qmodel(fname)\n\n  # Clean the created h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  # apply quantizer to weights\n  model_save_quantized_weights(model)\n\n  inputs = np.random.rand(2, 4, 4)\n  p = model.predict(inputs).astype(np.float16)\n  if layer_cls == \"QConv1D\":\n    y = np.array([[[-2.441, 3.816], [-3.807, -1.426], [-2.684, -1.317],\n                   [-1.659, 0.9834]],\n                  [[-4.99, 1.139], [-2.559, -1.216], [-2.285, 1.905],\n                   [-2.652, -0.467]]]).astype(np.float16)\n  else:\n    y = np.array([[[-2.275,   -3.178], [-0.4358, -3.262], [ 1.987,  
0.3987]],\n                  [[-0.01251, -0.376], [ 0.3928, -1.328], [-1.243, -2.43  ]]]\n                ).astype(np.float16)\n  assert_allclose(p, y, rtol=1e-4)\n\ndef test_qconv2dtranspose():\n  x = Input((4, 4, 1,))\n  y = QConv2DTranspose(\n    1,\n    kernel_size=(3, 3),\n    kernel_quantizer=binary(),\n    bias_quantizer=binary(),\n    name='conv2d_tran')(x)\n  model = Model(inputs=x, outputs=y)\n  data = np.ones(shape=(1,4,4,1))\n  kernel = np.ones(shape=(3,3,1,1))\n  bias = np.ones(shape=(1,))\n  model.get_layer('conv2d_tran').set_weights([kernel, bias])\n  actual_output = model.predict(data).astype(np.float16)\n  expected_output = np.array(\n      [ [2., 3., 4., 4., 3., 2.],\n      [3., 5., 7., 7., 5., 3.],\n      [4., 7., 10., 10., 7., 4.],\n      [4., 7., 10., 10., 7., 4.],\n      [3., 5., 7., 7., 5., 3.],\n      [2., 3., 4., 4., 3., 2.] ]).reshape((1,6,6,1)).astype(np.float16)\n  assert_allclose(actual_output, expected_output, rtol=1e-4)\n\n\ndef test_masked_qconv2d_creates_correct_parameters():\n  mask = mask = np.ones((5, 5), dtype=np.float32)\n  model = tf.keras.Sequential()\n  model.add(tf.keras.layers.Input(shape=(10, 10, 1)))\n  model.add(QConv2D(mask=mask, filters=1, kernel_size=(5, 5), use_bias=False))\n\n  # There should be no non-trainable params.\n  np.testing.assert_equal(len(model.non_trainable_weights), 0)\n\n  # Validate number of trainable params. 
This should be equal to one (5,5)\n  # kernel.\n  np.testing.assert_equal(len(model.trainable_weights), 1)\n  num_trainable_params = np.prod(model.trainable_weights[0].shape)\n  np.testing.assert_equal(num_trainable_params, 25)\n\n\ndef test_qconv2d_masks_weights():\n  # Create an arbitrary mask.\n  mask = np.array(\n      [\n          [1.0, 0.0, 1.0, 0.0, 1.0],\n          [0.0, 0.0, 1.0, 0.0, 0.0],\n          [1.0, 0.0, 1.0, 0.0, 1.0],\n          [0.0, 0.0, 1.0, 0.0, 0.0],\n          [1.0, 0.0, 1.0, 0.0, 1.0],\n      ],\n      dtype=np.float32,\n  )\n  model = tf.keras.Sequential()\n  model.add(tf.keras.layers.Input(shape=(5, 5, 1)))\n  model.add(QConv2D(mask=mask, filters=1, kernel_size=(5, 5), use_bias=False))\n\n  # Set the weights to be all ones.\n  model.layers[0].set_weights([np.ones((5, 5, 1, 1), dtype=np.float32)])\n\n  # Run inference on a all ones input.\n  output = model.predict(np.ones((1, 5, 5, 1), dtype=np.float32))\n  # Output should just be summation of number of ones in the mask.\n  np.testing.assert_array_equal(\n      output, np.array([[[[11.0]]]], dtype=np.float32)\n  )\n\n\ndef test_masked_qconv2d_load_restore_works():\n  model = tf.keras.Sequential()\n  model.add(tf.keras.layers.Input(shape=(10, 10, 1)))\n  model.add(\n      QConv2D(\n          mask=np.ones((5, 5), dtype=np.float32),\n          filters=1,\n          kernel_size=(5, 5),\n          use_bias=False,\n      )\n  )\n\n  with tempfile.TemporaryDirectory() as temp_dir:\n    model_path = os.path.join(temp_dir, 'model.keras')\n    # Can save the model.\n    model.save(model_path)\n\n    # Can load the model.\n    custom_objects = {\n        'QConv2D': QConv2D,\n    }\n    loaded_model = tf.keras.models.load_model(\n        model_path, custom_objects=custom_objects\n    )\n\n    np.testing.assert_array_equal(\n        model.layers[0].weights[0], loaded_model.layers[0].weights[0]\n    )\n\n\ndef test_qconv2d_groups_works():\n  model = tf.keras.Sequential()\n  
model.add(tf.keras.layers.Input(shape=(10, 10, 10)))\n  model.add(\n      QConv2D(\n          filters=6,\n          kernel_size=(1, 1),\n          use_bias=True,\n          groups=2,\n      )\n  )\n  # Validate number of trainable params.\n  np.testing.assert_equal(len(model.trainable_weights), 2)\n  num_trainable_params = np.prod(model.trainable_weights[0].shape) + np.prod(\n      model.trainable_weights[1].shape\n  )\n  expected_trainable_params = 36  # (5*3)*2 + 6\n  np.testing.assert_equal(num_trainable_params, expected_trainable_params)\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qdepthwise_conv2d_transpose_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test layers from qconvolutional.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport tempfile\n\nimport numpy as np\nfrom numpy.testing import assert_allclose, assert_equal\nimport pytest\nimport tensorflow as tf\n\nfrom qkeras import QDepthwiseConv2DTranspose\nfrom qkeras import quantized_bits\n\n\n# Predicted output from float model.\n_FLOAT_PREDICTED_OUTPUT = np.array([[\n    [\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n    ],\n    [\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n        [0.0, 0.0, 0.0],\n    ],\n    [\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n    ],\n    [\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n  
      [1.0, 2.0, 3.0],\n        [1.0, 2.0, 3.0],\n    ],\n    [\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n    ],\n    [\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n        [2.0, 4.0, 6.0],\n    ],\n    [\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n    ],\n    [\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n        [3.0, 6.0, 9.0],\n    ],\n]])\n\n\ndef create_model(group_size=1):\n  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))\n  x = QDepthwiseConv2DTranspose(\n      filters=2,\n      kernel_size=(2, 2),\n      strides=(2, 2),\n      padding=\"same\",\n      name=\"conv2d_tran\",\n      depthwise_activation=None,\n      depthwise_kernel_quantizer=None,\n      bias_quantizer=None,\n      group_size=group_size,\n  )(x)\n\n  model = tf.keras.Model(inputs=img_input, outputs=x)\n\n  return model\n\n\ndef create_quantized_model(group_size=1):\n  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))\n  x = QDepthwiseConv2DTranspose(\n      filters=2,\n      kernel_size=(2, 2),\n      strides=(1, 1),\n      padding=\"same\",\n      name=\"conv2d_tran\",\n      depthwise_activation=\"quantized_bits(10, 6, 1)\",\n      depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),\n      group_size=group_size,\n  )(x)\n\n  model = tf.keras.Model(inputs=img_input, outputs=x)\n\n  return model\n\n\ndef 
test_qseparable_conv2d_transpose():\n  # By setting the weights and input values manually, we can test\n  # the correctness of the output.\n\n  # Input is (1, 4, 4, 3), with 3 output channels. For i-th channel,\n  # with shape (1, 4, 4, 1), it will convolve with the depthwise kernel at\n  # i-th channel. Depthwise outputs are (1, 8, 8, 3).\n\n  # Create model.\n  model = create_model()\n\n  output_shape = model.output_shape\n  ws = model.layers[1].weights\n\n  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])\n  inputs = np.concatenate([x, x, x], axis=-1)\n  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)\n\n  # depthwise kernel of shape (2, 2, 3, 1)\n  dw_kernel = np.array([\n      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],\n      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],\n  ])\n\n  bias = tf.zeros((2,))\n\n  model.layers[1].set_weights([dw_kernel, bias])\n\n  actual_output = model.predict(inputs).astype(np.float16)\n  assert_equal(output_shape[1:], (8, 8, 3))\n  assert_equal(len(ws), 2)\n\n  # Test if the depthwise conv kernel shape is correct.\n  assert_equal(ws[0].shape, (2, 2, 3, 1))\n\n  # Test if the bias shape is correct.\n  assert_equal(ws[1].shape, (2,))\n\n  # Test if overall output is correct.\n  assert_equal(actual_output, _FLOAT_PREDICTED_OUTPUT)\n\n\ndef test_quantization_in_separable_conv2d_transpose():\n  # Test if quantization is applied correctly.\n\n  # Create model with quantization.\n  model = create_quantized_model()\n\n  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])\n  inputs = np.concatenate([x, x, x], axis=-1)\n  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)\n\n  # depthwise kernel of shape (2, 2, 3, 1)\n  dw_kernel = np.array([\n      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],\n      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],\n  ])\n\n  bias = tf.ones((2,))\n\n  model.layers[1].set_weights([dw_kernel, bias])\n\n  actual_output 
= model.predict(inputs).astype(np.float16)\n\n  qs = model.layers[1].get_quantizers()\n  assert_equal(len(qs), 3)\n  assert_equal(str(qs[0]), \"quantized_bits(1,0,1,alpha=1.0)\")\n  assert_equal(str(qs[1]), \"quantized_bits(2,2,1,alpha=1.0)\")\n  assert_equal(str(qs[2]), \"quantized_bits(10,6,1)\")\n\n  expected = np.array([[\n      [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],\n      [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]],\n      [[3.0, 3.0, 3.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0]],\n      [\n          [5.0, 5.0, 5.0],\n          [10.0, 10.0, 10.0],\n          [10.0, 10.0, 10.0],\n          [10.0, 10.0, 10.0],\n      ],\n  ]])\n\n  assert_equal(actual_output, expected)\n\n\ndef test_qseparable_conv2d_transpose_with_groups():\n  model = create_model(group_size=3)\n\n  output_shape = model.output_shape\n  ws = model.layers[1].weights\n\n  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])\n  inputs = np.concatenate([x, x, x], axis=-1)\n  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)\n\n  # depthwise kernel of shape (2, 2, 3, 3)\n  dw_kernel = np.array([\n      [\n          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],\n          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],\n      ],\n      [\n          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],\n          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],\n      ],\n  ])\n\n  bias = tf.zeros((2,))\n\n  model.layers[1].set_weights([dw_kernel, bias])\n\n  actual_output = model.predict(inputs).astype(np.float16)\n\n  predicted = _FLOAT_PREDICTED_OUTPUT * 3.0  # kernel values replicated 3 times\n\n  assert_equal(output_shape[1:], (8, 8, 3))\n  assert_equal(len(ws), 2)\n\n  # Test if the depthwise conv kernel shape is correct.\n  assert_equal(ws[0].shape, (2, 2, 3, 3))\n\n  # Test if the bias shape is correct.\n  assert_equal(ws[1].shape, (2,))\n\n  # Test if overall output is 
correct.\n  assert_equal(actual_output, predicted)\n\n\ndef test_save_and_load_model():\n  # Test if the model can be loaded from a saved model.\n  model = create_quantized_model(group_size=3)\n\n  fd, fname = tempfile.mkstemp(\".hdf5\")\n  model.save(fname)\n\n  custom_object = {\n      \"QDepthwiseConv2DTranspose\": QDepthwiseConv2DTranspose,\n  }\n\n  model_loaded = tf.keras.models.load_model(\n      fname, custom_objects=custom_object)\n\n  # Clean the h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  model_weights = model.layers[1].weights\n  loaded_model_weights = model_loaded.layers[1].weights\n\n  assert_equal(len(model_weights), len(loaded_model_weights))\n  for i, model_weight in enumerate(model_weights):\n    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qlayers_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test layers from qlayers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport logging\nimport os\nimport tempfile\n\nimport numpy as np\nfrom numpy.testing import assert_allclose\nfrom numpy.testing import assert_equal\nimport pytest\nimport tensorflow as tf\nfrom tensorflow.keras import backend as K\nfrom tensorflow.keras.backend import clear_session\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\n\nfrom qkeras import QActivation\nfrom qkeras import QDense\nfrom qkeras import quantized_bits\nfrom qkeras import quantized_relu\nfrom qkeras.utils import load_qmodel\nfrom qkeras.utils import model_save_quantized_weights\nfrom qkeras.utils import quantized_model_from_json\n\ndef qdense_util(layer_cls,\n                kwargs=None,\n                input_data=None,\n                weight_data=None,\n                expected_output=None):\n  \"\"\"qlayer test utility.\"\"\"\n  input_shape = input_data.shape\n  input_dtype = input_data.dtype\n  layer = layer_cls(**kwargs)\n  x = Input(shape=input_shape[1:], dtype=input_dtype)\n  y = layer(x)\n  layer.set_weights(weight_data)\n  
model = Model(x, y)\n  actual_output = model.predict(input_data)\n  if expected_output is not None:\n    assert_allclose(actual_output, expected_output, rtol=1e-4)\n\n\n@pytest.mark.parametrize(\n    'layer_kwargs, input_data, weight_data, bias_data, expected_output',\n    [\n        (\n            {\n                'units': 2,\n                'use_bias': True,\n                'kernel_initializer': 'glorot_uniform',\n                'bias_initializer': 'zeros'\n            },\n            np.array([[1, 1, 1, 1]], dtype=K.floatx()),\n            np.array([[10, 20], [10, 20], [10, 20], [10, 20]],\n                     dtype=K.floatx()),  # weight_data\n            np.array([0, 0], dtype=K.floatx()),  # bias\n            np.array([[40, 80]], dtype=K.floatx())),  # expected_output\n        (\n            {\n                'units': 2,\n                'use_bias': True,\n                'kernel_initializer': 'glorot_uniform',\n                'bias_initializer': 'zeros',\n                'kernel_quantizer': 'quantized_bits(2,0,alpha=1.0)',\n                'bias_quantizer': 'quantized_bits(2,0)',\n            },\n            np.array([[1, 1, 1, 1]], dtype=K.floatx()),\n            np.array([[10, 20], [10, 20], [10, 20], [10, 20]],\n                     dtype=K.floatx()),  # weight_data\n            np.array([0, 0], dtype=K.floatx()),  # bias\n            np.array([[2, 2]], dtype=K.floatx())),  #expected_output\n    ])\ndef test_qdense(layer_kwargs, input_data, weight_data, bias_data,\n                expected_output):\n  qdense_util(\n      layer_cls=QDense,\n      kwargs=layer_kwargs,\n      input_data=input_data,\n      weight_data=[weight_data, bias_data],\n      expected_output=expected_output)\n\n\ndef test_qactivation_loads():\n  layer_size = 10\n\n  # Create a small model with QActivation layer.\n  x = xin = tf.keras.layers.Input(shape=(layer_size,), name='input')\n  x = QDense(\n      layer_size,\n      name='qdense',\n  )(x)\n  x = 
QActivation(activation=quantized_relu(8), name='relu')(x)\n  model = tf.keras.Model(inputs=xin, outputs=x)\n\n  # Generate random weights for the model.\n  w_k = np.random.rand(layer_size, layer_size)\n  w_b = np.random.rand(\n      layer_size,\n  )\n  model.set_weights([w_k, w_b])\n\n  # Save the model as an h5 file.\n  fd, fname = tempfile.mkstemp('.h5')\n  model.save(fname)\n\n  # Load the model.\n  loaded_model = load_qmodel(fname)\n\n  # Clean the h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  # Compare weights of original and loaded models.\n  model_weights = model.weights\n  loaded_model_weights = loaded_model.weights\n  assert_equal(len(model_weights), len(loaded_model_weights))\n  for i, model_weight in enumerate(model_weights):\n    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qmac_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test layers from qlayers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport logging\nimport os\nimport tempfile\n\nimport numpy as np\nfrom numpy.testing import assert_allclose\nfrom numpy.testing import assert_equal\nimport pytest\nfrom tensorflow.keras import backend as K\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.models import Model\n\nfrom qkeras import QScaleShift\nfrom qkeras.utils import load_qmodel\n\n\ndef create_qmac_model(layer_cls,\n                      kwargs=None,\n                      input_data=None,\n                      weight_data=None):\n  \"\"\"Create a QMAC model for test purpose.\"\"\"\n  layer = layer_cls(**kwargs)\n  x = Input(shape=input_data.shape[1:], dtype=input_data.dtype)\n  y = layer(x)\n  layer.set_weights(weight_data)\n\n  return Model(x, y)\n\n\n@pytest.mark.parametrize(\n    'layer_kwargs, input_data, weight_data, bias_data, expected_output',\n    [\n        (\n            {\n                'weight_quantizer': 'quantized_bits(8,2,alpha=1.0)',\n                'bias_quantizer': 'quantized_bits(8,2,alpha=1.0)',\n                'activation': 'quantized_bits(8,4,alpha=1.0)'\n            },\n            np.array([[1, 1], [2, 2]], 
dtype=K.floatx()),\n            np.array([[1.0]]),\n            np.array([[4.0]]),\n            np.array([[5, 5], [6, 6]], dtype=K.floatx())),\n    ])\ndef test_qmac(layer_kwargs, input_data, weight_data, bias_data,\n              expected_output):\n  model = create_qmac_model(\n      layer_cls=QScaleShift,\n      kwargs=layer_kwargs,\n      input_data=input_data,\n      weight_data=[weight_data, bias_data])\n\n  actual_output = model.predict(input_data)\n  assert_allclose(actual_output, expected_output, rtol=1e-4)\n\n  # Test model loading and saving.\n  fd, fname = tempfile.mkstemp('.h5')\n  model.save(fname)\n\n  # Load the model.\n  loaded_model = load_qmodel(fname)\n\n  # Clean the h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  # Compare weights of original and loaded models.\n  model_weights = model.weights\n  loaded_model_weights = loaded_model.weights\n\n  assert_equal(len(model_weights), len(loaded_model_weights))\n  for i, model_weight in enumerate(model_weights):\n    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())\n\n  # Compare if loaded models have the same prediction as original models.\n  loaded_model_output = loaded_model.predict(input_data)\n  assert_equal(actual_output, loaded_model_output)\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qnoise_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test gradual quantization noise injection with quantizers of quantizers.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nimport logging\nfrom numpy.testing import assert_allclose\nfrom numpy.testing import assert_equal\nimport pytest\nfrom tensorflow.keras import backend as K\nfrom qkeras.quantizers import quantized_bits\nfrom qkeras.quantizers import quantized_relu\n\n\ndef test_qnoise_quantized_bits():\n  # 1 sign bit, 1 integer bit, and 2 fractional bits.\n  bits = 4\n  integer = 1\n  symmetric = True\n  keep_negative = True\n  alpha = 1\n  use_stochastic_rounding = False\n\n  qb = quantized_bits(\n      bits=bits,\n      integer=integer,\n      symmetric=symmetric,\n      keep_negative=keep_negative,\n      alpha=alpha,\n      use_stochastic_rounding=use_stochastic_rounding,\n      use_variables=True)\n\n  inputs = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)\n  x = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)\n  xq = np.array([0.0, 0.5, -0.5, 0.5, -0.5, 1.75, -1.75], dtype=np.float32)\n  x_xq = 0.5 * (x + xq)\n\n  # no quantization\n  qb.update_qnoise_factor(qnoise_factor=0.0)\n  x_q_0 = qb(inputs)\n  assert_equal(x_q_0, x)\n\n  # full 
quantization\n  qb.update_qnoise_factor(qnoise_factor=1.0)\n  x_q_1 = qb(inputs)\n  assert_equal(x_q_1, xq)\n\n  # mixing half and half of x and xq\n  qb.update_qnoise_factor(qnoise_factor=0.5)\n  x_q_05 = qb(inputs)\n  assert_equal(x_q_05, x_xq)\n\n\ndef test_qnoise_quantized_relu():\n  # 0 sign bit, 1 integer bit, and 3 fractional bits.\n  bits = 4\n  integer = 1\n  use_sigmoid = False\n  negative_slope = 0\n  use_stochastic_rounding = False\n\n  # input to quantized relu\n  inputs = np.array([0.0, 0.5, -0.5, 0.6, 2.0, 3.0], dtype=np.float32)\n  # float relu\n  x = np.array([0.0, 0.5, 0.0, 0.6, 2.0, 3.0], dtype=np.float32)\n  # float relu with upper bound 1.5\n  x_ub = np.array([0.0, 0.5, 0.0, 0.6, 1.5, 1.5], dtype=np.float32)\n  # float relu with quantized clipping\n  x_clipped = np.array([0.0, 0.5, 0.0, 0.6, 1.875, 1.875], dtype=np.float32)\n  # quantized relu\n  xq = np.array([0.0, 0.5, 0.0, 0.625, 1.875, 1.875], dtype=np.float32)\n\n  # mixing half and half\n  x_xq = 0.5 * (x + xq)\n  x_clipped_xq = 0.5 * (x_clipped + xq)\n  x_ub_xq = 0.5 * (x_ub + xq)\n\n  #########################################\n  # No relu upper bound\n  # No quantized clip for float relu\n  #########################################\n  qr_qc_false = quantized_relu(\n      bits=bits,\n      integer=integer,\n      use_sigmoid=use_sigmoid,\n      negative_slope=negative_slope,\n      use_stochastic_rounding=use_stochastic_rounding,\n      relu_upper_bound=None,\n      is_quantized_clip=False,\n      use_variables=True)\n  # no quantization\n  qr_qc_false.update_qnoise_factor(qnoise_factor=0.0)\n  x_q_0 = qr_qc_false(inputs)\n  assert_equal(x_q_0, x)\n\n  # full quantization\n  qr_qc_false.update_qnoise_factor(qnoise_factor=1.0)\n  x_q_1 = qr_qc_false(inputs)\n  assert_equal(x_q_1, xq)\n\n  # mixing half and half\n  qr_qc_false.update_qnoise_factor(qnoise_factor=0.5)\n  x_q_05 = qr_qc_false(inputs)\n  assert_equal(x_q_05, x_xq)\n\n  #########################################\n  # No relu 
upper bound\n  # Quantized clip for float relu\n  #########################################\n  qr_qc_true = quantized_relu(\n      bits=bits,\n      integer=integer,\n      use_sigmoid=use_sigmoid,\n      negative_slope=negative_slope,\n      use_stochastic_rounding=use_stochastic_rounding,\n      relu_upper_bound=None,\n      is_quantized_clip=True,\n      use_variables=True)\n  # no quantization\n  qr_qc_true.update_qnoise_factor(qnoise_factor=0.0)\n  x_q_0 = qr_qc_true(inputs)\n  assert_equal(x_q_0, x_clipped)\n\n  # full quantization\n  qr_qc_true.update_qnoise_factor(qnoise_factor=1.0)\n  x_q_1 = qr_qc_true(inputs)\n  assert_equal(x_q_1, xq)\n\n  # mixing half and half\n  qr_qc_true.update_qnoise_factor(qnoise_factor=0.5)\n  x_q_05 = qr_qc_true(inputs)\n  assert_equal(x_q_05, x_clipped_xq)\n\n  #########################################\n  # Relu upper bound\n  # No quantized clip for float relu\n  #########################################\n  qr_ub_qc_false = quantized_relu(\n      bits=bits,\n      integer=integer,\n      use_sigmoid=use_sigmoid,\n      negative_slope=negative_slope,\n      use_stochastic_rounding=use_stochastic_rounding,\n      relu_upper_bound=1.5,\n      is_quantized_clip=False,\n      use_variables=True)\n  # no quantization\n  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=0.0)\n  x_q_0 = qr_ub_qc_false(inputs)\n  assert_equal(x_q_0, np.clip(x_ub, a_min=None, a_max=1.5))\n\n  # full quantization\n  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=1.0)\n  x_q_1 = qr_ub_qc_false(inputs)\n  assert_equal(x_q_1, np.clip(xq, a_min=None, a_max=1.5))\n\n  # mixing half and half\n  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=0.5)\n  x_q_05 = qr_ub_qc_false(inputs)\n  assert_equal(x_q_05, np.clip(x_ub_xq, a_min=None, a_max=1.5))\n\n  #########################################\n  # Relu upper bound\n  # Quantized clip for float relu\n  # (The quantized clip has precedence over the relu upper bound.)\n  
#########################################\n  qr_ub_qc_true = quantized_relu(\n      bits=bits,\n      integer=integer,\n      use_sigmoid=use_sigmoid,\n      negative_slope=negative_slope,\n      use_stochastic_rounding=use_stochastic_rounding,\n      relu_upper_bound=1.5,\n      is_quantized_clip=True,\n      use_variables=True)\n  # no quantization\n  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=0.0)\n  x_q_0 = qr_ub_qc_true(inputs)\n  assert_equal(x_q_0, x_clipped)\n\n  # full quantization\n  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=1.0)\n  x_q_1 = qr_ub_qc_true(inputs)\n  assert_equal(x_q_1, xq)\n\n  # mixing half and half\n  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=0.5)\n  x_q_05 = qr_ub_qc_true(inputs)\n  assert_equal(x_q_05, x_clipped_xq)\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qpooling_test.py",
    "content": "# Copyright 2021 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test layers from qpooling.py.\"\"\"\nimport numpy as np\nfrom numpy.testing import assert_allclose\nfrom numpy.testing import assert_raises\nfrom numpy.testing import assert_equal\nimport pytest\nimport logging\nimport tempfile\nimport os\nimport tensorflow.compat.v2 as tf\n\nfrom tensorflow.keras import backend as K\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.layers import AveragePooling2D\nfrom tensorflow.keras.layers import GlobalAveragePooling2D\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.backend import clear_session\n\nfrom qkeras import QAveragePooling2D\nfrom qkeras import QGlobalAveragePooling2D\nfrom qkeras import quantized_bits\nfrom qkeras import binary\nfrom qkeras import ternary\nfrom qkeras.utils import model_save_quantized_weights\nfrom qkeras.utils import quantized_model_from_json\nfrom qkeras.utils import load_qmodel\nfrom qkeras.utils import model_quantize\nfrom qkeras import print_qstats\nfrom qkeras.qtools import qgraph\nfrom qkeras.qtools import generate_layer_data_type_map\nfrom qkeras.qtools import interface\n\n\n@pytest.mark.parametrize(\n    ('pooling, input_size, pool_size, strides, padding, data_format,'\n     'average_quantizer, activation_quantizer,  y'), [\n         ('QAveragePooling2D', (4, 4, 3), (2, 2), 
(2, 2), 'valid',\n          'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),\n          np.array([[[[0.375, 0.625, 0.375], [0.25, 0.75, 0.5]],\n                     [[0.375, 0.25, 0.625], [0.625, 0.5, 0.375]]],\n                    [[[0.375, 0.375, 0.5], [0.375, 0.5, 0.625]],\n                     [[0.75, 0.625, 0.5], [0.5, 0.5, 0.75]]]]).astype(\n                         np.float16)),\n         ('QAveragePooling2D', (4, 4, 3), (3, 3), (3, 3), 'valid',\n          'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),\n          np.array([[[[0.375, 0.625, 0.625]]], [[[0.625, 0.5, 0.625]]]]).astype(\n              np.float16)),\n         ('QGlobalAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',\n          'channels_last', quantized_bits(10, 0, 1), quantized_bits(4, 0, 1),\n          np.array([[0.5, 0.5, 0.375], [0.5, 0.5, 0.625]]).astype(np.float16)),\n         ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'valid',\n          'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),\n          np.array([[[[0.375, 0.625, 0.375]]], [[[0.375, 0.375, 0.5]]]]).astype(\n              np.float16)),\n         ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'same',\n          'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),\n          np.array([[[[0.375, 0.625, 0.375], [0.375, 0.75, 0.25]],\n                     [[0.75, 0.25, 0.375], [0.75, 0.75, 0.25]]],\n                    [[[0.375, 0.375, 0.5], [0.25, 0.625, 0.5]],\n                     [[0.625, 0.625, 0.5], [0.625, 0.625, 0.875]]]]).astype(\n                         np.float16)),\n         ('QAveragePooling2D', (4, 4, 3), (2, 2),\n          (2, 2), 'valid', 'channels_first', quantized_bits(\n              4, 0, 1), quantized_bits(4, 0, 1), None),\n     ])\ndef test_q_average_pooling(pooling, input_size, pool_size, strides, padding,\n                           data_format, average_quantizer,\n                           activation_quantizer, y):\n  
\"\"\"q_average_pooling test utility.\"\"\"\n\n  np.random.seed(33)\n\n  x = Input(input_size)\n  xin = x\n  if pooling == 'QAveragePooling2D':\n    x = QAveragePooling2D(\n        pool_size=pool_size,\n        strides=strides,\n        padding=padding,\n        data_format=data_format,\n        average_quantizer=average_quantizer,\n        activation=activation_quantizer,\n        name='qpooling')(x)\n  else:\n    x = QGlobalAveragePooling2D(\n        data_format=data_format,\n        average_quantizer=average_quantizer,\n        activation=activation_quantizer,\n        name='qpooling')(\n            x)\n  model = Model(inputs=xin, outputs=x)\n\n  # Prints qstats to make sure it works with the pooling layers\n  print_qstats(model)\n\n  size = (2,) + input_size\n  inputs = np.random.rand(size[0], size[1], size[2], size[3])\n\n  if data_format == 'channels_first':\n    assert_raises(tf.errors.InvalidArgumentError, model.predict, inputs)\n  else:\n    p = model.predict(inputs).astype(np.float16)\n    assert_allclose(p, y, rtol=1e-4)\n\n    # Reloads the model to ensure saving/loading works\n    json_string = model.to_json()\n    clear_session()\n    reload_model = quantized_model_from_json(json_string)\n    p = reload_model.predict(inputs).astype(np.float16)\n    assert_allclose(p, y, rtol=1e-4)\n\n    # Saves the model as an h5 file using Keras's model.save()\n    fd, fname = tempfile.mkstemp(\".h5\")\n    model.save(fname)\n    del model  # Delete the existing model\n\n    # Returns a compiled model identical to the previous one\n    loaded_model = load_qmodel(fname)\n\n    # Cleans the created h5 file after loading the model\n    os.close(fd)\n    os.remove(fname)\n\n    # Applies quantizer to weights\n    model_save_quantized_weights(loaded_model)\n    p = loaded_model.predict(inputs).astype(np.float16)\n    assert_allclose(p, y, rtol=1e-4)\n\n\ndef test_qpooling_in_model_quantize():\n  input_size = (16, 16, 3)\n  pool_size = (2, 2)\n\n  x = Input(input_size)\n  xin = 
x\n  x = AveragePooling2D(pool_size=pool_size, name=\"pooling\")(x)\n  x = GlobalAveragePooling2D(name=\"global_pooling\")(x)\n  model = Model(inputs=xin, outputs=x)\n\n  quantize_config = {\n      \"QAveragePooling2D\": {\n          \"average_quantizer\": \"binary\",\n          \"activation_quantizer\": \"binary\"\n      },\n      \"QGlobalAveragePooling2D\": {\n          \"average_quantizer\": \"quantized_bits(4, 0, 1)\",\n          \"activation_quantizer\": \"ternary\"\n      }\n  }\n\n  qmodel = model_quantize(model, quantize_config, 4)\n  print_qstats(qmodel)\n  assert_equal(str(qmodel.layers[1].average_quantizer_internal), \"binary()\")\n  assert_equal(str(qmodel.layers[1].activation), \"binary()\")\n  assert_equal(\n      str(qmodel.layers[2].average_quantizer_internal), \"quantized_bits(4,0,1)\")\n  assert_equal(str(qmodel.layers[2].activation), \"ternary()\")\n\n\ndef test_qpooling_in_qtools():\n  input_size = (16, 16, 3)\n  pool_size = (2, 2)\n  input_quantizers = [quantized_bits(8, 0, 1)]\n  is_inference = False\n\n  x = Input(input_size)\n  xin = x\n  x = QAveragePooling2D(\n      pool_size=pool_size,\n      average_quantizer=binary(),\n      activation=quantized_bits(4, 0, 1),\n      name=\"pooling\")(\n          x)\n  x = QGlobalAveragePooling2D(\n      average_quantizer=quantized_bits(4, 0, 1),\n      activation=ternary(),\n      name=\"global_pooling\")(\n          x)\n  model = Model(inputs=xin, outputs=x)\n\n  (graph, source_quantizer_list) = qgraph.CreateGraph(\n      model, input_quantizers)\n\n  qgraph.GraphPropagateActivationsToEdges(graph)\n\n  layer_map = generate_layer_data_type_map.generate_layer_data_type_map(\n      graph, source_quantizer_list, is_inference)\n\n  dtype_dict = interface.map_to_json(layer_map)\n\n  # Checks the QAveragePooling layer datatype\n  multiplier = dtype_dict[\"pooling\"][\"pool_avg_multiplier\"]\n  accumulator = dtype_dict[\"pooling\"][\"pool_sum_accumulator\"]\n  average_quantizer  = 
dtype_dict[\"pooling\"][\"average_quantizer\"]\n  output = dtype_dict[\"pooling\"][\"output_quantizer\"]\n\n  assert_equal(multiplier[\"quantizer_type\"], \"quantized_bits\")\n  assert_equal(multiplier[\"bits\"], 10)\n  assert_equal(multiplier[\"int_bits\"], 3)\n  assert_equal(multiplier[\"is_signed\"], 1)\n  assert_equal(multiplier[\"op_type\"], \"mux\")\n\n  assert_equal(accumulator[\"quantizer_type\"], \"quantized_bits\")\n  assert_equal(accumulator[\"bits\"], 10)\n  assert_equal(accumulator[\"int_bits\"], 3)\n  assert_equal(accumulator[\"is_signed\"], 1)\n  assert_equal(accumulator[\"op_type\"], \"add\")\n\n  assert_equal(output[\"quantizer_type\"], \"quantized_bits\")\n  assert_equal(output[\"bits\"], 4)\n  assert_equal(output[\"int_bits\"], 1)\n  assert_equal(output[\"is_signed\"], 1)\n\n  assert_equal(average_quantizer[\"quantizer_type\"], \"binary\")\n  assert_equal(average_quantizer[\"bits\"], 1)\n  assert_equal(average_quantizer[\"int_bits\"], 1)\n  assert_equal(average_quantizer[\"is_signed\"], 1)\n\n  # Checks the QGlobalAveragePooling layer datatype\n  multiplier = dtype_dict[\"global_pooling\"][\"pool_avg_multiplier\"]\n  accumulator = dtype_dict[\"global_pooling\"][\"pool_sum_accumulator\"]\n  average_quantizer  = dtype_dict[\"global_pooling\"][\"average_quantizer\"]\n  output = dtype_dict[\"global_pooling\"][\"output_quantizer\"]\n\n  assert_equal(multiplier[\"quantizer_type\"], \"quantized_bits\")\n  assert_equal(multiplier[\"bits\"], 13)\n  assert_equal(multiplier[\"int_bits\"], 7)\n  assert_equal(multiplier[\"is_signed\"], 1)\n  assert_equal(multiplier[\"op_type\"], \"mul\")\n\n  assert_equal(accumulator[\"quantizer_type\"], \"quantized_bits\")\n  assert_equal(accumulator[\"bits\"], 10)\n  assert_equal(accumulator[\"int_bits\"], 7)\n  assert_equal(accumulator[\"is_signed\"], 1)\n  assert_equal(accumulator[\"op_type\"], \"add\")\n\n  assert_equal(output[\"quantizer_type\"], \"ternary\")\n  assert_equal(output[\"bits\"], 2)\n  
assert_equal(output[\"int_bits\"], 2)\n  assert_equal(output[\"is_signed\"], 1)\n\n  assert_equal(average_quantizer[\"quantizer_type\"], \"quantized_bits\")\n  assert_equal(average_quantizer[\"bits\"], 4)\n  assert_equal(average_quantizer[\"int_bits\"], 1)\n  assert_equal(average_quantizer[\"is_signed\"], 1)\n\n\ndef test_QAveragePooling_output():\n  # Checks if the output of QAveragePooling layer with average_quantizer\n  # is correct.\n  x = np.ones(shape=(2, 6, 6, 1))\n  x[0, 0, :, :] = 0\n  x = tf.constant(x)\n\n  y = QAveragePooling2D(\n      pool_size=(3, 3),\n      strides=3,\n      padding=\"valid\",\n      average_quantizer=\"quantized_bits(8, 1, 1)\")(x)\n  y = y.numpy()\n  assert np.all(y == [[[[0.65625], [0.65625]], [[0.984375], [0.984375]]],\n                      [[[0.984375], [0.984375]], [[0.984375], [0.984375]]]])\n\n\ndef test_QGlobalAveragePooling_output():\n  # Checks if the output of QGlobalAveragePooling layer with average_quantizer\n  # is correct.\n  x = np.ones(shape=(2, 3, 3, 2))\n  x[0, 0, 1, :] = 0\n  x = tf.constant(x)\n  y = QGlobalAveragePooling2D(average_quantizer=\"quantized_bits(8, 1, 1)\")(x)\n  y = y.numpy()\n  assert np.all(y == np.array([[0.875, 0.875], [0.984375, 0.984375]]))\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qrecurrent_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for qrecurrent.py.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\nimport json\nimport os\nimport tempfile\n\nimport numpy as np\nfrom numpy.testing import assert_allclose\nimport pytest\nimport tensorflow as tf\nfrom tensorflow.keras import backend as K\nfrom tensorflow.keras.backend import clear_session\nfrom tensorflow.keras.layers import Activation\nfrom tensorflow.keras.layers import Bidirectional\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.layers import Flatten\nfrom tensorflow.keras.layers import GRU\nfrom tensorflow.keras.layers import Input\nfrom tensorflow.keras.layers import LSTM\nfrom tensorflow.keras.layers import SimpleRNN\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.models import Sequential\n\nfrom qkeras import QActivation\nfrom qkeras import QBidirectional\nfrom qkeras import QDense\nfrom qkeras import QGRU\nfrom qkeras import QLSTM\nfrom qkeras import QSimpleRNN\nfrom qkeras import quantized_bits\nfrom qkeras import quantized_tanh\nfrom qkeras.utils import load_qmodel\nfrom qkeras.utils import model_quantize\nfrom qkeras.utils import model_save_quantized_weights\nfrom qkeras.utils import 
quantized_model_from_json\n\n@pytest.mark.skip(reason=\"Test failing due to random weight initialization\")\n@pytest.mark.parametrize('rnn, all_weights_signature, expected_output', [\n    (QSimpleRNN,\n     np.array([5.109375, -1.8828125, 0.0, -0.5, 0.0], dtype=np.float32),\n     np.array(\n              [[0.281, 0.4956, 0.1047, 0.1188],\n               [0.185, 0.6016, 0.0977, 0.1157],\n               [0.3892, 0.483, 0.03528, 0.0926],\n               [0.4038, 0.511, 0.01686, 0.06824],\n               [0.3354, 0.5376, 0.02602, 0.101],\n               [0.2043, 0.587, 0.04147, 0.1675],\n               [0.2297, 0.6455, 0.0456, 0.0789],\n               [0.4512, 0.4326, 0.01938, 0.0968],\n               [0.6304, 0.2498, 0.03345, 0.0866],\n               [0.4924, 0.3735, 0.011925, 0.1222]],\n              dtype=np.float16)),\n    (QLSTM, np.array([3.7421875, 2.1328125, 15.875, -0.5, 0.0],\n                     dtype=np.float32),\n     np.array(\n              [[0.27, 0.1814, 0.3108, 0.2378],\n               [0.2976, 0.2424, 0.248, 0.2119],\n               [0.3054, 0.2004, 0.2705, 0.2238],\n               [0.325, 0.1656, 0.269, 0.2401],\n               [0.271, 0.1796, 0.3, 0.2493],\n               [0.3066, 0.1873, 0.2477, 0.2583],\n               [0.2798, 0.1757, 0.2944, 0.25],\n               [0.2693, 0.2335, 0.2534, 0.2437],\n               [0.2808, 0.2057, 0.2712, 0.2422],\n               [0.2732, 0.2336, 0.2491, 0.244]],\n              dtype=np.float16)),\n    (QGRU, np.array([4.6875, 4.3984375, 0.0, -0.5, 0.0], dtype=np.float32),\n     np.array(\n              [[0.2025, 0.3467, 0.2952, 0.1556],\n               [0.2935, 0.3313, 0.2058, 0.1694],\n               [0.2046, 0.4465, 0.1827, 0.1661],\n               [0.1913, 0.498, 0.1583, 0.1525],\n               [0.1578, 0.477, 0.1973, 0.1677],\n               [0.2018, 0.44, 0.1714, 0.1869],\n               [0.157, 0.551, 0.1709, 0.12115],\n               [0.1973, 0.4353, 0.1672, 0.2001],\n               [0.1622, 0.5146, 
0.1741, 0.149],\n               [0.2101, 0.3855, 0.2069, 0.1976]],\n              dtype=np.float16)),\n])\ndef test_qrnn(rnn, all_weights_signature, expected_output):\n  K.set_learning_phase(0)\n  np.random.seed(22)\n  tf.random.set_seed(22)\n\n  x = x_in = Input((2, 4), name='input')\n  x = rnn(\n    16,\n    activation=quantized_tanh(bits=8, symmetric=True),\n    kernel_quantizer=quantized_bits(8, 0, 1, alpha=1.0),\n    recurrent_quantizer=quantized_bits(8, 0, 1, alpha=1.0),\n    bias_quantizer=quantized_bits(8, 0, 1, alpha=1.0),\n    state_quantizer=quantized_bits(4, 0, 1, alpha=1.0),\n    name='qrnn_0')(\n        x)\n  x = QDense(\n      4,\n      kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      name='dense')(\n          x)\n  x = Activation('softmax', name='softmax')(x)\n\n  model = Model(inputs=[x_in], outputs=[x])\n\n  # reload the model to ensure saving/loading works\n  json_string = model.to_json()\n  clear_session()\n  model = quantized_model_from_json(json_string)\n\n  # Save the model as an h5 file using Keras's model.save()\n  fd, fname = tempfile.mkstemp('.h5')\n  model.save(fname)\n  del model  # Delete the existing model\n\n  # Return a compiled model identical to the previous one\n  model = load_qmodel(fname)\n\n  # Clean the created h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  # apply quantizer to weights\n  model_save_quantized_weights(model)\n\n  all_weights = []\n\n  for layer in model.layers:\n    for i, weights in enumerate(layer.get_weights()):\n      w = np.sum(weights)\n      all_weights.append(w)\n\n  all_weights = np.array(all_weights)\n\n  assert all_weights.size == all_weights_signature.size\n  assert np.all(all_weights == all_weights_signature)\n\n  # test forward:\n  inputs = 2 * np.random.rand(10, 2, 4)\n  actual_output = model.predict(inputs).astype(np.float16)\n  assert_allclose(actual_output, expected_output, 
rtol=1e-4)\n\n\n@pytest.mark.skip(reason=\"Test failing due to random weight initialization\")\n@pytest.mark.parametrize('rnn, all_weights_signature, expected_output', [\n    (QSimpleRNN,\n     np.array([\n         -2.6562500e+00, -4.3466797e+00, 8.6736174e-19, 6.2548828e-01,\n         -6.0751953e+00, 8.6736174e-19, -7.5000000e-01, 0.0\n     ],\n              dtype=np.float32),\n     np.array(\n         [[0.0851, 0.1288, 0.586, 0.2002], [0.1044, 0.1643, 0.7217, 0.00978],\n          [0.04135, 0.0537, 0.8706, 0.03455], [0.03354, 0.0489, 0.889, 0.02852],\n          [0.04358, 0.05246, 0.7563, 0.1478], [0.03403, 0.0743, 0.4177, 0.4739],\n          [0.0859, 0.1567, 0.3972, 0.36], [0.27, 0.1945, 0.4841, 0.05124],\n          [0.12115, 0.05722, 0.728, 0.0938], [0.2864, 0.1262, 0.339, 0.2484]],\n         dtype=np.float16)),\n    (QLSTM,\n     np.array(\n         [-4.1406555, 3.2921143, 16., 7.0236816, 4.1237793, 16., -0.75, 0.],\n         dtype=np.float32),\n     np.array(\n         [[0.301, 0.2236, 0.2275, 0.2478], [0.2135, 0.2627, 0.2439, 0.2798],\n          [0.1671, 0.2252, 0.2844, 0.3232], [0.2211, 0.2178, 0.2817, 0.2795],\n          [0.3384, 0.1732, 0.2451, 0.2434], [0.296, 0.1979, 0.2468, 0.2593],\n          [0.2698, 0.1753, 0.288, 0.267], [0.258, 0.1888, 0.3228, 0.2301],\n          [0.2169, 0.1578, 0.3699, 0.2554], [0.2783, 0.1816, 0.2986, 0.2415]],\n         dtype=np.float16)),\n    (QGRU,\n     np.array([\n         -6.7578125e-01, 3.6837769e-01, 2.6020852e-18, 4.1682129e+00,\n         -7.5769043e-01, 2.6020852e-18, -7.5000000e-01, 0.0\n     ],\n              dtype=np.float32),\n     np.array(\n         [[0.278, 0.1534, 0.314, 0.2546], [0.1985, 0.1788, 0.3823, 0.2402],\n          [0.1997, 0.1621, 0.3792, 0.259], [0.2534, 0.1605, 0.281, 0.3052],\n          [0.3794, 0.1266, 0.2296, 0.2642], [0.285, 0.1754, 0.2847, 0.255],\n          [0.2878, 0.1339, 0.3042, 0.274], [0.2874, 0.1475, 0.279, 0.2861],\n          [0.2379, 0.1356, 0.3186, 0.3079], [0.2234, 0.1476, 0.3274, 
0.3015]],\n         dtype=np.float16))\n])\ndef test_qbidirectional(rnn, all_weights_signature, expected_output):\n  K.set_learning_phase(0)\n  np.random.seed(22)\n  tf.random.set_seed(22)\n\n  x = x_in = Input((2, 4), name='input')\n  x = QBidirectional(\n      rnn(16,\n          activation='quantized_po2(8)',\n          kernel_quantizer='quantized_po2(8)',\n          recurrent_quantizer='quantized_po2(8)',\n          bias_quantizer='quantized_po2(8)',\n          state_quantizer='quantized_po2(8)',\n          name='qbirnn_0'))(\n              x)\n  x = QDense(\n      4,\n      kernel_quantizer=quantized_bits(8, 2, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(8, 0, 1),\n      name='dense')(\n          x)\n  x = Activation('softmax', name='softmax')(x)\n\n  model = Model(inputs=[x_in], outputs=[x])\n\n  # reload the model to ensure saving/loading works\n  json_string = model.to_json()\n  clear_session()\n  model = quantized_model_from_json(json_string)\n\n  # Save the model as an h5 file using Keras's model.save()\n  fd, fname = tempfile.mkstemp('.h5')\n  model.save(fname)\n  del model  # Delete the existing model\n\n  # Return a compiled model identical to the previous one\n  model = load_qmodel(fname)\n\n  # Clean the created h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  # apply quantizer to weights\n  model_save_quantized_weights(model)\n\n  all_weights = []\n\n  for layer in model.layers:\n    for _, weights in enumerate(layer.get_weights()):\n\n      w = np.sum(weights)\n      all_weights.append(w)\n\n  all_weights = np.array(all_weights)\n  assert all_weights.size == all_weights_signature.size\n  assert np.all(all_weights == all_weights_signature)\n\n  # test forward:\n  inputs = 2 * np.random.rand(10, 2, 4)\n  actual_output = model.predict(inputs).astype(np.float16)\n  assert_allclose(actual_output, expected_output, rtol=1e-4)\n\n\ndef create_network_rnn(rnn):\n  xi = Input((16, 1,))\n  x = rnn(8)(xi)\n  return 
Model(inputs=xi, outputs=x)\n\n\n@pytest.mark.parametrize('rnn', [SimpleRNN, LSTM, GRU])\ndef test_rnn_conversion(rnn):\n  m = create_network_rnn(rnn)\n  m.use_legacy_config = True\n  name = 'Q' + m.layers[1].__class__.__name__\n  d = {\n      name: {\n          'kernel_quantizer': 'binary',\n          'recurrent_quantizer': 'binary',\n          'bias_quantizer': 'binary',\n          'state_quantizer': 'binary',\n          'activation_quantizer': 'binary',\n      }\n  }\n  if name != 'QSimpleRNN':\n    d[name]['recurrent_activation_quantizer'] = 'binary'\n\n  qq = model_quantize(m, d, 4)\n  assert str(qq.layers[1].kernel_quantizer) == 'binary'\n  assert str(qq.layers[1].recurrent_quantizer) == 'binary'\n  assert str(qq.layers[1].bias_quantizer) == 'binary'\n  assert str(qq.layers[1].state_quantizer) == 'binary'\n  assert str(qq.layers[1].activation) == 'binary()'\n  if name != 'QSimpleRNN':\n    assert str(qq.layers[1].recurrent_activation) == 'binary()'\n\n\ndef create_network_birnn(rnn):\n  xi = Input((16, 1,))\n  x = Bidirectional(rnn(8))(xi)\n  return Model(inputs=xi, outputs=x)\n\n\n@pytest.mark.parametrize('rnn', [SimpleRNN, LSTM, GRU])\ndef test_birnn_conversion(rnn):\n  m = create_network_birnn(rnn)\n  m.use_legacy_config = True\n  name = 'Q' + m.layers[1].layer.__class__.__name__\n  d = {\n      'QBidirectional': {\n          'kernel_quantizer': 'binary',\n          'recurrent_quantizer': 'binary',\n          'bias_quantizer': 'binary',\n          'state_quantizer': 'binary',\n          'activation_quantizer': 'binary',\n      }\n  }\n  if name != 'QSimpleRNN':\n    d['QBidirectional']['recurrent_activation_quantizer'] = 'binary'\n\n  qq = model_quantize(m, d, 4)\n  layer = qq.layers[1].layer\n  assert str(layer.kernel_quantizer) == 'binary'\n  assert str(layer.recurrent_quantizer) == 'binary'\n  assert str(layer.bias_quantizer) == 'binary'\n  assert str(layer.state_quantizer) == 'binary'\n  assert str(layer.activation) == 'binary()'\n  if name != 
'QSimpleRNN':\n    assert str(layer.recurrent_activation) == 'binary()'\n  backward_layer = qq.layers[1].backward_layer\n  # backwards weight quantizers are dict because of contraints.serialize\n  assert str(backward_layer.kernel_quantizer['class_name']) == 'binary'\n  assert str(backward_layer.recurrent_quantizer['class_name']) == 'binary'\n  assert str(backward_layer.bias_quantizer['class_name']) == 'binary'\n  assert str(backward_layer.state_quantizer['class_name']) == 'binary'\n  assert str(backward_layer.activation) == 'binary()'\n  if name != 'QSimpleRNN':\n    assert str(backward_layer.recurrent_activation) == 'binary()'\n\n\ndef test_birnn_subrnn():\n  model = Sequential([Bidirectional(LSTM(16)), LSTM(8)])\n  d = {\n      'QLSTM': {\n          'activation_quantizer': 'ternary',\n          'recurrent_activation_quantizer': 'ternary',\n          'kernel_quantizer': 'ternary',\n          'recurrent_quantizer': 'ternary',\n          'bias_quantizer': 'ternary',\n          'state_quantizer': 'ternary',\n      },\n      'QBidirectional': {\n          'activation_quantizer': 'binary',\n          'recurrent_activation_quantizer': 'binary',\n          'kernel_quantizer': 'binary',\n          'recurrent_quantizer': 'binary',\n          'bias_quantizer': 'binary',\n          'state_quantizer': 'binary',\n      }\n  }\n  qmodel = model_quantize(model, d, 4)\n  layer = qmodel.layers[1]\n  assert str(layer.kernel_quantizer) == 'ternary'\n  assert str(layer.recurrent_quantizer) == 'ternary'\n  assert str(layer.bias_quantizer) == 'ternary'\n  assert str(layer.state_quantizer) == 'ternary'\n  assert str(layer.activation) == 'ternary()'\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qseparable_conv2d_transpose_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test layers from qconvolutional.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport tempfile\n\nimport numpy as np\nfrom numpy.testing import assert_allclose, assert_equal\nimport pytest\nimport tensorflow as tf\n\nfrom qkeras import QSeparableConv2DTranspose\nfrom qkeras import quantized_bits\n\n\ndef create_model():\n  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))\n  x = QSeparableConv2DTranspose(\n      filters=2,\n      kernel_size=(2, 2),\n      strides=(2, 2),\n      padding=\"same\",\n      name=\"conv2d_tran\",\n      depthwise_activation=None,\n      pointwise_activation=None,\n      depthwise_kernel_quantizer=None,\n      pointwise_kernel_quantizer=None,\n      bias_quantizer=None,\n  )(x)\n\n  model = tf.keras.Model(inputs=img_input, outputs=x)\n\n  return model\n\n\ndef create_quantized_model():\n  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))\n  x = QSeparableConv2DTranspose(\n      filters=2,\n      kernel_size=(2, 2),\n      strides=(1, 1),\n      padding=\"same\",\n      name=\"conv2d_tran\",\n      depthwise_activation=\"quantized_bits(10, 6, 1)\",\n      pointwise_activation=\"quantized_bits(5, 3, 1)\",\n      depthwise_kernel_quantizer=quantized_bits(1, 
0, 1, alpha=1.0),\n      pointwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),\n      bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),\n  )(x)\n\n  model = tf.keras.Model(inputs=img_input, outputs=x)\n\n  return model\n\n\ndef test_qseparable_conv2d_transpose():\n  # By setting the weights and input values manually, we can test\n  # the correctness of the output.\n\n  # Input is (1, 4, 4, 3), with 3 output channels. For i-th channel,\n  # with shape (1, 4, 4, 1), it will convolve with the depthwise kernel at\n  # i-th channel. Depthwise outputs are (1, 8, 8, 3). DW output is then\n  # mapped from input channel(3) to output channel (2) by pointwise conv.\n  # Pointwise conv output is (1, 8, 8, 2).\n\n  # Create model.\n  model = create_model()\n\n  output_shape = model.output_shape\n  ws = model.layers[1].weights\n\n  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])\n  inputs = np.concatenate([x, x, x], axis=-1)\n  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)\n\n  k = tf.ones((2, 2, 1, 1))\n  dw_kernel = tf.concat([k, 1+k, 2+k], axis=-1)\n\n  k = tf.ones((1, 1, 1, 3))\n  pt_kernel = tf.concat([k, 1+k], axis=-2)\n\n  bias = tf.zeros((2,))\n\n  model.layers[1].set_weights([dw_kernel, pt_kernel, bias])\n\n  actual_output = model.predict(inputs).astype(np.float16)\n\n  predicted = np.array(\n      [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.],\n         [0., 0.], [0., 0.]],\n        [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.],\n         [0., 0.], [0., 0.]],\n        [[6., 12.], [6., 12.], [6., 12.], [6., 12.], [6., 12.], [6., 12.],\n         [6., 12.], [6., 12.]],\n        [[6., 12.], [6., 12.], [6., 12.], [6., 12.], [6., 12.],\n         [6., 12.], [6., 12.], [6., 12.]],\n        [[12., 24.], [12., 24.], [12., 24.], [12., 24.], [12., 24.],\n         [12., 24.], [12., 24.], [12., 24.]],\n        [[12., 24.], [12., 24.], [12., 24.], [12., 24.], [12., 24.],\n         [12., 24.], [12., 
24.], [12., 24.]],\n        [[18., 36.], [18., 36.], [18., 36.], [18., 36.], [18., 36.],\n         [18., 36.], [18., 36.], [18., 36.]],\n        [[18., 36.], [18., 36.], [18., 36.], [18., 36.], [18., 36.],\n         [18., 36.], [18., 36.], [18., 36.]]]])\n\n  assert_equal(output_shape[1:], (8, 8, 2))\n  assert_equal(len(ws), 3)\n\n  # Test if the depthwise conv kernel shape is correct.\n  assert_equal(ws[0].shape, (2, 2, 1, 3))\n\n  # Test if the pointwise conv kernel shape is correct.\n  assert_equal(ws[1].shape, (1, 1, 2, 3))\n\n  # Test if the bias shape is correct.\n  assert_equal(ws[2].shape, (2,))\n\n  # Test if overall output is correct.\n  assert_equal(actual_output, predicted)\n\n\ndef test_quantization_in_separable_conv2d_transpose():\n  # Test if quantization is applied correctly.\n\n  # Create model with quantization.\n  model = create_quantized_model()\n\n  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])\n  inputs = np.concatenate([x, x, x], axis=-1)\n  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)\n\n  k = tf.ones((2, 2, 1, 1))\n  dw_kernel = tf.concat([k, 1+k, 2+k], axis=-1)\n\n  k = tf.ones((1, 1, 1, 3))\n  pt_kernel = tf.concat([k, 1+k], axis=-2)\n\n  bias = tf.ones((2,))\n\n  model.layers[1].set_weights([dw_kernel, pt_kernel, bias])\n\n  actual_output = model.predict(inputs).astype(np.float16)\n\n  qs = model.layers[1].get_quantizers()\n  assert_equal(len(qs), 5)\n  assert_equal(str(qs[0]), \"quantized_bits(1,0,1,alpha=1.0)\")\n  assert_equal(str(qs[1]), \"quantized_bits(1,0,1,alpha=1.0)\")\n  assert_equal(str(qs[2]), \"quantized_bits(2,2,1,alpha=1.0)\")\n  assert_equal(str(qs[3]), \"quantized_bits(10,6,1)\")\n  assert_equal(str(qs[4]), \"quantized_bits(5,3,1)\")\n\n  expected = np.array(\n      [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.]],\n        [[3., 3.], [6., 6.], [6., 6.], [6., 6.]],\n        [[7.5, 7.5], [7.5, 7.5], [7.5, 7.5], [7.5, 7.5]],\n        [[7.5, 7.5], [7.5, 7.5], [7.5, 7.5], [7.5, 
7.5]]]]\n  )\n\n  assert_equal(actual_output, expected)\n\n\ndef test_save_and_load_model():\n  # Test if the model can be loaded from a saved model.\n  model = create_quantized_model()\n\n  fd, fname = tempfile.mkstemp(\".hdf5\")\n  model.save(fname)\n\n  custom_object = {\n      \"QSeparableConv2DTranspose\": QSeparableConv2DTranspose,\n  }\n\n  model_loaded = tf.keras.models.load_model(\n      fname, custom_objects=custom_object)\n\n  # Clean the h5 file after loading the model\n  os.close(fd)\n  os.remove(fname)\n\n  model_weights = model.layers[1].weights\n  loaded_model_weights = model_loaded.layers[1].weights\n\n  assert_equal(len(model_weights), len(loaded_model_weights))\n  for i, model_weight in enumerate(model_weights):\n    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qtools_model_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for various model architectures.\"\"\"\n\nimport json\n\nimport numpy as np\nimport pytest\nimport tensorflow.keras as keras\nimport tensorflow as tf\n\nfrom qkeras import QActivation\nfrom qkeras import QAdaptiveActivation\nfrom qkeras import QBatchNormalization\nfrom qkeras import QConv2D\nfrom qkeras import QDepthwiseConv2D\nfrom qkeras import QDense\nfrom qkeras import quantizers\nfrom qkeras.qtools import interface\nfrom qkeras.qtools import qgraph\nfrom qkeras.qtools import run_qtools\nfrom qkeras.qtools import settings as qtools_settings\nfrom qkeras.qtools.quantized_operators import divider_factory\nfrom qkeras.qtools import generate_layer_data_type_map\nfrom qkeras.utils import model_save_quantized_weights\nfrom qkeras.qtools.quantized_operators import adder_impl\nfrom qkeras.qtools.quantized_operators import quantizer_impl\nfrom qkeras.qtools.DnC import divide_and_conquer\n\n\ndef qdense_model_fork():\n  x = x_in = keras.layers.Input((23,), name=\"input\")\n  x = QDense(\n      10,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizers.quantized_po2(3, 1),\n      name=\"qdense_0\")(x)\n  x = QDense(\n      20,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n     
 bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizers.quantized_relu(6, 2),\n      name=\"qdense_1\")(x)\n  x = QActivation(\"quantized_relu(4)\", name=\"QA_2\")(x)\n  x_1 = QDense(\n      30,\n      kernel_quantizer=quantizers.binary(),\n      bias_quantizer=quantizers.binary(),\n      name=\"qdense_3\")(x)\n  x_2 = QActivation(\"quantized_relu(6,2)\", name=\"QA_3\")(x)\n\n  model = keras.Model(\n      inputs=[x_in], outputs=[x_1, x_2,])\n  return model\n\n\ndef qconv_model():\n  x = x_in = keras.layers.Input((23, 23, 1), name=\"input\")\n  x = QActivation(\"quantized_relu(4)\", name=\"QA_0\")(x)\n  x = QConv2D(\n      16, 2, 2,\n      kernel_quantizer=quantizers.binary(),\n      bias_quantizer=quantizers.ternary(),\n      name=\"qconv2d_1\")(x)\n  x = QConv2D(\n      8, 2, 2,\n      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(4, 0, 1),\n      activation=quantizers.quantized_relu(6, 2),\n      name=\"qconv2D_2\")(x)\n  x = QConv2D(\n      2, 2, 2,\n      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(4, 0, 1),\n      activation=quantizers.quantized_relu(6, 2),\n      name=\"qconv2d_3\")(x)\n  x = QActivation(\"quantized_bits(6, 0, 1)\", name=\"QA_4\")(x)\n\n  model = keras.Model(\n      inputs=[x_in], outputs=[x])\n  return model\n\n\ndef po2_qbits_model():\n  x = x_in = keras.layers.Input((23, 23, 1), name=\"input\")\n  x = QActivation(\"quantized_relu_po2(3, 2)\", name=\"QA_0\")(x)\n  x = QConv2D(\n      16, 2, 2,\n      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(4, 0, 1),\n      name=\"qconv2d_1\")(x)\n\n  model = keras.Model(inputs=[x_in], outputs=[x])\n  return model\n\n\ndef float_po2_model():\n  x = x_in = keras.layers.Input((23, 23, 1), name=\"input\")\n  x = QConv2D(\n      16, 2, 2,\n      kernel_quantizer=quantizers.quantized_po2(5, 0),\n      
bias_quantizer=quantizers.quantized_po2(5, 0),\n      name=\"qconv2d_1\")(x)\n  x = QActivation(\"quantized_relu_po2(3, 2)\", name=\"QA_0\")(x)\n  x = QConv2D(\n      10, 2, 2,\n      kernel_quantizer=quantizers.quantized_bits(5, 2, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 2, 1),\n      name=\"qconv2d_0\")(x)\n  model = keras.Model(\n      inputs=[x_in], outputs=[x])\n\n  for layer in model.layers:\n    print(layer)\n    print(layer.output_shape)\n  return model\n\n\ndef qbn_model(\n    act=\"binary(use_01=0)\",\n    gamma=quantizers.quantized_relu_po2(4, 2),\n    variance=quantizers.quantized_relu_po2(4, 2),\n    beta=None, mean=None):\n\n  x = x_in = keras.layers.Input((23, 23, 1), name=\"input\")\n  x = QActivation(act, name=\"QA_0\")(x)\n  x = QBatchNormalization(\n      gamma_quantizer=gamma,\n      variance_quantizer=variance,\n      beta_quantizer=beta,\n      mean_quantizer=mean,\n      gamma_range=8, beta_range=4, name=\"qbn_1\")(x)\n\n  model = keras.Model(\n      inputs=[x_in], outputs=[x])\n\n  return model\n\n\ndef qbn_model_inference():\n\n  x = x_in = keras.layers.Input((23, 23, 1), name=\"input\")\n  x = QConv2D(\n      4, 2, 23,\n      kernel_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),\n      bias_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),\n      use_bias=False,\n      name=\"qconv2d_1\")(x)\n  x = QBatchNormalization(\n      mean_quantizer=quantizers.quantized_bits(6, 0, 1),\n      gamma_quantizer=None,\n      variance_quantizer=None,\n      beta_quantizer=quantizers.quantized_bits(6, 0, 1),\n      inverse_quantizer=quantizers.quantized_bits(16, 0, 1),\n      scale=False,\n      center=False,\n      gamma_range=8, beta_range=4, name=\"qbn_2\")(x)\n  x = QActivation(activation=\"quantized_bits(5, 0, 1)\", name=\"act\")(x)\n  x = QConv2D(\n      2, 1, 1,\n      kernel_quantizer=quantizers.quantized_bits(3, 0),\n      bias_quantizer=quantizers.quantized_bits(3, 2),\n      name=\"qconv2d_3\")(x)\n  # Add an 
extra QNormalization here to test auto_po2 type of inverse_quantizer\n  # in batchnorm fusing.\n  x = QBatchNormalization(\n      mean_quantizer=quantizers.quantized_bits(6, 0, 1),\n      gamma_quantizer=None,\n      variance_quantizer=None,\n      beta_quantizer=quantizers.quantized_bits(6, 0, 1),\n      inverse_quantizer=quantizers.quantized_bits(8, 0, 1, alpha=\"auto_po2\"),\n      scale=False,\n      center=False,\n      gamma_range=8, beta_range=4, name=\"qbn_4\")(x)\n\n  model = keras.Model(inputs=[x_in], outputs=[x])\n  model.compile(loss=\"mse\", run_eagerly=True)\n  model.get_layer(\"qconv2d_1\").set_weights([\n      np.array([[[[0.11, -0.5, -0.14, -0.41]], [[-0.4, 0.9, 0.6, -1.]]],\n                [[[-0.35, 1., 0.54, 0.17]], [[0.39, -0.2, -0.41, -0.7]]]])\n  ])\n  model.get_layer(\"qbn_2\").set_weights(\n      [np.array([0., 0, 0, 0.]),\n       np.array([1, 1, 1, 1])])\n  model.get_layer(\"qconv2d_3\").set_weights([\n      np.array([[[[1.2, -1.5], [10., 1.3], [-0.7, 1.2], [1.7, 1.5]]]]),\n      np.array([0.7, 0.8])\n  ])\n  model.get_layer(\"qbn_4\").set_weights(\n      [np.array([0, 0]), np.array([0.3, 16.8])])\n\n  hw_weight_dict = model_save_quantized_weights(model)\n  return (hw_weight_dict, model)\n\n\ndef add_qmodel(quantizer1, quantizer2, quantizer3):\n\n  # Layer that add a list of inputs.\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n\n  x1 = input1 = keras.layers.Input((16,), name=\"input_0\")\n  x1 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizer1, name=\"dense_0\")(x1)\n\n  x2 = input2 = keras.layers.Input(shape=(32,), name=\"input_1\")\n  x2 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizer2, name=\"dense_1\")(x2)\n\n  x3 = input3 = 
keras.layers.Input(shape=(64,), name=\"input_2\")\n  x3 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizer3, name=\"dense_2\")(x3)\n\n  x = keras.layers.add([x1, x2, x3], name=\"add\")\n\n  model = keras.Model(\n      inputs=[input1, input2, input3], outputs=[x])\n\n  return model\n\n\ndef multiply_qmodel():\n\n  # element-wise multiply a list of inputs.\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n  x1 = input1 = keras.layers.Input((16,), name=\"input_0\")\n  x1 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizers.quantized_bits(4, 0, 1),\n      name=\"dense_0\")(x1)\n\n  x2 = input2 = keras.layers.Input(shape=(32,), name=\"input_1\")\n  x2 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizers.quantized_bits(5, 0, 1),\n      name=\"dense_1\")(x2)\n\n  x3 = input3 = keras.layers.Input(shape=(64,), name=\"input_2\")\n  x3 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizers.quantized_bits(6, 0, 1),\n      name=\"dense_2\")(x3)\n\n  x = keras.layers.multiply([x1, x2, x3], name=\"multiply\")\n  model = keras.Model(\n      inputs=[input1, input2, input3], outputs=[x])\n\n  return model\n\n\ndef pooling_qmodel():\n\n  # Average pooling and global average pooling operation for spatial data.\n  x = input1 = keras.layers.Input((16, 16, 3), name=\"input\")\n  x = keras.layers.AveragePooling2D(pool_size=(2, 2), padding=\"valid\",\n                                    name=\"avg_pooling\")(x)\n  x = 
keras.layers.GlobalAveragePooling2D(name=\"global_avg_pooling\")(x)\n\n  model = keras.Model(inputs=[input1], outputs=[x])\n\n  return model\n\n\ndef maximum_qmodel(quantizer1, quantizer2, quantizer3):\n\n  # element-wise maximum/minimum/average of a list of inputs.\n  # It takes as input a list of tensors, all of the same shape,\n  # and returns a single tensor (also of the same shape).\n  x1 = input1 = keras.layers.Input((16,), name=\"input_0\")\n  x1 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizer1, name=\"qdense_0\")(x1)\n\n  x2 = input2 = keras.layers.Input(shape=(32,), name=\"input_1\")\n  x2 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizer2, name=\"dense_1\")(x2)\n\n  x3 = input3 = keras.layers.Input(shape=(64,), name=\"input_2\")\n  x3 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      activation=quantizer3, name=\"dense_2\")(x3)\n\n  x = keras.layers.maximum([x1, x2, x3], name=\"maximum\")\n  model = keras.Model(\n      inputs=[input1, input2, input3], outputs=[x])\n\n  return model\n\n\ndef concatenate_qmodel(quantizer1, quantizer2, quantizer3):\n\n  # Layer that concatenates a list of inputs.\n  # It takes as input a list of tensors, all of the same shape except\n  # for the concatenation axis, and returns a single tensor,\n  # the concatenation of all inputs..\n\n  x1 = input1 = keras.layers.Input((16, 16, 1), name=\"input_0\")\n  x1 = QConv2D(\n      16, 2, 2,\n      kernel_quantizer=quantizer1,\n      bias_quantizer=quantizer1,\n      name=\"conv2d_0\")(x1)\n\n  x2 = input2 = keras.layers.Input((16, 16, 1), name=\"input_1\")\n  x2 = QConv2D(\n      32, 2, 2,\n      kernel_quantizer=quantizer2,\n      bias_quantizer=quantizer2,\n 
     name=\"conv2d_1\")(x2)\n\n  x3 = input3 = keras.layers.Input((16, 16, 1), name=\"input_2\")\n  x3 = QConv2D(\n      64, 2, 2,\n      kernel_quantizer=quantizer3,\n      bias_quantizer=quantizer3,\n      name=\"conv2d_2\")(x3)\n\n  x = keras.layers.concatenate([x1, x2, x3], axis=-1, name=\"concatenate\")\n  model = keras.Model(inputs=[input1, input2, input3], outputs=[x])\n\n  return model\n\n\ndef run(model, input_quantizers, is_inference=False,\n        verbose=False, hw_weight_dict=None):\n  (graph, source_quantizer_list) = qgraph.CreateGraph(\n      model, input_quantizers)\n  # qgraph.PrintGraph(graph)\n  qgraph.GraphPropagateActivationsToEdges(graph)\n\n  layer_map = generate_layer_data_type_map.generate_layer_data_type_map(\n      graph=graph, source_quantizer_list=source_quantizer_list,\n      is_inference=is_inference, hw_weight_dict=hw_weight_dict)\n\n  # interface.print_layer_data_type_map(dict)\n  output_dict = interface.map_to_json(layer_map)\n\n  if verbose:\n    dict_to_json = json.dumps(output_dict, indent=4)\n    print(dict_to_json)\n\n  return output_dict\n\n\ndef test_wrong_input_quantizers():\n  input_quantizers = [\n      quantizers.quantized_bits(4, 0, 1),\n      quantizers.quantized_bits(5, 0, 1),\n      quantizers.quantized_bits(6, 0, 1)\n  ]\n  # INPUT_QUANTIZERS = None\n  x1 = input1 = keras.layers.Input((16,), name=\"input_0\")\n  x1 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      name=\"dense_0\")(x1)\n  x2 = input2 = keras.layers.Input(shape=(32,), name=\"input_1\")\n  x2 = QDense(\n      8,\n      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n      name=\"dense_1\")(x2)\n  x = keras.layers.add([x1, x2], name=\"add\")\n\n  model = keras.Model(\n      inputs=[input1, input2], outputs=[x])\n\n  with pytest.raises(qgraph.WrongInputQuantizerError):\n    run(model, 
input_quantizers)\n\n\ndef test_qbn_inference():\n  input_quantizers = [quantizers.quantized_bits(4, 0, 1)]\n  (hw_weight_dict, model) = qbn_model_inference()\n\n  dtype_dict = run(model, input_quantizers, is_inference=True,\n                   hw_weight_dict=hw_weight_dict)\n  multiplier = dtype_dict[\"qconv2d_1\"][\"multiplier\"]\n  accumulator = dtype_dict[\"qconv2d_1\"][\"accumulator\"]\n  output = dtype_dict[\"qconv2d_1\"][\"output_quantizer\"]\n  fused_accumulator = dtype_dict[\"qconv2d_1\"][\"fused_accumulator\"]\n\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 7\n  assert multiplier[\"int_bits\"] == 1\n  assert multiplier[\"is_signed\"] == 1\n  assert multiplier[\"op_type\"] == \"mul\"\n\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 9\n  assert accumulator[\"int_bits\"] == 3\n  assert accumulator[\"is_signed\"] == 1\n  assert accumulator[\"op_type\"] == \"add\"\n\n  assert fused_accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert fused_accumulator[\"bits\"] == 25\n  assert fused_accumulator[\"int_bits\"] == 4\n  assert accumulator[\"is_signed\"] == 1\n  assert fused_accumulator[\"op_type\"] == \"add\"\n\n  # Tests auto_po2 type of quantizer in conv2d and batchnorm fusing. Here\n  # we set the layer weights in a way that scale value would be !=1 so that\n  # we need to check bits and int_bits are adjusted properly to incorporate\n  # the scale value.\n  multiplier = dtype_dict[\"qconv2d_3\"][\"multiplier\"]\n  accumulator = dtype_dict[\"qconv2d_3\"][\"accumulator\"]\n  output = dtype_dict[\"qconv2d_3\"][\"output_quantizer\"]\n  fused_accumulator = dtype_dict[\"qconv2d_3\"][\"fused_accumulator\"]\n\n  # w_bits = 3, w_intbits =0\n  # x_bits = 5, x_intbits =0\n  # weight scale = [[[[16.  
2.]]]]\n  # before scale adjustment: m_bits=(3-1)+(5-1)+1=7   m_intbits = 0\n  # after scale adjustment: m_bits=7+(log16-log2)=10  m_intbits = 0+log16=4\n  # Note: dict here added sign bit to the intbit to match hardware format.\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 10\n  assert multiplier[\"int_bits\"] == 5\n  assert multiplier[\"is_signed\"] == 1\n  assert multiplier[\"op_type\"] == \"mul\"\n\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 13\n  assert accumulator[\"int_bits\"] == 8\n  assert accumulator[\"is_signed\"] == 1\n  assert accumulator[\"op_type\"] == \"add\"\n\n  # Calculates fused_accumulator according to fused_bn_factory/FusedBNFactory.\n  # For example, wiht inv_quantizer scale:[2. 2.] we have here,\n  # multiplier_x before adjust - bits:19 int_bits:6\n  # multiplier_x after adjust - bits:19 int_bits:7\n  assert fused_accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert fused_accumulator[\"bits\"] == 20\n  assert fused_accumulator[\"int_bits\"] == 9\n  assert accumulator[\"is_signed\"] == 1\n  assert fused_accumulator[\"op_type\"] == \"add\"\n\n\ndef test_invalid_denominator_qbn():\n  input_quantizers = None\n  act = \"binary(use_01=0)\"\n  gamma = quantizers.ternary()\n  variance = gamma\n  model = qbn_model(\n      act=act, gamma=gamma, variance=variance,\n      beta=None, mean=None)\n  with pytest.raises(divider_factory.UnacceptedQuantizerError):\n    run(model, input_quantizers)\n\n\ndef test_conv2d():\n  input_quantizers = None\n\n  act = \"quantized_bits(6, 0, 1)\"\n  weight = quantizers.quantized_relu_po2(4, 2)\n  x = x_in = keras.layers.Input((23, 23, 1), name=\"input\")\n  x = QActivation(act, name=\"QA_0\")(x)\n  x = QConv2D(\n      16, 2, 2,\n      kernel_quantizer=weight,\n      bias_quantizer=weight,\n      name=\"qconv2d_1\")(x)\n\n  model = keras.Model(inputs=[x_in], outputs=[x])\n\n  dtype_dict = run(model, 
input_quantizers)\n  multiplier = dtype_dict[\"qconv2d_1\"][\"multiplier\"]\n  accumulator = dtype_dict[\"qconv2d_1\"][\"accumulator\"]\n  op_count = dtype_dict[\"qconv2d_1\"][\"operation_count\"]\n\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 15\n  assert multiplier[\"int_bits\"] == 2\n  assert multiplier[\"is_signed\"] == 1\n  assert multiplier[\"op_type\"] == \"shifter\"\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 18\n  assert accumulator[\"int_bits\"] == 5\n  assert accumulator[\"is_signed\"] == 1\n  assert accumulator[\"op_type\"] == \"add\"\n  assert op_count == 7744\n\n\ndef test_qdense_model_fork():\n  input_quantizers = [quantizers.quantized_bits(4, 0, 1)]\n  model = qdense_model_fork()\n  dtype_dict = run(model, input_quantizers)\n\n  multiplier = dtype_dict[\"qdense_3\"][\"multiplier\"]\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 5\n  assert multiplier[\"int_bits\"] == 1\n  assert multiplier[\"is_signed\"] == 1\n  assert multiplier[\"op_type\"] == \"mux\"\n\n  accumulator = dtype_dict[\"qdense_3\"][\"accumulator\"]\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 11\n  assert accumulator[\"int_bits\"] == 7\n  assert accumulator[\"is_signed\"] == 1\n  assert accumulator[\"op_type\"] == \"add\"\n\n\ndef test_util_layers():\n  input_quantizers = None  # quantizers.quantized_bits(4, 0, 1)\n\n  act = \"quantized_bits(6, 0, 1)\"\n  x = x_in = keras.layers.Input((24, 24, 1), name=\"input\")\n  x = QActivation(act, name=\"QA_0\")(x)\n  x = keras.layers.Reshape((12 * 12, 4, 1), name=\"reshape_1\")(x)\n  x = keras.layers.MaxPooling2D(\n      pool_size=(2, 2), name=\"maxpooling_2\")(x)\n  x = keras.layers.Flatten(name=\"flatten_3\")(x)\n  x = QDense(\n      30,\n      kernel_quantizer=quantizers.binary(use_01=1),\n      
bias_quantizer=quantizers.binary(use_01=1),\n      activation=quantizers.quantized_po2(3, 2),\n      name=\"qdense_4\")(x)\n\n  model = keras.Model(inputs=[x_in], outputs=[x])\n  dtype_dict = run(model, input_quantizers)\n\n  multiplier = dtype_dict[\"qdense_4\"][\"multiplier\"]\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 6\n  assert multiplier[\"int_bits\"] == 1\n  assert multiplier[\"is_signed\"] == 1\n  assert multiplier[\"op_type\"] == \"and\"\n\n  accumulator = dtype_dict[\"qdense_4\"][\"accumulator\"]\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 15\n  assert accumulator[\"int_bits\"] == 10\n  assert accumulator[\"is_signed\"] == 1\n  assert accumulator[\"op_type\"] == \"add\"\n\n  output = dtype_dict[\"qdense_4\"][\"output_quantizer\"]\n  assert output[\"quantizer_type\"] == \"quantized_po2\"\n  assert output[\"bits\"] == 3\n  assert output[\"is_signed\"] == 1\n  assert output[\"max_value\"] == 2\n\n\ndef test_merge_layers():\n  input_quantizers = [\n      quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 1),\n      quantizers.quantized_bits(6, 0, 1)]\n  model = add_qmodel(\n      quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0),\n      quantizers.quantized_bits(6, 0, 1))\n  dtype_dict = run(model, input_quantizers)\n  merge_quantizer = dtype_dict[\"add\"][\"Add_quantizer\"]\n  assert merge_quantizer[\"quantizer_type\"] == \"quantized_bits\"\n  assert merge_quantizer[\"bits\"] == 7\n  assert merge_quantizer[\"int_bits\"] == 2\n  assert merge_quantizer[\"is_signed\"] == 1\n\n  model = multiply_qmodel()\n  dtype_dict = run(model, input_quantizers)\n  merge_quantizer = dtype_dict[\"multiply\"][\"Multiply_quantizer\"]\n  assert merge_quantizer[\"quantizer_type\"] == \"quantized_bits\"\n  assert merge_quantizer[\"bits\"] == 13\n  assert merge_quantizer[\"int_bits\"] == 1\n  assert merge_quantizer[\"is_signed\"] == 1\n  
assert merge_quantizer[\"op_type\"] == \"mul\"\n\n  model = maximum_qmodel(\n      quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0),\n      quantizers.quantized_bits(6, 0, 1))\n  dtype_dict = run(model, input_quantizers)\n  merge_quantizer = dtype_dict[\"maximum\"][\"Maximum_quantizer\"]\n  assert merge_quantizer[\"quantizer_type\"] == \"quantized_bits\"\n  assert merge_quantizer[\"bits\"] == 6\n  assert merge_quantizer[\"int_bits\"] == 1\n  assert merge_quantizer[\"is_signed\"] == 1\n\n  model = concatenate_qmodel(\n      quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0),\n      quantizers.quantized_bits(6, 0, 1))\n  dtype_dict = run(model, input_quantizers)\n  merge_quantizer = dtype_dict[\"concatenate\"][\"Concatenate_quantizer\"]\n  assert merge_quantizer[\"quantizer_type\"] == \"quantized_bits\"\n  assert merge_quantizer[\"bits\"] == 14\n  assert merge_quantizer[\"int_bits\"] == 4\n  assert merge_quantizer[\"is_signed\"] == 1\n\n\ndef test_pooling():\n  input_quantizers = [quantizers.quantized_bits(8, 0, 1)]\n  model = pooling_qmodel()\n  dtype_dict = run(model, input_quantizers)\n\n  accumulator = dtype_dict[\"avg_pooling\"][\"pool_sum_accumulator\"]\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 10\n  assert accumulator[\"int_bits\"] == 3\n\n  accumulator = dtype_dict[\"global_avg_pooling\"][\"pool_sum_accumulator\"]\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 16\n  assert accumulator[\"int_bits\"] == 9\n\n\ndef test_qenergy():\n  x = x_in = keras.layers.Input((784,), name=\"input\")\n  x = QDense(\n      300,\n      kernel_quantizer=quantizers.binary(),\n      bias_quantizer=quantizers.binary(),\n      name=\"d0\")(x)\n  x = QActivation(\"quantized_relu(4,0)\", name=\"d0_qr4\")(x)\n  x = QDense(100, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n             bias_quantizer=quantizers.quantized_bits(4, 
0, 1),\n             name=\"d1\")(x)\n  x = QAdaptiveActivation(\"quantized_relu\", 4, name=\"d1_qr4\")(x)\n  x = QDense(\n      10, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),\n      bias_quantizer=quantizers.quantized_bits(4, 0, 1),\n      name=\"d2\")(x)\n  x = keras.layers.Activation(\"softmax\", name=\"softmax\")(x)\n\n  model = keras.Model(inputs=[x_in], outputs=[x])\n  # print(model.summary())\n\n  reference_internal = \"int8\"\n  reference_accumulator = \"int32\"\n\n  # get reference energy cost\n  q = run_qtools.QTools(\n      model, process=\"horowitz\",\n      source_quantizers=reference_internal,\n      is_inference=False, weights_path=None,\n      keras_quantizer=reference_internal,\n      keras_accumulator=reference_accumulator,\n      for_reference=True)\n\n  ref_energy_dict = q.pe(\n      weights_on_memory=\"sram\",\n      activations_on_memory=\"sram\",\n      min_sram_size=8*16*1024*1024,\n      rd_wr_on_io=False)\n  reference_size = q.extract_energy_sum(\n      qtools_settings.cfg.include_energy, ref_energy_dict)\n\n  # get trial energy cost\n  q = run_qtools.QTools(\n      model, process=\"horowitz\",\n      source_quantizers=reference_internal,\n      is_inference=False, weights_path=None,\n      keras_quantizer=reference_internal,\n      keras_accumulator=reference_accumulator,\n      for_reference=False)\n  trial_energy_dict = q.pe(\n      weights_on_memory=\"sram\",\n      activations_on_memory=\"sram\",\n      min_sram_size=8*16*1024*1024,\n      rd_wr_on_io=False)\n  trial_size = q.extract_energy_sum(\n      qtools_settings.cfg.include_energy, trial_energy_dict)\n\n  # Reference energy number is now updated with keras_accumulator as\n  # output quantizer\n  tmp = ref_energy_dict[\"d0\"][\"energy\"]\n  assert tmp[\"inputs\"] == pytest.approx(372.77, abs=0.1)\n  assert tmp[\"outputs\"] == pytest.approx(570.57, abs=0.1)\n  assert tmp[\"parameters\"] == pytest.approx(111975.96, abs=0.1)\n  assert tmp[\"op_cost\"] == 
pytest.approx(70560.0, abs=0.1)\n\n  tmp = ref_energy_dict[\"d1\"][\"energy\"]\n  assert tmp[\"inputs\"] == pytest.approx(570.57, abs=0.1)\n  assert tmp[\"outputs\"] == pytest.approx(190.19, abs=0.1)\n  assert tmp[\"parameters\"] == pytest.approx(14313.66, abs=0.1)\n  assert tmp[\"op_cost\"] == pytest.approx(26500.0, abs=0.1)\n\n  tmp = ref_energy_dict[\"d2\"][\"energy\"]\n  assert tmp[\"inputs\"] == pytest.approx(190.19, abs=0.1)\n  assert tmp[\"outputs\"] == pytest.approx(19.02, abs=0.1)\n  assert tmp[\"parameters\"] == pytest.approx(483.08, abs=0.1)\n  assert tmp[\"op_cost\"] == pytest.approx(883.33, abs=0.1)\n\n  # Trial\n  tmp = trial_energy_dict[\"d0\"][\"energy\"]\n  assert tmp[\"inputs\"] == pytest.approx(372.77, abs=0.1)\n  assert tmp[\"outputs\"] == pytest.approx(342.34, abs=0.1)\n  assert tmp[\"parameters\"] == pytest.approx(13997.95, abs=0.1)\n  assert tmp[\"op_cost\"] == pytest.approx(15729.0, abs=0.1)\n\n  tmp = trial_energy_dict[\"d1\"][\"energy\"]\n  assert tmp[\"inputs\"] == pytest.approx(72.27, abs=0.1)\n  assert tmp[\"outputs\"] == pytest.approx(110.31, abs=0.1)\n  assert tmp[\"parameters\"] == pytest.approx(7158.73, abs=0.1)\n  assert tmp[\"op_cost\"] == pytest.approx(3250.0, abs=0.1)\n\n  tmp = trial_energy_dict[\"d2\"][\"energy\"]\n  assert tmp[\"inputs\"] == pytest.approx(26.63, abs=0.1)\n  assert tmp[\"outputs\"] == pytest.approx(11.41, abs=0.1)\n  assert tmp[\"parameters\"] == pytest.approx(243.44, abs=0.1)\n  assert tmp[\"op_cost\"] == pytest.approx(102.08, abs=0.1)\n\n  # print(ref_energy_dict)\n  # print(trial_energy_dict)\n  assert int(reference_size) == 226629\n  assert int(trial_size) == 41070\n\n\ndef test_quntized_reference_energy_same_as_floating_trial():\n  # Test if reference energy from quantized model and floating model is the\n  # same\n  def get_model(quantize=False):\n    x1 = input1 = keras.layers.Input((16, 16, 3), name=\"input_0\")\n    if quantize:\n      x1 = QConv2D(\n          16, 2, 2,\n          
kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n          bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n          name=\"conv_0\")(x1)\n    else:\n      x1 = keras.layers.Conv2D(16, 2, 2, name=\"conv_0\")(x1)\n\n    x2 = input2 = keras.layers.Input(shape=(16, 16, 3), name=\"input_1\")\n    if quantize:\n      x2 = QConv2D(\n          16, 2, 2,\n          kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n          bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n          name=\"conv_1\")(x2)\n    else:\n      x2 = keras.layers.Conv2D(16, 2, 2, name=\"conv_1\")(x2)\n\n    x = keras.layers.add([x1, x2], name=\"add\")\n    if quantize:\n      x = QActivation(activation=\"quantized_relu(8, 2)\", name=\"relu\")(x)\n    else:\n      x = keras.layers.Activation(\"relu\", name=\"relu\")(x)\n\n    if quantize:\n      x = QConv2D(\n          2, 2, 2,\n          kernel_quantizer=quantizers.quantized_bits(5, 0, 1),\n          bias_quantizer=quantizers.quantized_bits(5, 0, 1),\n          name=\"conv_2\")(x)\n    else:\n      x = keras.layers.Conv2D(2, 2, 2, name=\"conv_2\")(x)\n\n    model = keras.Model(inputs=[input1, input2], outputs=[x])\n    return model\n\n  def get_qenergy(model, qenergy_config, for_reference):\n    q = run_qtools.QTools(\n        model, process=qenergy_config[\"process\"],\n        source_quantizers=qenergy_config[\"reference_internal\"],\n        is_inference=qenergy_config[\"trained_model\"],\n        weights_path=None,\n        keras_quantizer=qenergy_config[\"reference_internal\"],\n        keras_accumulator=qenergy_config[\"reference_accumulator\"],\n        for_reference=for_reference)\n\n    # caculate energy of the derived data type map.\n    energy_dict = q.pe(\n        weights_on_memory=qenergy_config[\"parameters_on_memory\"],\n        activations_on_memory=qenergy_config[\"activations_on_memory\"],\n        min_sram_size=qenergy_config[\"min_sram_size\"],\n        rd_wr_on_io=qenergy_config[\"rd_wr_on_io\"])\n\n    
total_energy = q.extract_energy_sum(qtools_settings.cfg.include_energy,\n                                        energy_dict)\n\n    return q, total_energy\n\n  qenergy_config = {\n      \"trained_model\": True,\n      \"delta_p\": 8.0,\n      \"delta_n\": 8.0,\n      \"rate\": 2.0,\n      \"stress\": 1.0,\n      \"process\": \"horowitz\",\n      \"parameters_on_memory\": \"sram\",\n      \"activations_on_memory\": \"sram\",\n      \"rd_wr_on_io\": False,\n      \"min_sram_size\": 0,\n      \"source_quantizers\": [\"quantizers.quantized_bits(8, 0, 1)\"],\n      \"reference_internal\": \"int8\",\n      \"reference_accumulator\": \"int32\"\n  }\n\n  float_model = get_model(quantize=False)\n  quantized_model = get_model(quantize=True)\n\n  _, float_reference_energy = get_qenergy(\n      float_model, qenergy_config, for_reference=False)\n  _, float_trial_energy = get_qenergy(\n      float_model, qenergy_config, for_reference=True)\n  _, quantized_reference_energy = get_qenergy(\n      quantized_model, qenergy_config, for_reference=True)\n\n  assert float_reference_energy == quantized_reference_energy\n  assert float_reference_energy == float_trial_energy\n\n\ndef test_auto_po2():\n  def gen_model(img_shape):\n    img_input = x = keras.Input(shape=img_shape)\n    x = QConv2D(\n        filters=5, kernel_size=4, strides=4,\n        kernel_quantizer=quantizers.quantized_bits(8, 3, alpha=\"auto_po2\"),\n        bias_quantizer=quantizers.quantized_bits(8, 3),\n        name=\"conv\")(x)\n    x = QActivation(activation=quantizers.quantized_relu(4, 0), name=\"act\")(x)\n    x = keras.layers.Flatten(name=\"flatten\")(x)\n    x = QDense(5,\n               kernel_quantizer=quantizers.quantized_bits(\n                   8, 0, alpha=\"auto_po2\"),\n               bias_quantizer=quantizers.quantized_bits(8, 3),\n               name=\"dense\")(x)\n    model = keras.Model(inputs=img_input, outputs=[x])\n    return model\n\n  model = gen_model((32, 32, 3,))\n  
model.compile(loss=\"mse\", run_eagerly=True)\n  model.layers[1].quantizers[0].scale = tf.constant(\n      [[[[0.0625, 0.0625, 0.0625, 0.0625, 0.03125]]]])\n  model.layers[4].quantizers[0].scale = tf.constant([[0.5, 0.5, 1, 0.5, 0.25]])\n  input_quantizers = [\n      quantizers.quantized_bits(bits=8, integer=0, keep_negative=False)\n  ]\n  dtype_dict = run(model, input_quantizers)\n\n  # Original multiplier has 16 bits(16=8+8) and 3 int_bits\n  multiplier = dtype_dict[\"conv\"][\"multiplier\"]\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 16\n  assert multiplier[\"int_bits\"] == 4\n\n  # Original accumulator has 16+log2(4*4*3)+1 bits,\n  # and 4+log2(4*4*3)+1 int_bits\n  accumulator = dtype_dict[\"conv\"][\"accumulator\"]\n  assert accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert accumulator[\"bits\"] == 23\n  assert accumulator[\"int_bits\"] == 11\n\n  # adjusting multiplier with auto_po2:\n  # bits = max_fractional_bits + max_int_bits = bits + max_shift - min_shift\n  # max_shift = log2(0.0625) = -4\n  # min_shift=log2(0.03125) = -5\n  # So adjusted multiplier bits=17, 1 bit bigger than original multiplier.\n  # Modified multiplier int_bits = int_bits + max_shift = 3 - 4 = -1\n  # Because in datatype map we add int_bits with 1 extra sign bit,\n  # adjusted multiplier int_bits = 0, 4 bit smaller than original multiplier.\n  # When we pass the adjusted multiplier to fused_accumulator, we\n  # get bits = 23+1=24, and int_bits = 11-4=7\n  fused_accumulator = dtype_dict[\"conv\"][\"fused_accumulator\"]\n  assert fused_accumulator[\"quantizer_type\"] == \"quantized_bits\"\n  assert fused_accumulator[\"bits\"] == 24\n  assert fused_accumulator[\"int_bits\"] == 7\n\n  multiplier = dtype_dict[\"dense\"][\"multiplier\"]\n  assert multiplier[\"quantizer_type\"] == \"quantized_bits\"\n  assert multiplier[\"bits\"] == 12\n  assert multiplier[\"int_bits\"] == 1\n\n\ndef test_big_bias_quantizer():\n  q1 = 
quantizer_impl.QuantizedBits()\n  q1.convert_qkeras_quantizer(quantizers.quantized_bits(8, 3))\n  q2 = quantizer_impl.QuantizedBits()\n  q2.convert_qkeras_quantizer(quantizers.quantized_bits(16, 4))\n  r = adder_impl.FixedPointAdder(q1, q2)\n\n  # int_bits = max(q1.int_bits, q2.int_bits) + 1\n  # bits = int_bits + sign_bit + max(q1_fraction_bit, q2_fraction bit)\n  assert r.output.bits == 17\n  assert r.output.int_bits == 5\n\n\ndef test_qdepthwiseconv2d():\n  x = x_in = keras.layers.Input((64, 64, 3), name=\"input\")\n  x = QDepthwiseConv2D(\n      kernel_size=(1, 7),\n      depthwise_quantizer=quantizers.quantized_bits(8, 0, 1, alpha=1.0),\n      bias_quantizer=quantizers.quantized_bits(12, 6, 1, alpha=1.0),\n      name=\"dw_conv\")(x)\n  x = QConv2D(\n      filters=16,\n      kernel_size=(1, 1),\n      bias_quantizer=quantizers.quantized_bits(12, 4, 1, alpha=1.0),\n      kernel_quantizer=quantizers.quantized_bits(4,0, 1, alpha=1.0),\n      name=\"pw_conv\")(x)\n\n  model = keras.Model(inputs=[x_in], outputs=[x])\n\n  input_quantizers = [quantizers.quantized_bits(8, 0, 1)]\n  dtype_dict = run(model, input_quantizers)\n\n  # multiplier_int_bits = 0(x_int_bits) + 0(w_int_bits) = 0 (excluding sign_bit)\n  # multiplier_fractional_bits = 7(x_fractional) + 7(w_fractional) = 14\n  # multiplier_bits = 0 + 14 + sign_bit = 15\n  assert dtype_dict[\"dw_conv\"][\"multiplier\"][\"bits\"] == 15\n  assert dtype_dict[\"dw_conv\"][\"multiplier\"][\"int_bits\"] == 1\n  # accumulator_int_bits = max(bias_int_bits, log7 + 0) + 1 = 7\n  # accumulator_fractional_bits = max(bias_fractional, 14) = 14\n  # accumulator_bits = int_bits + fractional_bits + sign_bit = 22\n  assert dtype_dict[\"dw_conv\"][\"accumulator\"][\"bits\"] == 22\n  assert dtype_dict[\"dw_conv\"][\"accumulator\"][\"int_bits\"] == 8\n\n  assert dtype_dict[\"pw_conv\"][\"multiplier\"][\"bits\"] == 25\n  assert dtype_dict[\"pw_conv\"][\"multiplier\"][\"int_bits\"] == 8\n  assert 
dtype_dict[\"pw_conv\"][\"accumulator\"][\"bits\"] == 28\n  assert dtype_dict[\"pw_conv\"][\"accumulator\"][\"int_bits\"] == 11\n\n\ndef test_divide_and_conquer_sequential_conv2d():\n  # These following values are verified manually to be globally optimal.\n\n  # The test has two purposes:\n  # 1) check if the code runs ok;\n  # 2) for a simple conv2d model, the output is as expected.\n\n  # We will need to add more tests with more complex graph architecture\n  # in the future as our solution grows.\n\n  xin = x = tf.keras.layers.Input(shape=(16, 16, 1), name=\"input_layer\")\n  x = QConv2D(\n      kernel_size=3,\n      filters=3,\n      use_bias=False,\n      kernel_quantizer=quantizers.quantized_bits(4, 0, alpha=1.0),\n      name=\"conv_1\",\n  )(x)\n  x = QConv2D(\n      kernel_size=3,\n      filters=5,\n      use_bias=False,\n      kernel_quantizer=quantizers.quantized_bits(4, 0, alpha=1.0),\n      name=\"conv_2\",\n  )(x)\n\n  # Create a model\n  model = tf.keras.Model(inputs=xin, outputs=x)\n\n  # Test if the flow works perperly. In the future we will construct more\n  # detailed tests regarding cost once the cost design matures.\n  assert divide_and_conquer.estimate_model_cost(\n      model,\n      input_quantizer_bits=8,\n      target_OutElementPerClk=10,\n      target_out_throughput=1.0,\n      compute_to_memory_max_ratio=1,\n      memory_to_unroll_max_ratio=1,\n      mode=divide_and_conquer.CostMode.ACE,\n  )\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/qtools_util_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for qtools_util module.\"\"\"\n\nimport json\n\nimport numpy as np\nimport pytest\nimport tensorflow.keras as keras\nimport tensorflow as tf\n\nfrom qkeras import quantizers\nfrom qkeras.qtools import qtools_util\n\nfrom qkeras.qtools import quantized_operators\nfrom qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module\n\n\n@pytest.mark.parametrize(\n    \"w_bits, w_int_bits, weight_quantizer_scale_type, \"\n    \"expected_bits_before_adjustment, expected_int_bits_before_adjustment, \"\n    \"expected_bits_after_adjustment, expected_int_bits_after_adjustment\",\n    [\n        (8, 0, \"1.0\", 11, 2, 11, 2),\n        (4, 2, \"auto_po2\", 7, 4, 10, 5),\n        (4, 0, \"post_training_scale\", 7, 2, 10, 5),\n    ],\n)\ndef test_adjust_multiplier_for_auto_po2(\n    w_bits, w_int_bits, weight_quantizer_scale_type,\n    expected_bits_before_adjustment, expected_int_bits_before_adjustment,\n    expected_bits_after_adjustment, expected_int_bits_after_adjustment):\n  \"\"\"Test adjust_multiplier_for_auto_po2 with auto_po2 weight quantizer.\"\"\"\n\n  multiplier_factory = quantized_operators.MultiplierFactory()\n  quantizer_factory = quantizer_factory_module.QuantizerFactory()\n\n  qkeras_input_quantizer = quantizers.quantized_bits(4, 2, 1)\n\n  # 
Generate the weight quantizer.\n  if weight_quantizer_scale_type in [\"auto_po2\", \"post_training_scale\"]:\n    # Compute the scale for auto_po2 quantizer.\n    qkeras_weight_quantizer = quantizers.quantized_bits(\n        bits=w_bits, integer=w_int_bits, keep_negative=True,\n        symmetric=True, alpha=\"auto_po2\")\n    weight_arr = np.array([1.07, -1.7, 3.06, 1.93, 0.37, -2.43, 6.3, -2.9]\n                          ).reshape((2, 4))\n    qkeras_weight_quantizer(weight_arr)\n\n    if weight_quantizer_scale_type == \"post_training_scale\":\n      # Set the post_training_scale as fixed scale.\n      auto_po2_scale = qkeras_weight_quantizer.scale.numpy()\n      qkeras_weight_quantizer = quantizers.quantized_bits(\n          bits=w_bits, integer=w_int_bits, alpha=\"auto_po2\",\n          post_training_scale=auto_po2_scale)\n  else:\n    qkeras_weight_quantizer = quantizers.quantized_bits(w_bits, w_int_bits)\n\n  input_quantizer = quantizer_factory.make_quantizer(\n      qkeras_input_quantizer)\n  weight_quantizer = quantizer_factory.make_quantizer(\n      qkeras_weight_quantizer)\n\n  multiplier = multiplier_factory.make_multiplier(\n      weight_quantizer, input_quantizer)\n\n  np.testing.assert_equal(multiplier.output.bits,\n                          expected_bits_before_adjustment)\n  np.testing.assert_equal(multiplier.output.int_bits,\n                          expected_int_bits_before_adjustment)\n\n  qtools_util.adjust_multiplier_for_auto_po2(\n      multiplier, qkeras_weight_quantizer)\n  print(f\"after adjustment: {multiplier.output.bits}, {multiplier.output.int_bits}\")\n  np.testing.assert_equal(multiplier.output.bits,\n                          expected_bits_after_adjustment)\n  np.testing.assert_equal(multiplier.output.int_bits,\n                          expected_int_bits_after_adjustment)\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/quantizer_impl_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for methods in quantizer_impl.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport pytest\nimport tensorflow as tf\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import *\n\nfrom qkeras import *\nfrom qkeras.qtools.quantized_operators import quantizer_impl\nfrom qkeras import quantizers\nfrom numpy.testing import assert_equal\n\n\n# pylint: disable=invalid-name\ndef test_QuantizedBits():\n  qkeras_quantizer = quantizers.quantized_bits()\n  qtools_quantizer = quantizer_impl.QuantizedBits()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      symmetric=qkeras_quantizer.symmetric, alpha=qkeras_quantizer.alpha,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      scale_axis=qkeras_quantizer.scale_axis,\n      qnoise_factor=qkeras_quantizer.qnoise_factor)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_QuantizedBits_ElementsPerScale():\n  \"\"\"Tests quantized_bits with elements_per_scale.\"\"\"\n  def _get_min_max_po2_exponent(x):\n    \"\"\"Get min and max 
po2 exponent of x.\"\"\"\n    po2_x = K.log(x)/np.log(2.0)\n    return (tf.math.reduce_min(po2_x).numpy(),\n            tf.math.reduce_max(po2_x).numpy())\n\n  qkeras_quantizer = quantizers.quantized_bits(\n      alpha=\"auto_po2\", elements_per_scale=[1, 1], scale_axis=[1, 2],\n      min_po2_exponent=-3, max_po2_exponent=3)\n\n  qtools_quantizer = quantizer_impl.QuantizedBits()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      symmetric=qkeras_quantizer.symmetric,\n      alpha=qkeras_quantizer.alpha,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      scale_axis=qkeras_quantizer.scale_axis,\n      qnoise_factor=qkeras_quantizer.qnoise_factor,\n      elements_per_scale=qkeras_quantizer.elements_per_scale,\n      min_po2_exponent=qkeras_quantizer.min_po2_exponent,\n      max_po2_exponent=qkeras_quantizer.max_po2_exponent,\n  )\n\n  # for quantized_bits the scale is multiplied by the integer scale as well\n  # the integer scale depends on the sign bit\n  integer_po2_scale = new_quantizer.bits - new_quantizer.keep_negative\n\n  # Test for input tensors of rank 3\n  x_r3 = tf.random.uniform([1, 8, 8])\n  new_quantizer(x_r3)\n  x_r3_scale = new_quantizer.scale\n  xq_r3_min_exp, xq_r3_max_exp = _get_min_max_po2_exponent(x_r3_scale)\n\n  assert_equal(new_quantizer.scale.shape, [1, 8, 8])\n  assert xq_r3_min_exp >= -3*integer_po2_scale\n  assert xq_r3_max_exp <= 3*integer_po2_scale\n\n  # Test for input tensors of rank 4\n  x_r4 = tf.random.uniform([1, 2, 4, 8])\n  new_quantizer(x_r4)\n  x_r4_scale = new_quantizer.scale\n  xq_r4_min_exp, xq_r4_max_exp = _get_min_max_po2_exponent(x_r4_scale)\n\n  assert_equal(new_quantizer.scale.shape, [1, 2, 4, 1])\n  assert xq_r4_min_exp >= -3*integer_po2_scale\n  assert xq_r4_max_exp <= 3*integer_po2_scale\n\n\ndef test_QuantizedTanh():\n  qkeras_quantizer = quantizers.quantized_tanh()\n  qtools_quantizer = 
quantizer_impl.QuantizedTanh()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      symmetric=qkeras_quantizer.symmetric)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_QuantizedUlaw():\n  qkeras_quantizer = quantizers.quantized_ulaw()\n  qtools_quantizer = quantizer_impl.QuantizedUlaw()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      symmetric=qkeras_quantizer.symmetric,\n      u=qkeras_quantizer.u)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_Binary():\n  qkeras_quantizer = quantizers.binary()\n  qtools_quantizer = quantizer_impl.Binary()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      alpha=qkeras_quantizer.alpha,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_StochasticBinary():\n  qkeras_quantizer = quantizers.stochastic_binary()\n  qtools_quantizer = quantizer_impl.StochasticBinary()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      alpha=qkeras_quantizer.alpha,\n      temperature=qkeras_quantizer.temperature,\n      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_Bernoulli():\n  qkeras_quantizer = quantizers.bernoulli()\n  qtools_quantizer = 
quantizer_impl.Bernoulli()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      alpha=qkeras_quantizer.alpha, temperature=qkeras_quantizer.temperature,\n      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_QuantizedRelu():\n  qkeras_quantizer = quantizers.quantized_relu()\n  qtools_quantizer = quantizer_impl.QuantizedRelu()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      use_sigmoid=qkeras_quantizer.use_sigmoid,\n      negative_slope=qkeras_quantizer.negative_slope,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      relu_upper_bound=qkeras_quantizer.relu_upper_bound,\n      is_quantized_clip=qkeras_quantizer.is_quantized_clip,\n      qnoise_factor=qkeras_quantizer.qnoise_factor)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_Ternary():\n\n  qkeras_quantizer = quantizers.ternary()\n  qtools_quantizer = quantizer_impl.Ternary()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      alpha=qkeras_quantizer.alpha, threshold=qkeras_quantizer.threshold,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      number_of_unrolls=qkeras_quantizer.number_of_unrolls)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_StochasticTernary():\n  qkeras_quantizer = quantizers.stochastic_ternary()\n  qtools_quantizer = quantizer_impl.StochasticTernary()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = 
qtools_quantizer.convert_to_qkeras_quantizer(\n      alpha=qkeras_quantizer.alpha, threshold=qkeras_quantizer.threshold,\n      temperature=qkeras_quantizer.temperature,\n      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid,\n      number_of_unrolls=qkeras_quantizer.number_of_unrolls)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_PowerOfTwo():\n  qkeras_quantizer = quantizers.quantized_po2()\n  qtools_quantizer = quantizer_impl.PowerOfTwo(is_signed=True)\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      negative_slope=None,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      quadratic_approximation=qkeras_quantizer.quadratic_approximation)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_ReluPowerOfTwo():\n  qkeras_quantizer = quantizers.quantized_relu_po2()\n  qtools_quantizer = quantizer_impl.ReluPowerOfTwo()\n  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)\n  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(\n      negative_slope=qkeras_quantizer.negative_slope,\n      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,\n      quadratic_approximation=qkeras_quantizer.quadratic_approximation)\n\n  result = new_quantizer.__dict__\n  for (key, val) in result.items():\n    assert_equal(val, qkeras_quantizer.__dict__[key])\n\n\ndef test_GetScale_PerChannelScale():\n  # Rank1 tensors\n  x_r1 = tf.ones([4])\n  q_r1 = tf.ones([4])\n  scale_r1_pcs_true = quantizers._get_scale(\n      \"auto\", x_r1, q_r1, scale_axis=None, per_channel_scale=True)\n  scale_r1_pcs_false = quantizers._get_scale(\n      \"auto\", x_r1, q_r1, scale_axis=None, per_channel_scale=False)\n  assert_equal(tf.shape(scale_r1_pcs_true).numpy(), [4])\n  
assert_equal(tf.shape(scale_r1_pcs_false).numpy(), [1])\n\n  # Rank2 tensors\n  x_r2 = tf.ones([2, 4])\n  q_r2 = tf.ones([2, 4])\n  scale_r2_pcs_true = quantizers._get_scale(\n      \"auto\", x_r2, q_r2, scale_axis=None, per_channel_scale=True)\n  scale_r2_pcs_false = quantizers._get_scale(\n      \"auto\", x_r2, q_r2, scale_axis=None, per_channel_scale=False)\n  assert_equal(tf.shape(scale_r2_pcs_true).numpy(), [1, 4])\n  assert_equal(tf.shape(scale_r2_pcs_false).numpy(), [1, 1])\n\n  # Rank3 tensors\n  x_r3 = tf.ones([3, 3, 4])\n  q_r3 = tf.ones([3, 3, 4])\n  scale_r3_pcs_true = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, scale_axis=None, per_channel_scale=True)\n  scale_r3_pcs_false = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, scale_axis=None, per_channel_scale=False)\n  assert_equal(tf.shape(scale_r3_pcs_true).numpy(), [1, 1, 4])\n  assert_equal(tf.shape(scale_r3_pcs_false).numpy(), [1, 1, 1])\n\n  # Rank4 tensors\n  x_r4 = tf.ones([1, 1, 3, 4])\n  q_r4 = tf.ones([1, 1, 3, 4])\n  scale_r4_pcs_true = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, scale_axis=None, per_channel_scale=True)\n  scale_r4_pcs_false = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, scale_axis=None, per_channel_scale=False)\n  assert_equal(tf.shape(scale_r4_pcs_true).numpy(), [1, 1, 1, 4])\n  assert_equal(tf.shape(scale_r4_pcs_false).numpy(), [1, 1, 1, 1])\n\n\ndef _get_num_unique_elements(input_tensor):\n  return len(np.unique(input_tensor.numpy()))\n\n\ndef test_GetScale_ElementsPerScale_Scalar_ScaleAxis_EPS():\n  # Test get_scale function when elements_per_scale and scale_axis have scalar\n  # values and the input x and q tensors have rank 2\n  x_r2 = tf.random.uniform([4, 8])\n  q_r2 = tf.random.uniform([4, 8])\n  scale_r2_eps_none_ua_none = quantizers._get_scale(\n      \"auto\", x_r2, q_r2, elements_per_scale=None, scale_axis=None)\n  scale_r2_eps_2_ua_0 = quantizers._get_scale(\n      \"auto\", x_r2, q_r2, elements_per_scale=2, scale_axis=0)\n  
scale_r2_eps_2_ua_1 = quantizers._get_scale(\n      \"auto\", x_r2, q_r2, elements_per_scale=2, scale_axis=1)\n\n  assert_equal(tf.shape(scale_r2_eps_none_ua_none).numpy(), [1, 8])\n  assert_equal(_get_num_unique_elements(scale_r2_eps_none_ua_none), 8)\n\n  assert_equal(tf.shape(scale_r2_eps_2_ua_0).numpy(), [4, 1])\n  assert_equal(_get_num_unique_elements(scale_r2_eps_2_ua_0), 2)\n\n  assert_equal(tf.shape(scale_r2_eps_2_ua_1).numpy(), [1, 8])\n  assert_equal(_get_num_unique_elements(scale_r2_eps_2_ua_1), 4)\n\n  # Test get_scale function when elements_per_scale and scale_axis have scalar\n  # values and the input x and q tensors have rank 3\n  x_r3 = tf.random.uniform([2, 4, 8])\n  q_r3 = tf.random.uniform([2, 4, 8])\n  scale_r3_eps_none_ua_none = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=None, scale_axis=None)\n  scale_r3_eps_2_ua_0 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=2, scale_axis=0)\n  scale_r3_eps_2_ua_1 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=2, scale_axis=1)\n  scale_r3_eps_2_ua_2 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=2, scale_axis=2)\n\n  assert_equal(tf.shape(scale_r3_eps_none_ua_none).numpy(), [1, 1, 8])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_none), 8)\n\n  assert_equal(tf.shape(scale_r3_eps_2_ua_0).numpy(), [2, 1, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_0), 1)\n\n  assert_equal(tf.shape(scale_r3_eps_2_ua_1).numpy(), [1, 4, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_1), 2)\n\n  assert_equal(tf.shape(scale_r3_eps_2_ua_2).numpy(), [1, 1, 8])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_2), 4)\n\n  # Test get_scale function when elements_per_scale and scale_axis have scalar\n  # values and the input x and q tensors have rank 4\n  x_r4 = tf.random.uniform([2, 4, 8, 16])\n  q_r4 = tf.random.uniform([2, 4, 8, 16])\n  scale_r4_eps_none_ua_none = 
quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=None, scale_axis=None)\n  scale_r4_eps_2_ua_0 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=2, scale_axis=0)\n  scale_r4_eps_2_ua_1 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=2, scale_axis=1)\n  scale_r4_eps_2_ua_2 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=2, scale_axis=2)\n  scale_r4_eps_2_ua_3 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=2, scale_axis=3)\n\n  assert_equal(tf.shape(scale_r4_eps_none_ua_none).numpy(), [1, 1, 1, 16])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_none_ua_none), 16)\n\n  assert_equal(tf.shape(scale_r4_eps_2_ua_0).numpy(), [2, 1, 1, 1])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_0), 1)\n\n  assert_equal(tf.shape(scale_r4_eps_2_ua_1).numpy(), [1, 4, 1, 1])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_1), 2)\n\n  assert_equal(tf.shape(scale_r4_eps_2_ua_2).numpy(), [1, 1, 8, 1])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_2), 4)\n\n  assert_equal(tf.shape(scale_r4_eps_2_ua_3).numpy(), [1, 1, 1, 16])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_3), 8)\n\n\ndef test_GetScale_ElementsPerScale_List_ScaleAxis_EPS():\n  # Test get_scale function when elements_per_scale and scale_axis are lists of\n  # rank 1 and the input x and q tensors have rank 3\n  x_r3 = tf.random.uniform([2, 4, 8])\n  q_r3 = tf.random.uniform([2, 4, 8])\n\n  scale_r3_eps_none_ua_0 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=None, scale_axis=[0])\n  scale_r3_eps_2_ua_0 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=[2], scale_axis=[0])\n  scale_r3_eps_2_ua_1 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=[2], scale_axis=[1])\n  scale_r3_eps_2_ua_2 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=[2], scale_axis=[2])\n\n  
assert_equal(tf.shape(scale_r3_eps_none_ua_0).numpy(), [2, 1, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_0), 2)\n\n  assert_equal(tf.shape(scale_r3_eps_2_ua_0).numpy(), [2, 1, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_0), 1)\n\n  assert_equal(tf.shape(scale_r3_eps_2_ua_1).numpy(), [1, 4, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_1), 2)\n\n  assert_equal(tf.shape(scale_r3_eps_2_ua_2).numpy(), [1, 1, 8])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_2), 4)\n\n  # Test get_scale function when elements_per_scale and scale_axis are lists of\n  # rank 2 and the input x and q tensors have rank 3\n  x_r3 = tf.random.uniform([2, 4, 8])\n  q_r3 = tf.random.uniform([2, 4, 8])\n\n  scale_r3_eps_none_ua_01 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=None, scale_axis=[0, 1])\n  scale_r3_eps_22_ua_01 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=[2, 2], scale_axis=[0, 1])\n  scale_r3_eps_11_ua_12 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=[2, 2], scale_axis=[1, 2])\n  scale_r3_eps_11_ua_02 = quantizers._get_scale(\n      \"auto\", x_r3, q_r3, elements_per_scale=[1, 1], scale_axis=[0, 2])\n\n  assert_equal(tf.shape(scale_r3_eps_none_ua_01).numpy(), [2, 4, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_01), 8)\n\n  assert_equal(tf.shape(scale_r3_eps_22_ua_01).numpy(), [2, 4, 1])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_22_ua_01), 2)\n\n  assert_equal(tf.shape(scale_r3_eps_11_ua_12).numpy(), [1, 4, 8])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_11_ua_12), 8)\n\n  assert_equal(tf.shape(scale_r3_eps_11_ua_02).numpy(), [2, 1, 8])\n  assert_equal(_get_num_unique_elements(scale_r3_eps_11_ua_02), 16)\n\n  # Test get_scale function when elements_per_scale and scale_axis are lists of\n  # rank 3 and the input x and q tensors have rank 4\n  x_r4 = tf.random.uniform([2, 4, 8, 16])\n  q_r4 
= tf.random.uniform([2, 4, 8, 16])\n\n  scale_r4_eps_none_ua_012 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=None, scale_axis=[0, 1, 2])\n  scale_r4_eps_221_ua_012 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[0, 1, 2])\n  scale_r4_eps_221_ua_123 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[1, 2, 3])\n  scale_r4_eps_221_ua_013 = quantizers._get_scale(\n      \"auto\", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[0, 1, 3])\n\n  assert_equal(tf.shape(scale_r4_eps_none_ua_012).numpy(), [2, 4, 8, 1])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_none_ua_012), 64)\n\n  assert_equal(tf.shape(scale_r4_eps_221_ua_012).numpy(), [2, 4, 8, 1])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_012), 16)\n\n  assert_equal(tf.shape(scale_r4_eps_221_ua_123).numpy(), [1, 4, 8, 16])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_123), 128)\n\n  assert_equal(tf.shape(scale_r4_eps_221_ua_013).numpy(), [2, 4, 1, 16])\n  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_013), 32)\n\n\ndef test_GetScale_MinPO2Exponent_MaxPO2Exponent():\n  \"\"\"Verify get_scale function with min and max po2_exponent clipping.\"\"\"\n\n  def _get_min_max_po2_exponent(x):\n    \"\"\"Get min and max po2 exponent of x.\"\"\"\n    po2_x = K.log(x)/np.log(2.0)\n    return (tf.math.reduce_min(po2_x).numpy(),\n            tf.math.reduce_max(po2_x).numpy())\n\n  # generate small decimal numbers to verify that po2 clipping works properly\n  x = 2**tf.random.uniform(shape=[2, 4, 8], minval=-50, maxval=0)\n  q = 2**tf.random.uniform(shape=[2, 4, 8], minval=-50, maxval=0)\n\n  # set various min and max po2 exponents for the scale\n  scale_min_neg3_max_1 = quantizers._get_scale(\n      \"auto_po2\", x, q, elements_per_scale=4, scale_axis=2, min_po2_exponent=-3,\n      max_po2_exponent=1)\n\n  scale_min_neg8_max_0 = quantizers._get_scale(\n  
    \"auto_po2\", x, q, elements_per_scale=4, scale_axis=2, min_po2_exponent=-8,\n      max_po2_exponent=0)\n\n  scale_min_neg10_max_1 = quantizers._get_scale(\n      \"auto_po2\", x, q, elements_per_scale=4, scale_axis=2,\n      min_po2_exponent=-10, max_po2_exponent=1)\n\n  # verify that the output scales have the correct min and max ranges\n  assert_equal(tf.shape(scale_min_neg3_max_1).numpy(), [1, 1, 8])\n  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg3_max_1)\n  assert min_po2_exp >= -3\n  assert max_po2_exp <= 1\n\n  assert_equal(tf.shape(scale_min_neg8_max_0).numpy(), [1, 1, 8])\n  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg8_max_0)\n  assert min_po2_exp >= -8\n  assert max_po2_exp <= 0\n\n  assert_equal(tf.shape(scale_min_neg10_max_1).numpy(), [1, 1, 8])\n  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg10_max_1)\n  assert min_po2_exp >= -10\n  assert max_po2_exp <= 1\n\n\ndef test_GetUnrolledShape_GetRolledBackShape():\n  x_r4 = [4, 4, 8, 16]\n\n  # Scalar unroll_factor and unroll_axis - Test _get_unrolled_shape\n  unrolled_x_r4_uf_2_ua_0 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=2, unroll_axis=0)\n  unrolled_x_r4_uf_2_ua_1 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=2, unroll_axis=1)\n  unrolled_x_r4_uf_2_ua_2 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=2, unroll_axis=2)\n  unrolled_x_r4_uf_2_ua_3 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=2, unroll_axis=3)\n\n  assert_equal(unrolled_x_r4_uf_2_ua_0, ([2, 2, 4, 8, 16], 0))\n  assert_equal(unrolled_x_r4_uf_2_ua_1, ([4, 2, 2, 8, 16], 1))\n  assert_equal(unrolled_x_r4_uf_2_ua_2, ([4, 4, 4, 2, 16], 2))\n  assert_equal(unrolled_x_r4_uf_2_ua_3, ([4, 4, 8, 8, 2], 3))\n\n  # Scalar unroll_factor and unroll_axis - Test _get_rolled_back_shape\n  rolled_back_x_r4_uf_2_ua_0 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_2_ua_0[0], roll_axis=unrolled_x_r4_uf_2_ua_0[1])\n  
rolled_back_x_r4_uf_2_ua_1 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_2_ua_1[0], roll_axis=unrolled_x_r4_uf_2_ua_1[1])\n  rolled_back_x_r4_uf_2_ua_2 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_2_ua_2[0], roll_axis=unrolled_x_r4_uf_2_ua_2[1])\n  rolled_back_x_r4_uf_2_ua_3 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_2_ua_3[0], roll_axis=unrolled_x_r4_uf_2_ua_3[1])\n\n  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_0)\n  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_1)\n  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_2)\n  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_3)\n\n  # List[2] unroll_factor and unroll_axis - Test _get_unrolled_shape\n  unrolled_x_r4_uf_24_ua_01 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=[2, 4], unroll_axis=[0, 1])\n  unrolled_x_r4_uf_24_ua_12 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=[2, 4], unroll_axis=[1, 2])\n  unrolled_x_r4_uf_24_ua_13 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=[2, 4], unroll_axis=[1, 3])\n  unrolled_x_r4_uf_24_ua_34 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=[2, 4], unroll_axis=[2, 3])\n\n  assert_equal(unrolled_x_r4_uf_24_ua_01, ([2, 2, 1, 4, 8, 16], [0, 2]))\n  assert_equal(unrolled_x_r4_uf_24_ua_12, ([4, 2, 2, 2, 4, 16], [1, 3]))\n  assert_equal(unrolled_x_r4_uf_24_ua_13, ([4, 2, 2, 8, 4, 4], [1, 4]))\n  assert_equal(unrolled_x_r4_uf_24_ua_34, ([4, 4, 4, 2, 4, 4], [2, 4]))\n\n  # List[2] unroll_factor and unroll_axis - Test _get_rolled_back_shape\n  rolled_back_x_r4_uf_24_ua_01 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_24_ua_01[0], roll_axis=unrolled_x_r4_uf_24_ua_01[1])\n  rolled_back_x_r4_uf_24_ua_12 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_24_ua_12[0], roll_axis=unrolled_x_r4_uf_24_ua_12[1])\n  rolled_back_x_r4_uf_24_ua_13 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_24_ua_13[0], roll_axis=unrolled_x_r4_uf_24_ua_13[1])\n  
rolled_back_x_r4_uf_24_ua_34 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_24_ua_34[0], roll_axis=unrolled_x_r4_uf_24_ua_34[1])\n\n  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_01)\n  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_12)\n  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_13)\n  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_34)\n\n  # List[3] unroll_factor and unroll_axis - Test _get_unrolled_shape\n  unrolled_x_r4_uf_242_ua_012 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=[2, 4, 2], unroll_axis=[0, 1, 2])\n  unrolled_x_r4_uf_242_ua_023 = quantizers._get_unrolled_shape(\n      x_r4, unroll_factor=[2, 4, 2], unroll_axis=[0, 2, 3])\n\n  assert_equal(unrolled_x_r4_uf_242_ua_012, ([2, 2, 1, 4, 4, 2, 16], [0, 2, 4]))\n  assert_equal(unrolled_x_r4_uf_242_ua_023, ([2, 2, 4, 2, 4, 8, 2], [0, 3, 5]))\n\n  # List[3] unroll_factor and unroll_axis - Test _get_rolled_back_shape\n  rolled_back_x_r4_uf_242_ua_012 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_242_ua_012[0],\n      roll_axis=unrolled_x_r4_uf_242_ua_012[1])\n  rolled_back_x_r4_uf_242_ua_023 = quantizers._get_rolled_back_shape(\n      unrolled_x_r4_uf_242_ua_023[0],\n      roll_axis=unrolled_x_r4_uf_242_ua_023[1])\n\n  assert_equal(x_r4, rolled_back_x_r4_uf_242_ua_012)\n  assert_equal(x_r4, rolled_back_x_r4_uf_242_ua_023)\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/quantizer_registry_test.py",
    "content": "# Copyright 2024 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Unit tests for QKeras quantizer registry.\"\"\"\n\nimport numpy as np\nimport pytest\n\nfrom qkeras import quantizer_registry\nfrom qkeras import quantizers\n\n\n@pytest.mark.parametrize(\n    \"quantizer_name\",\n    [\n        \"quantized_linear\",\n        \"quantized_bits\",\n        \"bernoulli\",\n        \"ternary\",\n        \"stochastic_ternary\",\n        \"binary\",\n        \"stochastic_binary\",\n        \"quantized_relu\",\n        \"quantized_ulaw\",\n        \"quantized_tanh\",\n        \"quantized_sigmoid\",\n        \"quantized_po2\",\n        \"quantized_relu_po2\",\n        \"quantized_hswish\",\n    ],\n)\ndef test_lookup(quantizer_name):\n  quantizer = quantizer_registry.lookup_quantizer(quantizer_name)\n  is_class_instance = isinstance(quantizer, type)\n  np.testing.assert_equal(is_class_instance, True)\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/range_test.py",
    "content": "# Copyright 2020 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Test range values that are used for codebook computation\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport numpy as np\nfrom numpy.testing import assert_allclose\n\nimport pytest\nfrom tensorflow.keras import backend as K\n\nfrom qkeras import quantized_relu\nfrom qkeras import quantized_bits\n\n\n@pytest.mark.parametrize(\n    'bits, integer, expected_values',\n    [\n        (3, 0, np.array([0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875])),\n        (3, 1, np.array([0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75])),\n        (3, 2, np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5])),\n        (3, 3, np.array([0, 1, 2, 3, 4, 5, 6, 7])),\n        (6, 1, np.array(\n            [0.0, 0.03125, 0.0625, 0.09375, 0.125, 0.15625, 0.1875, 0.21875,\n            0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,\n            0.5, 0.53125, 0.5625, 0.59375, 0.625, 0.65625, 0.6875, 0.71875,\n            0.75, 0.78125, 0.8125, 0.84375, 0.875, 0.90625, 0.9375, 0.96875,\n            1.0, 1.03125, 1.0625, 1.09375, 1.125, 1.15625, 1.1875, 1.21875,\n            1.25, 1.28125, 1.3125, 1.34375, 1.375, 1.40625, 1.4375, 1.46875,\n            1.5, 1.53125, 1.5625, 1.59375, 1.625, 1.65625, 1.6875, 1.71875,\n           
 1.75, 1.78125, 1.8125, 1.84375, 1.875, 1.90625, 1.9375, 1.96875]))\n    ])\ndef test_quantized_relu_range(bits, integer, expected_values):\n  \"\"\"Test quantized_relu range function.\"\"\"\n  q = quantized_relu(bits, integer)\n  result = q.range()\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\n@pytest.mark.parametrize(\n    'bits, integer, expected_values',\n    [\n        (3, 0, np.array([0.0, 0.25, 0.5, 0.75, -1.0, -0.75, -0.5, -0.25])),\n        (3, 1, np.array([0.0, 0.5, 1.0, 1.5, -2.0, -1.5, -1.0, -0.5])),\n        (3, 2, np.array([0.0, 1.0, 2.0, 3.0, -4.0, -3.0, -2.0, -1.0])),\n        (3, 3, np.array([0.0, 2.0, 4.0, 6.0, -8.0, -6.0, -4.0, -2.0])),\n        (6, 1, np.array(\n            [0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625,\n             0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 1.0625, 1.125, 1.1875,\n             1.25, 1.3125, 1.375, 1.4375, 1.5, 1.5625, 1.625, 1.6875, 1.75, 1.8125,\n             1.875, 1.9375, -2.0, -1.9375, -1.875, -1.8125, -1.75, -1.6875, -1.625,\n             -1.5625, -1.5, -1.4375, -1.375, -1.3125, -1.25, -1.1875, -1.125, -1.0625,\n             -1.0, -0.9375, -0.875, -0.8125, -0.75, -0.6875, -0.625, -0.5625, -0.5,\n             -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625]))\n    ])\ndef test_quantized_bits_range(bits, integer, expected_values):\n  \"\"\"Test quantized_bits range function.\"\"\"\n  q = quantized_bits(bits, integer)\n  result = q.range()\n  assert_allclose(result, expected_values, rtol=1e-05)\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/registry_test.py",
    "content": "# Copyright 2024 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Unit tests for registry.\"\"\"\n\nfrom numpy.testing import assert_equal\nfrom numpy.testing import assert_raises\nimport pytest\n\nfrom qkeras import registry\n\n\ndef sample_function(arg):\n  \"\"\"Sample function for testing.\"\"\"\n  return arg\n\n\nclass SampleClass(object):\n  \"\"\"Sample class for testing.\"\"\"\n\n  def __init__(self, arg):\n    self._arg = arg\n\n  def get_arg(self):\n    return self._arg\n\n\ndef test_register_function():\n  reg = registry.Registry()\n  reg.register(sample_function)\n  registered_function = reg.lookup('sample_function')\n  # Call the function to validate.\n  assert_equal(registered_function, sample_function)\n\n\ndef test_register_class():\n  reg = registry.Registry()\n  reg.register(SampleClass)\n  registered_class = reg.lookup('SampleClass')\n  # Create and call class object to validate.\n  assert_equal(SampleClass, registered_class)\n\n\ndef test_register_with_name():\n  reg = registry.Registry()\n  name = 'NewSampleClass'\n  reg.register(SampleClass, name=name)\n  registered_class = reg.lookup(name)\n  # Create and call class object to validate.\n  assert_equal(SampleClass, registered_class)\n\n\ndef test_lookup_missing_item():\n  reg = registry.Registry()\n  assert_raises(KeyError, reg.lookup, 'foo')\n\n\ndef 
test_lookup_missing_name():\n  reg = registry.Registry()\n  sample_class = SampleClass(arg=1)\n  # objects don't have a default __name__ attribute.\n  assert_raises(AttributeError, reg.register, sample_class)\n\n  # check that the object can be retrieved with a registered name.\n  reg.register(sample_class, 'sample_class')\n  assert_equal(sample_class, reg.lookup('sample_class'))\n\n\nif __name__ == '__main__':\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/safe_eval_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Implements a safe evaluation.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport operator\nimport pytest\n\nfrom qkeras.safe_eval import GetParams\nfrom qkeras.safe_eval import safe_eval\n\n\nadd = operator.add\n\n\ndef test_get_params1():\n  s = \"(3, 0.3, sep=5  )\"\n  args, kwargs = GetParams(s)\n  assert args == [3, 0.3]\n  assert kwargs == {\"sep\": 5}\n\n\ndef test_get_params2():\n  s = \"(  )\"\n\n  args, kwargs = GetParams(s)\n\n  assert not args\n  assert not kwargs\n\n\ndef test_get_params3():\n  s = (\"(3, 0.3,  -1.0, True, False, 'string1', num1=0.1, num2=-3.0, \"\n       \"str1='string2', bool1=True, bool2=False)\")\n\n  args, kwargs = GetParams(s)\n\n  assert args == [3, 0.3, -1.0, True, False, \"string1\"]\n  assert kwargs == {\n      \"num1\": 0.1,\n      \"num2\": -3.0,\n      \"str1\": \"string2\",\n      \"bool1\": True,\n      \"bool2\": False\n  }\n\n\ndef test_safe_eval1():\n  s = \"add(3,3)\"\n  assert safe_eval(s, globals()) == 6\n\n\ndef i_func(s):\n  return -s\n\n\ndef myadd2(a, b):\n  return i_func(a) + i_func(b)\n\n\ndef myadd(a=32, b=10):\n  return a + b\n\nclass myaddcls(object):\n  def __call__(self, a=32, b=10):\n    return a + b\n\ndef test_safe_eval2():\n  s_add = [3, 
39]\n  assert safe_eval(\"add\", globals(), *s_add) == 42\n\n\ndef test_safe_eval3():\n  assert safe_eval(\"myadd()\", globals()) == 42\n  assert safe_eval(\"myadd(a=39)\", globals(), b=3) == 42\n\n\ndef test_safe_eval4():\n  assert safe_eval(\"myadd2(a=39)\", globals(), b=3) == -42\n  assert safe_eval(\"myadd2(a= 39)\", globals(), b=3) == -42\n  assert safe_eval(\"myadd2(a= 39, b = 3)\", globals()) == -42\n\ndef test_safe_eval5():\n  assert safe_eval(\"myadd\", globals())(3,39) == 42\n  assert safe_eval(\"myaddcls\", globals())(3,39) == 42\n  assert safe_eval(\"myaddcls()\", globals())(3,39) == 42\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  },
  {
    "path": "tests/utils_test.py",
    "content": "# Copyright 2019 Google LLC\n#\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for methods in utils.py.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport pytest\nimport os\nimport tempfile\nfrom tensorflow.keras.layers import *\nfrom tensorflow.keras.models import *\n\nfrom qkeras import *\nfrom qkeras.utils import get_model_sparsity\nfrom qkeras.utils import model_quantize\nfrom qkeras.utils import convert_to_folded_model\nfrom qkeras.utils import is_TFOpLambda_layer\nfrom qkeras.utils import find_bn_fusing_layer_pair\nfrom qkeras.utils import add_bn_fusing_weights\nfrom qkeras.utils import clone_model_and_freeze_auto_po2_scale\nfrom qkeras.utils import load_qmodel\n\n\ndef create_quantized_network():\n  \"\"\"Creates a simple quantized conv net model.\"\"\"\n  # Create a simple model\n  xi = Input((28, 28, 1))\n  x = Conv2D(32, (3, 3))(xi)\n  x = Activation(\"relu\")(x)\n  x = Conv2D(32, (3, 3), activation=\"relu\")(x)\n  x = Activation(\"softmax\")(x)\n  model = Model(inputs=xi, outputs=x)\n\n  # Quantize the model\n  quantizer_config = {\n      \"QConv2D\": {\n          \"kernel_quantizer\": \"quantized_bits(4)\",\n          \"bias_quantizer\": \"quantized_bits(4)\"\n      },\n      \"QActivation\": {\n          \"relu\": \"ternary\"\n      }\n  }\n  
activation_bits = 4\n  qmodel = model_quantize(model, quantizer_config, activation_bits)\n  return qmodel\n\n\ndef create_quantized_po2_network():\n  \"\"\"Creates a simple quantized conv net model with po2 quantizers.\"\"\"\n  xi = Input((28, 28, 1))\n  x = QConv2D(32, (3, 3), kernel_quantizer=quantized_po2(4))(xi)\n  x = QActivation(quantized_bits(8))(x)\n  x = QConv2D(32, (3, 3), kernel_quantizer=quantized_po2(4))(x)\n  x = QActivation(quantized_bits(8))(x)\n  qmodel = Model(xi, x, name='simple_po2_qmodel')\n  return qmodel\n\n\ndef set_network_sparsity(model, sparsity):\n  \"\"\"Set the sparsity of the given model using random weights.\"\"\"\n\n  for layer in model.layers:\n    new_weights = []\n    for w in layer.get_weights():\n      # Create weights with desired sparsity\n      sparse_weights = np.random.rand(w.size)+0.1\n      sparse_weights[:int(w.size*sparsity)] = 0\n      np.random.shuffle(sparse_weights)\n      new_weights.append(sparse_weights.reshape(w.shape))\n    layer.set_weights(new_weights)\n  return model\n\n\ndef test_get_model_sparsity():\n  \"\"\"Tests if the method get_model_sparsity in utils.py works correctly.\"\"\"\n  qmodel = create_quantized_network()\n\n  # Generate sparsity levels to test\n  sparsity_levels = np.concatenate((np.random.rand(10), [1.0, 0.0])).round(2)\n\n  # Test various sparsity levels\n  for true_sparsity in sparsity_levels:\n    qmodel = set_network_sparsity(qmodel, true_sparsity)\n    calc_sparsity = get_model_sparsity(qmodel)\n    assert np.abs(calc_sparsity - true_sparsity) < 0.01\n\n\ndef test_get_po2_model_sparsity():\n  \"\"\"Tests get_model_sparsity on a po2-quantized model.\n\n  Models quantized with po2 quantizers should have a sparsity near 0 because\n  if the exponent is set to 0, the value of the weight will equal 2^0 == 1 != 0\n  \"\"\"\n  qmodel = create_quantized_po2_network()\n  qmodel.use_legacy_config = True\n\n  # Generate sparsity levels to test\n  sparsity_levels = 
np.concatenate((np.random.rand(10), [1.0, 0.0])).round(2)\n\n  # Test various sparsity levels\n  for set_sparsity in sparsity_levels:\n    qmodel = set_network_sparsity(qmodel, set_sparsity)\n    calc_sparsity = get_model_sparsity(qmodel)\n    assert np.abs(calc_sparsity - 0) < 0.01\n\n\ndef test_convert_to_folded_model():\n  \"\"\"Test convert_to_folded_model to work properly on non-sequential model.\"\"\"\n\n  def get_add_model():\n    x = x_in = Input(shape=(4, 4, 1), name=\"input\")\n    x1 = Conv2D(4, kernel_size=(2, 2), padding=\"valid\", strides=(1, 1),\n                name=\"conv2d_1\")(x)\n    x1 = BatchNormalization(name=\"bn_1\")(x1)\n    x1 = Activation(\"relu\", name=\"relu_1\")(x1)\n    x2 = Conv2D(4, kernel_size=(2, 2), padding=\"valid\", strides=(1, 1),\n                name=\"conv2d_2\")(x)\n    x2 = BatchNormalization(name=\"bn_2\")(x2)\n    x2 = Activation(\"relu\", name=\"relu_2\")(x2)\n    x = Add(name=\"add\")([x1, x2])\n    x = Softmax()(x)\n\n    return Model(inputs=[x_in], outputs=[x])\n\n  model = get_add_model()\n\n  fmodel, _ = convert_to_folded_model(model)\n\n  assert fmodel.layers[5].name == \"add\"\n\n  # test if convert_to_folded_model work with TFOpLambda layers\n  def hard_sigmoid(x):\n    return ReLU(6.)(x + 3.) * (1. 
/ 6.)\n\n  def hard_swish(x):\n    return Multiply()([hard_sigmoid(x), x])\n\n  def get_lambda_model():\n    x = x_in = Input(shape=(4, 4, 1), name=\"input\")\n    x = Conv2D(\n        4, kernel_size=(2, 2), padding=\"valid\", strides=(1, 1),\n        name=\"conv2d_1\")(x)\n    x = hard_swish(x)\n\n    return Model(inputs=[x_in], outputs=[x])\n\n  model = get_lambda_model()\n  fmodel, _ = convert_to_folded_model(model)\n\n  assert is_TFOpLambda_layer(model.layers[2])\n  assert is_TFOpLambda_layer(model.layers[4])\n  assert isinstance(fmodel.layers[5], Multiply)\n\n\ndef test_find_bn_fusing_layer_pair():\n  x = x_in = Input((23, 23, 1), name=\"input\")\n  x1 = QConv2D(\n      2, 2, 1,\n      kernel_quantizer=quantized_bits(4, 0, 1),\n      bias_quantizer=quantized_bits(4, 0, 1),\n      use_bias=False,\n      name=\"conv1\")(x)\n  x1 = QBatchNormalization(\n      mean_quantizer=quantized_bits(4, 0, 1),\n      gamma_quantizer=None,\n      variance_quantizer=None,\n      beta_quantizer=quantized_bits(4, 0, 1),\n      inverse_quantizer=quantized_bits(8, 0, 1), name=\"bn1\")(x1)\n\n  x2 = QConv2D(\n      2, 2, 1,\n      kernel_quantizer=quantized_bits(3, 0),\n      bias_quantizer=quantized_bits(3, 2),\n      name=\"conv2\")(x)\n\n  x2 = QBatchNormalization(\n      mean_quantizer=quantized_bits(4, 0, 1),\n      gamma_quantizer=None,\n      variance_quantizer=None,\n      beta_quantizer=quantized_bits(4, 0, 1),\n      inverse_quantizer=quantized_bits(8, 0, 1), name=\"bn2\")(x2)\n\n  x = Add(name=\"add\")([x1, x2])\n  model = Model(inputs=[x_in], outputs=[x])\n\n  (conv_bn_pair_dict, _) = find_bn_fusing_layer_pair(model)\n  assert conv_bn_pair_dict[\"conv1\"] == \"bn1\"\n  assert conv_bn_pair_dict[\"conv2\"] == \"bn2\"\n\n  conv_layer = model.layers[1]\n  bn_layer = model.layers[3]\n\n  conv_layer.set_weights([\n      np.array([[[[0.5, 0.75]], [[1.5, -0.625]]],\n                [[[-0.875, 1.25]], [[-1.25, -2.5]]]])\n  ])\n  bn_layer.set_weights([\n      np.array([1., 
0.25]),\n      np.array([0.5, 1.0]),\n      np.array([0.5, 2.5]),\n      np.array([1.5, 1.])\n  ])\n  saved_weights = {}\n  saved_weights[conv_layer.name] = {}\n  add_bn_fusing_weights(conv_layer, bn_layer, saved_weights)\n\n  d = saved_weights[conv_layer.name]\n  assert d[\"enable_bn_fusing\"]\n  assert d[\"fused_bn_layer_name\"] == \"bn1\"\n  assert np.all(d[\"bn_inv\"] == np.array([0.8125, 0.25]))\n  assert np.all(d[\"fused_bias\"] == np.array([0.09375, 0.65625]))\n\n\ndef create_test_model_for_scale_freezing(bias_quantizer):\n  def _create_simple_model(bias_quantizer):\n    x = x_in = tf.keras.Input((4, 4, 1), name=\"input\")\n    x = QConv2D(\n        filters=4, kernel_size=2, strides=2,\n        kernel_quantizer=quantized_bits(4, 2, 1, alpha=\"auto_po2\"),\n        bias_quantizer=quantized_bits(4, 2, 1),\n        use_bias=False,\n        name=\"conv\")(x)\n    x = QDepthwiseConv2D(\n        kernel_size=2, strides=1,\n        depthwise_quantizer=quantized_bits(6, 3, 1, alpha=\"auto_po2\"),\n        use_bias=False,\n        bias_quantizer=quantized_bits(4, 2, 1),\n        name=\"dw_conv\")(x)\n    x = QBatchNormalization(\n        mean_quantizer=quantized_bits(4, 2, 1),\n        gamma_quantizer=None,\n        variance_quantizer=None,\n        beta_quantizer=quantized_bits(4, 0, 1),\n        inverse_quantizer=quantized_bits(8, 0, 1, alpha=\"auto_po2\"),\n        name=\"bn\")(x)\n\n    x = QActivation(activation=quantized_bits(4, 0), name=\"relu\")(x)\n    x = tf.keras.layers.Flatten(name=\"flatten\")(x)\n    x = QDense(units=2,\n               kernel_quantizer=quantized_bits(4, 2, 1, alpha=\"auto_po2\"),\n               bias_quantizer=bias_quantizer, name=\"dense\")(x)\n    model = tf.keras.Model(inputs=x_in, outputs=x)\n\n    return model\n\n  def _set_weights(model):\n    conv_w = [np.array(\n        [0.23, 2.76, 0.1, 0.33, 0.53, 0.16, 0.3, 1.7, -0.9,\n         1.43, 2.31, -0.2, -1.7, 0.39, -2.03, 1.79]).reshape(2, 2, 1, 4)]\n\n    dw_conv_w = [np.array([\n    
    0.03, 3.6, 2.1, 1.2, 0.13, 1.3, -0.3, 1.2, -0.7,\n        -10.3, 11.7, -0.92, -10.7, 0.59, -1.93, 2.8]).reshape((2, 2, 4, 1))]\n\n    bn_w = [np.array([0.28, 1.33, 2.27, 3.36]),\n            np.array([0.31, 0.1, 0.03, 4.26]),\n            np.array([0.89, -0.21, 1.97, 2.06]),\n            np.array([1.2, 0.9, 13.2, 10.9])]\n\n    dense_w = np.array(\n        [0.13, 0.66, 0.21, 0.23, 1.07, -0.79, 1.83, 1.81])\n    dense_w = [dense_w.reshape((4, 2)), np.array([-1.3, 0.7])]\n\n    model.get_layer(\"conv\").set_weights(conv_w)\n    model.get_layer(\"dw_conv\").set_weights(dw_conv_w)\n    model.get_layer(\"bn\").set_weights(bn_w)\n    model.get_layer(\"dense\").set_weights(dense_w)\n\n  orig_model = _create_simple_model(bias_quantizer)\n  _set_weights(orig_model)\n\n  return orig_model\n\n\ndef test_clone_model_and_freeze_auto_po2_scale():\n  \"\"\"Test clone_model_and_freeze_auto_po2_scale to work properly.\"\"\"\n\n  orig_model = create_test_model_for_scale_freezing(quantized_bits(4, 2, 1))\n  _, new_hw = clone_model_and_freeze_auto_po2_scale(\n      orig_model, quantize_model_weights=True)\n\n  # Check if the new model's weights and scales are derived properly.\n  np.testing.assert_array_equal(\n      new_hw[\"conv\"][\"weights\"][0],\n      np.array(\n          [[[[0.5, 6, 0, 0.5]], [[1, 0, 0.5, 3.5]]],\n           [[[-2., 3., 3.5, -0.5]], [[-3.5, 1., -3.5, 3.5]]]]))\n\n  np.testing.assert_array_equal(\n      new_hw[\"conv\"][\"scales\"][0], np.array([[[[0.25, 0.5, 0.25, 0.25]]]]))\n\n  np.testing.assert_array_equal(\n      new_hw[\"dw_conv\"][\"weights\"][0].numpy().flatten(),\n      np.array([\n          0., 14, 8, 4, 0, 6, -2, 4, -2, -42, 46, -4, -42, 2, -8, 12]))\n\n  np.testing.assert_array_equal(\n      new_hw[\"dense\"][\"scales\"][0], np.array([[0.25, 0.25]]))\n\n\ndef test_clone_model_and_freeze_auto_po2_scale_serialization():\n  # Test if the cloned model can be saved and loaded properly.\n  orig_model = 
create_test_model_for_scale_freezing(quantized_bits(4, 2, 1))\n  new_model, _ = clone_model_and_freeze_auto_po2_scale(\n      orig_model, quantize_model_weights=True)\n\n  fd, fname = tempfile.mkstemp(\".hdf5\")\n  new_model.save(fname)\n  _ = load_qmodel(fname)\n  os.close(fd)\n  os.remove(fname)\n\n\ndef test_clone_model_and_freeze_auto_po2_scale_error():\n  orig_model = create_test_model_for_scale_freezing(\n      quantized_bits(4, 2, 1, alpha=\"auto_po2\"))\n  # Test if the function raises an error when there are more than one\n  # auto_po2 quantizers in a layer.\n  with pytest.raises(ValueError):\n    clone_model_and_freeze_auto_po2_scale(\n        orig_model, quantize_model_weights=False)\n\n\nif __name__ == \"__main__\":\n  pytest.main([__file__])\n"
  }
]