Repository: google/qkeras Branch: master Commit: 5e0cd30c20b1 Files: 140 Total size: 1.4 MB Directory structure: gitextract_ead9uto5/ ├── .github/ │ └── workflows/ │ └── ci.yml ├── CHANGELOG ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples/ │ ├── example_act.py │ ├── example_b2t.py │ ├── example_cifar10_po2.py │ ├── example_keras_to_qkeras.py │ ├── example_mnist.py │ ├── example_mnist_ae.py │ ├── example_mnist_b2t.py │ ├── example_mnist_bn.py │ ├── example_mnist_po2.py │ ├── example_mnist_prune.py │ ├── example_qdense.py │ ├── example_qoctave.py │ └── example_ternary.py ├── experimental/ │ └── lo/ │ ├── __init__.py │ ├── compress.py │ ├── conv2d.py │ ├── dense.py │ ├── generate_rf_code.py │ ├── optimizer.py │ ├── random_forest/ │ │ ├── __init__.py │ │ ├── gen_random_tree.py │ │ ├── parser.py │ │ ├── random_forest.py │ │ ├── random_tree.py │ │ └── utils.py │ ├── receptive.py │ ├── table/ │ │ ├── __init__.py │ │ ├── parser.py │ │ └── utils.py │ └── utils.py ├── notebook/ │ ├── AutoQKeras.ipynb │ ├── CodebookQuantization.ipynb │ ├── QKerasTutorial.ipynb │ └── QRNNTutorial.ipynb ├── qkeras/ │ ├── __init__.py │ ├── autoqkeras/ │ │ ├── __init__.py │ │ ├── autoqkeras_internal.py │ │ ├── examples/ │ │ │ └── run/ │ │ │ ├── get_data.py │ │ │ ├── get_model.py │ │ │ ├── networks/ │ │ │ │ ├── __init__.py │ │ │ │ └── conv_block.py │ │ │ └── plot_history.py │ │ ├── forgiving_metrics/ │ │ │ ├── __init__.py │ │ │ ├── forgiving_bits.py │ │ │ ├── forgiving_energy.py │ │ │ └── forgiving_factor.py │ │ ├── quantization_config.py │ │ ├── tests/ │ │ │ └── test_forgiving_factor.py │ │ └── utils.py │ ├── b2t.py │ ├── base_quantizer.py │ ├── bn_folding_utils.py │ ├── callbacks.py │ ├── codebook.py │ ├── estimate.py │ ├── experimental/ │ │ └── quantizers/ │ │ ├── __init__.py │ │ └── quantizers_po2.py │ ├── qconv2d_batchnorm.py │ ├── qconvolutional.py │ ├── qdepthwise_conv2d_transpose.py │ ├── qdepthwiseconv2d_batchnorm.py │ ├── qlayers.py │ ├── qmac.py │ ├── 
qmodel.proto │ ├── qnormalization.py │ ├── qoctave.py │ ├── qpooling.py │ ├── qrecurrent.py │ ├── qseparable_conv2d_transpose.py │ ├── qtools/ │ │ ├── DnC/ │ │ │ ├── divide_and_conquer.py │ │ │ └── dnc_layer_cost_ace.py │ │ ├── __init__.py │ │ ├── config_public.py │ │ ├── examples/ │ │ │ ├── example_generate_json.py │ │ │ └── example_get_energy.py │ │ ├── generate_layer_data_type_map.py │ │ ├── interface.py │ │ ├── qenergy/ │ │ │ ├── __init__.py │ │ │ └── qenergy.py │ │ ├── qgraph.py │ │ ├── qtools_util.py │ │ ├── quantized_operators/ │ │ │ ├── __init__.py │ │ │ ├── accumulator_factory.py │ │ │ ├── accumulator_impl.py │ │ │ ├── adder_factory.py │ │ │ ├── adder_impl.py │ │ │ ├── divider_factory.py │ │ │ ├── divider_impl.py │ │ │ ├── fused_bn_factory.py │ │ │ ├── merge_factory.py │ │ │ ├── multiplier_factory.py │ │ │ ├── multiplier_impl.py │ │ │ ├── qbn_factory.py │ │ │ ├── quantizer_factory.py │ │ │ ├── quantizer_impl.py │ │ │ └── subtractor_factory.py │ │ ├── run_qtools.py │ │ └── settings.py │ ├── quantizer_imports.py │ ├── quantizer_registry.py │ ├── quantizers.py │ ├── registry.py │ ├── safe_eval.py │ └── utils.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests/ ├── automatic_conversion_test.py ├── autoqkeras_test.py ├── bn_folding_test.py ├── callbacks_test.py ├── codebook_test.py ├── leakyrelu_test.py ├── min_max_test.py ├── print_qstats_test.py ├── qactivation_test.py ├── qadaptiveactivation_test.py ├── qalpha_test.py ├── qconvolutional_test.py ├── qdepthwise_conv2d_transpose_test.py ├── qlayers_test.py ├── qmac_test.py ├── qnoise_test.py ├── qpooling_test.py ├── qrecurrent_test.py ├── qseparable_conv2d_transpose_test.py ├── qtools_model_test.py ├── qtools_util_test.py ├── quantizer_impl_test.py ├── quantizer_registry_test.py ├── range_test.py ├── registry_test.py ├── safe_eval_test.py └── utils_test.py ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .github/workflows/ci.yml ================================================ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: CI tests on: push: branches: [ master ] pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python 3.7 uses: actions/setup-python@v2 with: python-version: 3.7 - name: Install dependencies run: | python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi pip install . python setup.py install - name: Test with pytest run: | pytest ================================================ FILE: CHANGELOG ================================================ v0.5, 2019/07 -- Initial release. v0.6, 2020/03 -- Support tensorflow 2.0, tf.keras and python3. v0.7, 2020/03 -- Enhancement of binary and ternary quantization. ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. Head over to https://cla.developers.google.com/ to see your current agreements on file or to sign a new one. You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review.
We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. ## Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). ================================================ FILE: LICENSE ================================================ Copyright 2019 The QKeras Authors. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ include *.txt recursive-include docs *.txt ================================================ FILE: README.md ================================================ # QKeras [github.com/google/qkeras](https://github.com/google/qkeras) ## Introduction QKeras is a quantization extension to Keras that provides a drop-in replacement for some of the Keras layers, especially the ones that create parameters and activation layers, and perform arithmetic operations, so that we can quickly create a deep quantized version of a Keras network. According to Tensorflow documentation, Keras is a high-level API to build and train deep learning models. It's used for fast prototyping, advanced research, and production, with three key advantages: - User friendly Keras has a simple, consistent interface optimized for common use cases. It provides clear and actionable feedback for user errors. - Modular and composable Keras models are made by connecting configurable building blocks together, with few restrictions. - Easy to extend Write custom building blocks to express new ideas for research. Create new layers, loss functions, and develop state-of-the-art models. QKeras is being designed to extend the functionality of Keras using Keras' design principle, i.e. 
being user friendly, modular and extensible, adding to it being "minimally intrusive" of Keras native functionality. In order to successfully quantize a model, users need to replace variable creating layers (Dense, Conv2D, etc) by their counterparts (QDense, QConv2D, etc), and any layers that perform math operations need to be quantized afterwards. ## Publications - Claudionor N. Coelho Jr, Aki Kuusela, Shan Li, Hao Zhuang, Jennifer Ngadiuba, Thea Klaeboe Aarrestad, Vladimir Loncar, Maurizio Pierini, Adrian Alan Pol, Sioni Summers, "Automatic heterogeneous quantization of deep neural networks for low-latency inference on the edge for particle detectors", Nature Machine Intelligence (2021), https://www.nature.com/articles/s42256-021-00356-5 - Claudionor N. Coelho Jr., Aki Kuusela, Hao Zhuang, Thea Aarrestad, Vladimir Loncar, Jennifer Ngadiuba, Maurizio Pierini, Sioni Summers, "Ultra Low-latency, Low-area Inference Accelerators using Heterogeneous Deep Quantization with QKeras and hls4ml", http://arxiv.org/abs/2006.10159v1 - Erwei Wang, James J. Davis, Daniele Moro, Piotr Zielinski, Claudionor Coelho, Satrajit Chatterjee, Peter Y. K. Cheung, George A. 
Constantinides, "Enabling Binary Neural Network Training on the Edge", https://arxiv.org/abs/2102.04270 ## Layers Implemented in QKeras - QDense - QConv1D - QConv2D - QDepthwiseConv2D - QSeparableConv1D (depthwise + pointwise convolution, without quantizing the activation values after the depthwise step) - QSeparableConv2D (depthwise + pointwise convolution, without quantizing the activation values after the depthwise step) - QMobileNetSeparableConv2D (extended from MobileNet SeparableConv2D implementation, quantizes the activation values after the depthwise step) - QConv2DTranspose - QActivation - QAdaptiveActivation - QAveragePooling2D (in fact, an AveragePooling2D stacked with a QActivation layer for quantization of the result) - QBatchNormalization (is still in its experimental stage, as we have not seen the need to use this yet due to the normalization and regularization effects of stochastic activation functions.) - QOctaveConv2D - QSimpleRNN, QSimpleRNNCell - QLSTM, QLSTMCell - QGRU, QGRUCell - QBidirectional It is worth noting that not all functionality is safe at this time to be used with other high-level operations, such as with layer wrappers. For example, Bidirectional layer wrappers are used with RNNs. If this is required, we encourage users to use quantization functions invoked as strings instead of the actual functions as a way through this, but we may change that implementation in the future. A first attempt to create a safe mechanism in QKeras is the adoption of QActivation is a wrap-up that provides an encapsulation around the activation functions so that we can save and restore the network architecture, and duplicate them using Keras interface, but this interface has not been fully tested yet. 
## Activation Layers Implemented in QKeras - smooth_sigmoid(x) - hard_sigmoid(x) - binary_sigmoid(x) - binary_tanh(x) - smooth_tanh(x) - hard_tanh(x) - quantized_bits(bits=8, integer=0, symmetric=0, keep_negative=1)(x) - bernoulli(alpha=1.0)(x) - stochastic_ternary(alpha=1.0, threshold=0.33)(x) - ternary(alpha=1.0, threshold=0.33)(x) - stochastic_binary(alpha=1.0)(x) - binary(alpha=1.0)(x) - quantized_relu(bits=8, integer=0, use_sigmoid=0, negative_slope=0.0)(x) - quantized_ulaw(bits=8, integer=0, symmetric=0, u=255.0)(x) - quantized_tanh(bits=8, integer=0, symmetric=0)(x) - quantized_po2(bits=8, max_value=-1)(x) - quantized_relu_po2(bits=8, max_value=-1)(x) The stochastic_* functions, bernoulli as well as quantized_relu and quantized_tanh rely on stochastic versions of the activation functions. They draw a random number with uniform distribution from _hard_sigmoid of the input x, and result is based on the expected value of the activation function. Please refer to the papers if you want to understand the underlying theory, or the documentation in qkeras/qlayers.py. The parameters "bits" specify the number of bits for the quantization, and "integer" specifies how many bits of "bits" are to the left of the decimal point. Finally, our experience in training networks with QSeparableConv2D, both quantized_bits and quantized_tanh that generates values between [-1, 1), required symmetric versions of the range in order to properly converge and eliminate the bias. Every time we use a quantization for weights and bias that can generate numbers outside the range [-1.0, 1.0], we need to adjust the *_range to the number. For example, if we have a quantized_bits(bits=6, integer=2) in a weight of a layer, we need to set the weight range to 2**2, which is equivalent to Catapult HLS ac_fixed<6, 3, true>. 
Similarly, for quantization functions that accept an alpha parameter, we need to specify a range of alpha, and for po2 type of quantizers, we need to specify the range of max_value. ### Example Suppose you have the following network. An example of a very simple network is given below in Keras. ```python from keras.layers import * x = x_in = Input(shape) x = Conv2D(18, (3, 3), name="first_conv2d")(x) x = Activation("relu")(x) x = SeparableConv2D(32, (3, 3))(x) x = Activation("relu")(x) x = Flatten()(x) x = Dense(NB_CLASSES)(x) x = Activation("softmax")(x) ``` You can easily quantize this network as follows: ```python from keras.layers import * from qkeras import * x = x_in = Input(shape) x = QConv2D(18, (3, 3), kernel_quantizer="stochastic_ternary", bias_quantizer="ternary", name="first_conv2d")(x) x = QActivation("quantized_relu(3)")(x) x = QSeparableConv2D(32, (3, 3), depthwise_quantizer=quantized_bits(4, 0, 1), pointwise_quantizer=quantized_bits(3, 0, 1), bias_quantizer=quantized_bits(3), depthwise_activation=quantized_tanh(6, 2, 1))(x) x = QActivation("quantized_relu(3)")(x) x = Flatten()(x) x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(3), bias_quantizer=quantized_bits(3))(x) x = QActivation("quantized_bits(20, 5)")(x) x = Activation("softmax")(x) ``` The last QActivation is advisable if you want to compare results later on. Please find more cases under the directory examples. ## QTools The purpose of QTools is to assist hardware implementation of the quantized model and model energy consumption estimation. QTools has two functions: data type map generation and energy consumption estimation. - Data Type Map Generation: QTools automatically generate the data type map for weights, bias, multiplier, adder, etc. of each layer. The data type map includes operation type, variable size, quantizer type and bits, etc. Input of the QTools is: 1) a given quantized model; 2) a list of input quantizers for the model. 
Output of QTools is a json file that lists the data type map of each layer (stored in qtools_instance._output_dict) Output methods include: qtools_stats_to_json, which is to output the data type map to a json file; qtools_stats_print, which is to print out the data type map. - Energy Consumption Estimation: Another function of QTools is to estimate the model energy consumption in Pico Joules (pJ). It provides a tool for QKeras users to quickly estimate energy consumption for memory access and MAC operations in a quantized model derived from QKeras, especially when comparing power consumption of two models running on the same device. As with any high-level model, it should be used with caution when attempting to estimate the absolute energy consumption of a model for a given technology, or when attempting to compare different technologies. This tool also provides a measure for model tuning which needs to consider both accuracy and model energy consumption. The energy cost provided by this tool can be integrated into a total loss function which combines energy cost and accuracy. - Energy Model: The best work referenced by the literature on energy consumption was first computed by Horowitz M.: “1.1 computing’s energy problem ( and what we can do about it)”; IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC), 2014 In this work, the author attempted to estimate the energy consumption for accelerators, and for 45 nm process, the data points he presented have since been used whenever someone wants to compare accelerator performance. QTools energy consumption on a 45nm process is based on the data published in this work. - Examples: Example of how to generate data type map can be found in qkeras/qtools/ examples/example_generate_json.py. 
Example of how to generate energy consumption estimation can be found in qkeras/qtools/examples/example_get_energy.py ## AutoQKeras AutoQKeras allows the automatic quantization and rebalancing of deep neural networks by treating quantization and rebalancing of an existing deep neural network as a hyperparameter search in Keras-Tuner using random search, hyperband or gaussian processes. In order to contain the explosion of hyperparameters, users can group tasks by patterns, and perform distributed training using available resources. Extensive documentation is present in notebook/AutoQKeras.ipynb. ## Related Work QKeras has been implemented based on the work of "B.Moons et al. - Minimum Energy Quantized Neural Networks", Asilomar Conference on Signals, Systems and Computers, 2017 and "Zhou, S. et al. - DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients," but the framework should be easily extensible. The original code from QNN can be found below. https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow QKeras extends QNN by providing a richer set of layers (including SeparableConv2D, DepthwiseConv2D, ternary and stochastic ternary quantizations), besides some functions to aid the estimation for the accumulators and conversion from non-quantized to quantized networks. Finally, our main goal is ease of use, so we attempt to make QKeras layers a true drop-in replacement for Keras, so that users can easily exchange non-quantized layers for quantized ones. ### Acknowledgements Portions of QKeras were derived from QNN. 
https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow
Copyright (c) 2017, Bert Moons where it applies

================================================
FILE: examples/example_act.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Example the usage of activation functions in qkeras."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

from qkeras import binary
from qkeras import bernoulli
from qkeras import hard_sigmoid
from qkeras import hard_tanh
from qkeras import quantized_bits
from qkeras import quantized_relu
from qkeras import quantized_tanh
from qkeras import quantized_po2
from qkeras import quantized_relu_po2
from qkeras import set_internal_sigmoid
from qkeras import smooth_sigmoid
from qkeras import smooth_tanh
from qkeras import stochastic_binary
from qkeras import stochastic_ternary
from qkeras import ternary


def main():
  """Prints the outputs of qkeras quantizers on small constant tensors.

  Demonstration script: exercises stochastic-rounding po2 quantizers,
  fixed-point quantizers (quantized_bits / quantized_relu / quantized_tanh),
  stochastic binary/ternary quantizers, and the effect of switching the
  internal sigmoid implementation via set_internal_sigmoid.
  """
  # Check the mean value of samples from stochastic_rounding for po2:
  # averaging many stochastic quantizations of a constant should approach
  # the constant itself.
  np.random.seed(42)
  count = 100000
  val = 42
  a = K.constant([val] * count)
  b = quantized_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be close to ", val)
  b = quantized_relu_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be close to ", val)

  # Negative inputs through a relu-style po2 quantizer should all map to 0.
  a = K.constant([-1] * count)
  b = quantized_relu_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be all ", 0)

  # non-stochastic rounding quantizer.
  # NOTE(review): `a` is immediately reassigned below, so the first constant
  # is unused — kept as in the original example.
  a = K.constant([-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0])
  a = K.constant([0.194336])
  print(" a =", K.eval(a).astype(np.float16))
  print("qa =", K.eval(quantized_relu(6,2)(a)).astype(np.float16))
  print("ss =", K.eval(smooth_sigmoid(a)).astype(np.float16))
  print("hs =", K.eval(hard_sigmoid(a)).astype(np.float16))
  print("ht =", K.eval(hard_tanh(a)).astype(np.float16))
  print("st =", K.eval(smooth_tanh(a)).astype(np.float16))

  # quantized_bits with different (bits, integer, keep_negative) settings.
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print(" c =", K.eval(c).astype(np.float16))
  print("qb_111 =", K.eval(quantized_bits(1,1,1)(c)).astype(np.float16))
  print("qb_210 =", K.eval(quantized_bits(2,1,0)(c)).astype(np.float16))
  print("qb_211 =", K.eval(quantized_bits(2,1,1)(c)).astype(np.float16))
  print("qb_300 =", K.eval(quantized_bits(3,0,0)(c)).astype(np.float16))
  print("qb_301 =", K.eval(quantized_bits(3,0,1)(c)).astype(np.float16))

  # 1000 identical rows: column-wise averages estimate the expected value of
  # the stochastic quantizers below.
  c_1000 = K.constant(np.array([list(K.eval(c))] * 1000))
  b = np.sum(K.eval(bernoulli()(c_1000)).astype(np.int32), axis=0) / 1000.0
  print(" hs =", K.eval(hard_sigmoid(c)).astype(np.float16))
  print(" b_all =", b.astype(np.float16))

  # NOTE(review): T is assigned but never used in the original example.
  T = 0.0
  t = K.eval(stochastic_ternary(alpha="auto")(c_1000))
  for i in range(10):
    print("stochastic_ternary({}) =".format(i), t[i])
  print(" st_all =", np.round(
      np.sum(t.astype(np.float32), axis=0).astype(np.float16) / 1000.0,
      2).astype(np.float16))
  print(" ternary =", K.eval(ternary(threshold=0.5)(c)).astype(np.int32))

  # binary / quantized_relu / quantized_tanh with the default (hard) sigmoid.
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print(" c =", K.eval(c).astype(np.float16))
  print(" b_10 =", K.eval(binary(1)(c)).astype(np.float16))
  print("qr_10 =", K.eval(quantized_relu(1,0)(c)).astype(np.float16))
  print("qr_11 =", K.eval(quantized_relu(1,1)(c)).astype(np.float16))
  print("qr_20 =", K.eval(quantized_relu(2,0)(c)).astype(np.float16))
  print("qr_21 =", K.eval(quantized_relu(2,1)(c)).astype(np.float16))
  print("qr_101 =", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))

  # Same quantizers again, but with the internal sigmoid switched to the
  # smooth approximation.
  set_internal_sigmoid("smooth"); print("with smooth sigmoid")
  print("qr_101 =", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))

  # ... and with the exact sigmoid.
  set_internal_sigmoid("real"); print("with real sigmoid")
  print("qr_101 =", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))
  set_internal_sigmoid("hard")

  # Power-of-two quantizers without stochastic rounding.
  # NOTE(review): the "qr2_44" label does not match the (4,1) arguments it
  # prints — looks like a stale label in the original example.
  print(" c =", K.eval(c).astype(np.float16))
  print("q2_31 =", K.eval(quantized_po2(3,1)(c)).astype(np.float16))
  print("q2_32 =", K.eval(quantized_po2(3,2)(c)).astype(np.float16))
  print("qr2_21 =", K.eval(quantized_relu_po2(2,1)(c)).astype(np.float16))
  print("qr2_22 =", K.eval(quantized_relu_po2(2,2)(c)).astype(np.float16))
  print("qr2_44 =", K.eval(quantized_relu_po2(4,1)(c)).astype(np.float16))

  # stochastic rounding
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print("q2_32_2 =", K.eval(quantized_relu_po2(32,2)(c)).astype(np.float16))
  b = K.eval(stochastic_binary()(c_1000)).astype(np.int32)
  for i in range(5):
    print("sbinary({}) =".format(i), b[i])
  print("sbinary =",
        np.round(np.sum(b, axis=0) / 1000.0, 2).astype(np.float16))
  print(" binary =", K.eval(binary()(c)).astype(np.int32))
  print(" c =", K.eval(c).astype(np.float16))
  for i in range(10):
    print(" s_bin({}) =".format(i),
          K.eval(binary(use_stochastic_rounding=1)(c)).astype(np.int32))
  for i in range(10):
    print(" s_po2({}) =".format(i),
          K.eval(quantized_po2(use_stochastic_rounding=1)(c)).astype(np.int32))
  for i in range(10):
    print(
        " s_relu_po2({}) =".format(i),
        K.eval(quantized_relu_po2(use_stochastic_rounding=1)(c)).astype(
            np.int32))


if __name__ == '__main__':
  main()

================================================
FILE: examples/example_b2t.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Implements total/partial Binary to Thermometer decoder.""" import numpy as np from qkeras import BinaryToThermometer if __name__ == "__main__": np.random.seed(42) x = np.array(range(8)) b = BinaryToThermometer(x, 2, 8) print(b) b = BinaryToThermometer(x, 2, 8, 1) print(b) b = BinaryToThermometer(x, 2, 8, 1, use_two_hot_encoding=1) print(b) b = BinaryToThermometer(x, 4, 8) print(b) b = BinaryToThermometer(x, 4, 8, 1) print(b) b = BinaryToThermometer(x, 4, 8, 1, use_two_hot_encoding=1) print(b) x = np.random.randint(0, 255, (100, 28, 28, 1)) print(x[0, 0, 0:5]) b = BinaryToThermometer(x, 8, 256, 0) print(x.shape, b.shape) print(b[0, 0, 0:5]) b = BinaryToThermometer(x, 8, 256, 1) print(b[0, 0, 0:5]) x = np.random.randint(0, 255, (100, 28, 28, 2)) b = BinaryToThermometer(x, 8, 256, 0, 1) print(x.shape, b.shape) print(x[0, 0, 0, 0:2]) print(b[0, 0, 0, 0:8]) print(b[0, 0, 0, 8:16]) ================================================ FILE: examples/example_cifar10_po2.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests qcore model with po2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from collections import defaultdict

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import to_categorical
import numpy as np

from qkeras import *

np.random.seed(42)

NB_EPOCH = 50
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001)
VALIDATION_SPLIT = 0.1

# Load and normalize CIFAR-10 to [0, 1].
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255.0
x_test /= 255.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")
print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# Quantized convnet: po2 quantizers in the first two conv blocks,
# fixed-point (quantized_bits) in the last conv block and ulaw in the dense
# classifier.
x = x_in = Input(x_train.shape[1:], name="input")
x = QActivation("quantized_relu_po2(4,4)", name="acti")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=quantized_po2(4, 1),
    bias_quantizer=quantized_po2(4, 4),
    bias_range=4,
    name="conv2d_0_m")(
        x)
x = QActivation("ternary()", name="act0_m")(x)
x = MaxPooling2D(2, 2, name="mp_0")(x)
x = QConv2D(
    256, (3, 3),
    strides=1,
    kernel_quantizer=quantized_po2(4, 1),
    bias_quantizer=quantized_po2(4, 4),
    bias_range=4,
    name="conv2d_1_m")(
        x)
x = QActivation("quantized_relu(6,2)", name="act1_m")(x)
x = MaxPooling2D(2, 2, name="mp_1")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=quantized_bits(4, 0, 1),
    bias_quantizer=quantized_bits(4, 0, 1),
    name="conv2d_2_m")(
        x)
x = QActivation("quantized_relu(4,2)", name="act2_m")(x)
x = MaxPooling2D(2, 2, name="mp_2")(x)
x = Flatten()(x)
x = QDense(
    NB_CLASSES,
    kernel_quantizer=quantized_ulaw(4, 0, 1),
    bias_quantizer=quantized_bits(4, 0, 1),
    name="dense")(
        x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in], outputs=[x])
model.summary()
model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

# Training is opt-in via the TRAIN environment variable.
if int(os.environ.get("TRAIN", 0)):
  history = model.fit(
      x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT)

# Debug model: tap the output of every (Q)Activation/(Q)Dense/(Q)Conv layer
# and print min/max of activations and quantized weights.
# NOTE(review): original indentation was lost in extraction; this section is
# reconstructed at top level — confirm against upstream examples.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in [
      "QActivation", "Activation", "QDense", "QConv2D", "QDepthwiseConv2D"
  ]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)
outputs = model_debug.predict(x_train)
print("{:30} {: 8.4f} {: 8.4f}".format(
    "input", np.min(x_train), np.max(x_train)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    # Print the range of the weights after quantization.
    weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

score = model.evaluate(x_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print("Test accuracy:", score[1])

model.summary()
print_qstats(model)

================================================
FILE: examples/example_keras_to_qkeras.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests automatic conversion of keras model to qkeras."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import defaultdict

from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump

# Plain (float) Keras model with two inputs; it will be converted to a
# quantized qkeras model by model_quantize below.
x0 = x_in0 = Input((28, 28, 1), name="input0")
x1 = x_in1 = Input((28, 28, 1), name="input1")
x = Concatenate(name="concat")([x0, x1])
x = Conv2D(128, (3, 3), strides=1, name="conv2d_0_m")(x)
x = Activation("relu", name="act0_m")(x)
x = MaxPooling2D(2, 2, name="mp_0")(x)
x = Conv2D(256, (3, 3), strides=1, name="conv2d_1_m")(x)
x = Activation("relu", name="act1_m")(x)
x = MaxPooling2D(2, 2, name="mp_1")(x)
x = Conv2D(128, (3, 3), strides=1, name="conv2d_2_m")(x)
x = Activation("relu", name="act2_m")(x)
x = MaxPooling2D(2, 2, name="mp_2")(x)
x = Flatten()(x)
x = Dense(10, name="dense")(x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in0, x_in1], outputs=[x])
model.summary()

# Quantization recipe: per-layer entries (by name) override the per-class
# defaults ("QActivation", "QConv2D", "QDense").
q_dict = {
    "conv2d_0_m": {
        "kernel_quantizer": "binary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "conv2d_1_m": {
        "kernel_quantizer": "ternary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "act2_m": "quantized_relu(6,2)",
    "QActivation": {
        "relu": "quantized_relu(4,0)"
    },
    "QConv2D": {
        "kernel_quantizer": "quantized_bits(4,0,1)",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "QDense": {
        "kernel_quantizer": "quantized_bits(3,0,1)",
        "bias_quantizer": "quantized_bits(3,0,1)"
    }
}

qmodel = model_quantize(model, q_dict, 4)
qmodel.summary()
print_qstats(qmodel)

# Dump quantized intermediate activations for a few MNIST samples (the same
# ten images are fed to both inputs).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_test_arr = [x_test[0:10,:], x_test[0:10,:]]
quantized_model_dump(
    qmodel,
    x_test_arr,
    layers_to_dump=["input0", "input1", "act2_m", "act1_m", "act0_m"])

================================================
FILE: examples/example_mnist.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""uses po2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from collections import defaultdict

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

from qkeras import *
from qkeras.utils import model_save_quantized_weights

import numpy as np
import tensorflow.compat.v1 as tf

np.random.seed(42)

NB_EPOCH = 100
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001, decay=0.000025)
VALIDATION_SPLIT = 0.1

train = 1

# Load MNIST, add a channel axis and normalize to [0, 1).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

RESHAPED = 784

x_test_orig = x_test

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

x_train /= 256.0
x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# 4-bit fixed-point convnet (quantized_bits(4,0,1) everywhere).
x = x_in = Input(
    x_train.shape[1:-1] + (1,), name="input")
x = QConv2D(
    32, (2, 2), strides=(2,2),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1),
    name="conv2d_0_m")(x)
x = QActivation("quantized_relu(4,0)", name="act0_m")(x)
x = QConv2D(
    64, (3, 3), strides=(2,2),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1),
    name="conv2d_1_m")(x)
x = QActivation("quantized_relu(4,0)", name="act1_m")(x)
x = QConv2D(
    64, (2, 2), strides=(2,2),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1),
    name="conv2d_2_m")(x)
x = QActivation("quantized_relu(4,0)", name="act2_m")(x)
x = Flatten()(x)
x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4,0,1),
           bias_quantizer=quantized_bits(4,0,1),
           name="dense")(x)
x_out = x
x = Activation("softmax", name="softmax")(x)

# `model` includes the softmax head; `mo` exposes the pre-softmax logits so
# they can be dumped to disk below.
model = Model(inputs=[x_in], outputs=[x])
mo = Model(inputs=[x_in], outputs=[x_out])
model.summary()

model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

if train:
  history = model.fit(
      x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT)

# Debug model: print min/max of each tapped activation and of the quantized
# weights of each layer.
# NOTE(review): original indentation was lost in extraction; this section is
# reconstructed at top level — confirm against upstream examples.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in ["QActivation", "Activation", "QDense",
                                  "QConv2D", "QDepthwiseConv2D"]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)
outputs = model_debug.predict(x_train)
print("{:30} {: 8.4f} {: 8.4f}".format(
    "input", np.min(x_train), np.max(x_train)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

# Dump pre-softmax logits for the test set.
p_test = mo.predict(x_test)
p_test.tofile("p_test.bin")

score = model.evaluate(x_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print("Test accuracy:", score[1])

# Replace in-memory weights by their quantized values, then inspect them.
all_weights = []
model_save_quantized_weights(model)

for layer in model.layers:
  for w, weights in enumerate(layer.get_weights()):
    print(layer.name, w)
    all_weights.append(weights.flatten())

all_weights = np.concatenate(all_weights).astype(np.float32)
print(all_weights.size)

for layer in model.layers:
  for w, weight in enumerate(layer.get_weights()):
    print(layer.name, w, weight.shape)

print_qstats(model)

================================================
FILE: examples/example_mnist_ae.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""uses po2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from collections import defaultdict

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

from qkeras import *
from qkeras.utils import model_save_quantized_weights

import numpy as np
import tensorflow.compat.v1 as tf

np.random.seed(42)

NB_EPOCH = 100
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001, decay=0.000025)
VALIDATION_SPLIT = 0.1

train = 1

# Load MNIST, add a channel axis and normalize to [0, 1).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

RESHAPED = 784

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

x_train /= 256.0
x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# Quantized convolutional autoencoder: a 32-16-8 QConv2D encoder mirrored by
# an 8-16-32 QConv2DTranspose decoder, all with 4-bit quantizers.
x = x_in = Input(
    x_train.shape[1:-1] + (1,))
x = QConv2D(
    32,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2D(
    16,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2D(
    8,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2DTranspose(
    8,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2DTranspose(
    16,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2DTranspose(
    32,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2D(
    1,
    kernel_size=(3, 3),
    padding="same",
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x_out = x
x = Activation("sigmoid")(x)

model = Model(inputs=[x_in], outputs=[x])
mo = Model(inputs=[x_in], outputs=[x_out])
model.summary()

# Autoencoder: input is its own target, pixel-wise binary cross-entropy.
model.compile(
    loss="binary_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

if train:
  history = model.fit(
      x_train, x_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT)

# Generate reconstructions
num_reco = 8
samples = x_test[:num_reco]
targets = y_test[:num_reco]
reconstructions = model.predict(samples)

for layer in model.layers:
  for w, weight in enumerate(layer.get_weights()):
    print(layer.name, w, weight.shape)

print_qstats(model)

================================================
FILE: examples/example_mnist_b2t.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests qcore model with BinaryToThermometer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
import numpy as np

from qkeras import *

np.random.seed(42)

NB_EPOCH = 20
BATCH_SIZE = 32
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001)
N_HIDDEN = 100
VALIDATION_SPLIT = 0.1
# Thermometer-encoding parameters: number of classes over the 0..255 pixel
# range, and whether the per-class residue is kept.
T_CLASSES = 256
T_WITH_RESIDUE = 0

(x_train, y_train), (x_test, y_test) = mnist.load_data()

RESHAPED = 784

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

# With a single thermometer class the input stays scalar, so normalize it;
# otherwise the thermometer encoding consumes the raw 8-bit values.
if T_CLASSES == 1:
  x_train /= 256.0
  x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

# x_train = x_train[0:1000]
# y_train = y_train[0:1000]

# x_test = x_test[0:100]
# y_test = y_test[0:100]

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# we ran out of memory here, so we split x_train/x_test into smaller groups

x = x_in = Input(
    x_train.shape[1:-1] + (T_CLASSES,), name="input")

# Number is represented as 1.bbb, where number of bits of bbb is
# log2(256/T_CLASSES) if T_WITH_RESIDUE == 1
bits = (
    (T_WITH_RESIDUE == 1) * int(np.ceil(np.log2(256/T_CLASSES))) +
    (T_CLASSES > 1)
)

print("Input quantizer: quantized_relu({},{})".format(
    bits, int(T_CLASSES > 1)))
x = QActivation("quantized_relu({},{})".format(bits, int(T_CLASSES > 1)))(x)

x = QConv2D(
    64, (3, 3),
    strides=1,
    padding="same",
    kernel_quantizer=quantized_po2(4,1),
    bias_quantizer=quantized_bits(4,2,1),
    bias_range=4,
    name="conv2d_0_m")(x)
x = QActivation("quantized_relu(4,0)", name="act0_m")(x)
x = MaxPooling2D(2,2,name="mp_0")(x)
x = QConv2D(
    32, (3, 3),
    strides=1,
    padding="same",
    kernel_quantizer=stochastic_ternary(),
    bias_quantizer=quantized_bits(8,5,1),
    bias_range=32,
    name="conv2d_1_m")(x)
x = QActivation("quantized_relu(4,0)", name="act1_m")(x)
x = MaxPooling2D(2,2,name="mp_1")(x)
x = QConv2D(
    16, (3, 3),
    strides=1,
    padding="same",
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(8,5,1),
    bias_range=32,
    name="conv2d_2_m")(x)
x = QActivation("quantized_relu(6,2)", name="act2_m")(x)
x = MaxPooling2D(2,2,name="mp_2")(x)
x = Flatten()(x)
x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4,0,1),
           bias_quantizer=quantized_bits(4,0,1),
           name="dense2")(x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in], outputs=[x])
model.summary()

model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

# Debug model tapping every (Q)Activation/(Q)Dense/(Q)Conv output.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in ["QActivation", "Activation", "QDense",
                                  "QConv2D", "QDepthwiseConv2D"]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)

# The thermometer encoding multiplies memory by T_CLASSES, so the training
# set is encoded and fed in large chunks instead of all at once.
batch_size = 1000 * BATCH_SIZE
n_batches = x_train.shape[0] // batch_size

if T_CLASSES > 1:
  x_test = BinaryToThermometer(x_test, T_CLASSES, 256, T_WITH_RESIDUE)

# Training is opt-in via the TRAIN environment variable.
# NOTE(review): original indentation was lost in extraction; the loop nesting
# here is reconstructed — confirm against the upstream example.
if int(os.environ.get("TRAIN", 0)):
  for i in range(NB_EPOCH):
    for b in range(n_batches):
      min_b = b * batch_size
      max_b = (b + 1) * batch_size
      if max_b > x_train.shape[0]:
        max_b = x_train.shape[0]
      if T_CLASSES > 1:
        x = BinaryToThermometer(
            x_train[min_b:max_b], T_CLASSES, 256, T_WITH_RESIDUE)
      else:
        x = x_train[min_b:max_b]
      history = model.fit(
          x, y_train[min_b:max_b], batch_size=BATCH_SIZE,
          epochs=i+1, initial_epoch=i, verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)

# Inspect activations/weights on the first 100 training images.
if T_CLASSES > 1:
  x = BinaryToThermometer(x_train[0:100], T_CLASSES, 256, T_WITH_RESIDUE)
else:
  x = x_train[0:100]

outputs = model_debug.predict(x)

print("{:30} {: 8.4f} {: 8.4f}".format("input", np.min(x), np.max(x)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

score = model.evaluate(x_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print("Test accuracy:", score[1])

print_qstats(model)
acc = analyze_accumulator_from_sample(model, x_test, mode="sampled")
print(acc)

================================================
FILE: examples/example_mnist_bn.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests mnist batchnormalization used as learned scale factor."""

# to run, THRESHOLD=0.05 WITH_BN=1 EPOCHS=5 TRAIN=1 python example_mnist_bn.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import defaultdict
import os

import numpy as np
from six.moves import zip
from tensorflow.keras import callbacks
import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import to_categorical

from qkeras import *

np.random.seed(42)

TRAIN = 1
NB_EPOCH = 2
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001)
VALIDATION_SPLIT = 0.1
WITH_BN = 1
THRESHOLD = 0.1


class LearningRateAdjuster(callbacks.Callback):
  """Halves the learning rate when any batch-norm variance grows too large.

  After each epoch, scans BatchNormalization/QBatchNormalization layers;
  if the maximum of their last weight tensor (moving variance) exceeds 32
  and the accumulated factor is still below 100, the optimizer's learning
  rate is halved in place.
  """

  def __init__(self):
    # Tracks how much the rate has already been reduced (guards the cap).
    self.learning_rate_factor = 1.0
    pass

  def on_epoch_end(self, epochs, logs):
    max_variance = -1
    for layer in self.model.layers:
      if layer.__class__.__name__ in [
          "BatchNormalization", "QBatchNormalization"
      ]:
        # Last weight of a (Q)BatchNormalization layer is its variance.
        variance = np.max(layer.get_weights()[-1])
        if variance > max_variance:
          max_variance = variance
    if max_variance > 32 and self.learning_rate_factor < 100:
      learning_rate = K.get_value(self.model.optimizer.learning_rate)
      # NOTE(review): dividing the factor shrinks it, so the `< 100` guard
      # can never trigger — looks like it should multiply; kept as original.
      self.learning_rate_factor /= 2.0
      print("***** max_variance is {} / lr is {} *****".format(
          max_variance, learning_rate))
      K.eval(K.update(
          self.model.optimizer.learning_rate, learning_rate / 2.0
      ))


lra = LearningRateAdjuster()

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape + (1,)).astype("float32")
x_test = x_test.reshape(x_test.shape + (1,)).astype("float32")

x_train /= 256.0
x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# Ternary-kernel convnet. When WITH_BN is set, conv layers drop their bias
# and a QBatchNormalization provides the learned scale/offset instead.
x = x_in = Input(x_train.shape[1:], name="input")
#x = QActivation("quantized_relu_po2(4,1)", name="acti")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=ternary(threshold=THRESHOLD),  #quantized_po2(4, 1),
    bias_quantizer=quantized_bits(4,2,0) if not WITH_BN else None,
    bias_range=4 if not WITH_BN else None,
    use_bias=not WITH_BN,
    name="conv2d_0_m")(x)
if WITH_BN:
  x = QBatchNormalization(
      gamma_quantizer=quantized_relu_po2(4,8),
      variance_quantizer=quantized_relu_po2(6),
      beta_quantizer=quantized_po2(4, 4),
      gamma_range=8,
      beta_range=4,
      name="bn0")(x)
x = QActivation("quantized_relu(3,1)", name="act0_m")(x)
x = MaxPooling2D(2, 2, name="mp_0")(x)
x = QConv2D(
    256, (3, 3),
    strides=1,
    kernel_quantizer=ternary(threshold=THRESHOLD),  #quantized_bits(2,0,1),
    bias_quantizer=quantized_bits(4,2,1) if not WITH_BN else None,
    bias_range=4 if not WITH_BN else None,
    use_bias=not WITH_BN,
    name="conv2d_1_m")(x)
if WITH_BN:
  x = QBatchNormalization(
      gamma_quantizer=quantized_relu_po2(4,8),
      variance_quantizer=quantized_relu_po2(6),
      beta_quantizer=quantized_po2(4, 4),
      gamma_range=8,
      beta_range=4,
      name="bn1")(x)
x = QActivation("quantized_relu(3,1)", name="act1_m")(x)
x = MaxPooling2D(2, 2, name="mp_1")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=ternary(threshold=THRESHOLD),  #quantized_bits(2,0,1),
    bias_quantizer=quantized_bits(4,2,1) if not WITH_BN else None,
    bias_range=4 if not WITH_BN else None,
    use_bias=not WITH_BN,
    name="conv2d_2_m")(x)
if WITH_BN:
  x = QBatchNormalization(
      gamma_quantizer=quantized_relu_po2(4,8),
      variance_quantizer=quantized_relu_po2(6),
      beta_quantizer=quantized_po2(4, 4),
      gamma_range=8,
      beta_range=4,
      name="bn2")(x)
x = QActivation("quantized_relu(3,1)", name="act2_m")(x)
x = MaxPooling2D(2, 2, name="mp_2")(x)
x = Flatten()(x)
x = QDense(
    NB_CLASSES,
    kernel_quantizer=quantized_ulaw(4, 0, 1),
    bias_quantizer=quantized_bits(4, 0, 1),
    name="dense")(
        x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in], outputs=[x])
model.summary()
model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

if TRAIN:
  history = model.fit(
      x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT,
      callbacks=[])  #lra])

# Debug model: print min/max of tapped activations and quantized weights.
# NOTE(review): original indentation was lost in extraction; this section is
# reconstructed at top level — confirm against upstream examples.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in [
      "QActivation", "QBatchNormalization", "Activation", "QDense",
      "QConv2D", "QDepthwiseConv2D"
  ]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)
outputs = model_debug.predict(x_train)
print("{:30} {: 8.4f} {: 8.4f}".format(
    "input", np.min(x_train), np.max(x_train)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    # Some quantizer slots can be None (e.g. disabled bias quantizer).
    if layer.get_quantizers()[i]:
      weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

score = model.evaluate(x_test, y_test, verbose=False)
print("Test score:", score[0])
print("Test accuracy:", score[1])

print_qstats(model)

================================================
FILE: examples/example_mnist_po2.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests qlayers model with po2.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow.keras.backend as K from tensorflow.keras.datasets import mnist from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import to_categorical import numpy as np from qkeras import * # pylint: disable=wildcard-import np.random.seed(42) NB_EPOCH = 5 BATCH_SIZE = 64 VERBOSE = 1 NB_CLASSES = 10 OPTIMIZER = Adam(lr=0.0001, decay=0.000025) N_HIDDEN = 100 VALIDATION_SPLIT = 0.1 QUANTIZED = 1 CONV2D = 1 (x_train, y_train), (x_test, y_test) = mnist.load_data() RESHAPED = 784 x_train = x_train.astype("float32") x_test = x_test.astype("float32") x_train = x_train[..., np.newaxis] x_test = x_test[..., np.newaxis] x_train /= 256.0 x_test /= 256.0 train = False print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") print(y_train[0:10]) y_train = to_categorical(y_train, NB_CLASSES) y_test = to_categorical(y_test, NB_CLASSES) # we ran out of memory here, so we split x_train/x_test into smaller groups x = x_in = Input(x_train.shape[1:-1] + (1,), name="input") x = QActivation("quantized_relu_po2(4)", name="acti")(x) x = QConv2D( 32, (2, 2), strides=(2, 2), kernel_quantizer=quantized_po2(4, 1), bias_quantizer=quantized_po2(4, 1), name="conv2d_0_m")( x) x = QActivation("quantized_relu_po2(4,4)", name="act0_m")(x) x = QConv2D( 64, (3, 3), strides=(2, 2), kernel_quantizer=quantized_po2(4, 1), bias_quantizer=quantized_po2(4, 1), name="conv2d_1_m")( x) x = QActivation("quantized_relu_po2(4,4,use_stochastic_rounding=True)", name="act1_m")(x) x = QConv2D( 64, (2, 2), strides=(2, 2), kernel_quantizer=quantized_po2(4, 1, 
use_stochastic_rounding=True), bias_quantizer=quantized_po2(4, 1), name="conv2d_2_m")( x) x = QActivation("quantized_relu(4,1)", name="act2_m")(x) x = Flatten()(x) x = QDense( NB_CLASSES, kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name="dense")( x) x = Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) model.summary() model.compile( loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"]) if train: history = model.fit( x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE, validation_split=VALIDATION_SPLIT) outputs = [] output_names = [] for layer in model.layers: if layer.__class__.__name__ in [ "QActivation", "Activation", "QDense", "QConv2D", "QDepthwiseConv2D" ]: output_names.append(layer.name) outputs.append(layer.output) model_debug = Model(inputs=[x_in], outputs=outputs) outputs = model_debug.predict(x_train) print("{:30} {: 8.4f} {: 8.4f}".format( "input", np.min(x_train), np.max(x_train))) for n, p in zip(output_names, outputs): print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="") layer = model.get_layer(n) for i, weights in enumerate(layer.get_weights()): weights = K.eval(layer.get_quantizers()[i](K.constant(weights))) print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)), end="") print("") score = model.evaluate(x_test, y_test, verbose=VERBOSE) print("Test score:", score[0]) print("Test accuracy:", score[1]) model.summary() print_qstats(model) ================================================ FILE: examples/example_mnist_prune.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Example of mnist model with pruning. Adapted from TF model optimization example.""" import tempfile import numpy as np import tensorflow.keras.backend as K from tensorflow.keras.datasets import mnist from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.models import Sequential from tensorflow.keras.models import save_model from tensorflow.keras.utils import to_categorical from qkeras import QActivation from qkeras import QDense from qkeras import QConv2D from qkeras import quantized_bits from qkeras.utils import load_qmodel from qkeras.utils import print_model_sparsity from tensorflow_model_optimization.python.core.sparsity.keras import prune from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule batch_size = 128 num_classes = 10 epochs = 12 prune_whole_model = True # Prune whole model or just specified layers def build_model(input_shape): x = x_in = Input(shape=input_shape, name="input") x = QConv2D( 32, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_0_m")(x) x = QActivation("quantized_relu(4,0)", name="act0_m")(x) x = QConv2D( 64, (3, 3), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_1_m")(x) x = 
QActivation("quantized_relu(4,0)", name="act1_m")(x) x = QConv2D( 64, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_2_m")(x) x = QActivation("quantized_relu(4,0)", name="act2_m")(x) x = Flatten()(x) x = QDense(num_classes, kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="dense")(x) x = Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) return model def build_layerwise_model(input_shape, **pruning_params): return Sequential([ prune.prune_low_magnitude( QConv2D( 32, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_0_m"), input_shape=input_shape, **pruning_params), QActivation("quantized_relu(4,0)", name="act0_m"), prune.prune_low_magnitude( QConv2D( 64, (3, 3), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_1_m"), **pruning_params), QActivation("quantized_relu(4,0)", name="act1_m"), prune.prune_low_magnitude( QConv2D( 64, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_2_m"), **pruning_params), QActivation("quantized_relu(4,0)", name="act2_m"), Flatten(), prune.prune_low_magnitude( QDense( num_classes, kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="dense"), **pruning_params), Activation("softmax", name="softmax") ]) def train_and_save(model, x_train, y_train, x_test, y_test): model.compile( loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) # Print the model summary. model.summary() # Add a pruning step callback to peg the pruning step to the optimizer's # step. 
Also add a callback to add pruning summaries to tensorboard callbacks = [ pruning_callbacks.UpdatePruningStep(), #pruning_callbacks.PruningSummaries(log_dir=tempfile.mkdtemp()) pruning_callbacks.PruningSummaries(log_dir="/tmp/mnist_prune") ] model.fit( x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, callbacks=callbacks, validation_data=(x_test, y_test)) score = model.evaluate(x_test, y_test, verbose=0) print("Test loss:", score[0]) print("Test accuracy:", score[1]) print_model_sparsity(model) # Export and import the model. Check that accuracy persists. _, keras_file = tempfile.mkstemp(".h5") print("Saving model to: ", keras_file) save_model(model, keras_file) print("Reloading model") with prune.prune_scope(): loaded_model = load_qmodel(keras_file) score = loaded_model.evaluate(x_test, y_test, verbose=0) print("Test loss:", score[0]) print("Test accuracy:", score[1]) def main(): # input image dimensions img_rows, img_cols = 28, 28 # the data, shuffled and split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() if K.image_data_format() == "channels_first": x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) x_train = x_train.astype("float32") x_test = x_test.astype("float32") x_train /= 255 x_test /= 255 print("x_train shape:", x_train.shape) print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") # convert class vectors to binary class matrices y_train = to_categorical(y_train, num_classes) y_test = to_categorical(y_test, num_classes) pruning_params = { "pruning_schedule": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100) } if prune_whole_model: model = build_model(input_shape) 
model = prune.prune_low_magnitude(model, **pruning_params) else: model = build_layerwise_model(input_shape, **pruning_params) train_and_save(model, x_train, y_train, x_test, y_test) if __name__ == "__main__": main() ================================================ FILE: examples/example_qdense.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests qdense model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse from tensorflow.keras.datasets import mnist from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import to_categorical import numpy as np from qkeras import print_qstats from qkeras import QActivation from qkeras import QDense from qkeras import quantized_bits from qkeras import ternary np.random.seed(42) OPTIMIZER = Adam() NB_EPOCH = 1 BATCH_SIZE = 32 VERBOSE = 1 NB_CLASSES = 10 N_HIDDEN = 100 VALIDATION_SPLIT = 0.1 RESHAPED = 784 def QDenseModel(weights_f, load_weights=False): """Construct QDenseModel.""" x = x_in = Input((RESHAPED,), name="input") x = QActivation("quantized_relu(4)", name="act_i")(x) x = QDense(N_HIDDEN, kernel_quantizer=ternary(), bias_quantizer=quantized_bits(4, 0, 
1), name="dense0")(x) x = QActivation("quantized_relu(2)", name="act0")(x) x = QDense( NB_CLASSES, kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name="dense2")( x) x = Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) model.summary() model.compile(loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"]) if load_weights and weights_f: model.load_weights(weights_f) print_qstats(model) return model def UseNetwork(weights_f, load_weights=False): """Use DenseModel. Args: weights_f: weight file location. load_weights: load weights when it is True. """ model = QDenseModel(weights_f, load_weights) batch_size = BATCH_SIZE (x_train_, y_train_), (x_test_, y_test_) = mnist.load_data() x_train_ = x_train_.reshape(60000, RESHAPED) x_test_ = x_test_.reshape(10000, RESHAPED) x_train_ = x_train_.astype("float32") x_test_ = x_test_.astype("float32") x_train_ /= 255 x_test_ /= 255 print(x_train_.shape[0], "train samples") print(x_test_.shape[0], "test samples") y_train_ = to_categorical(y_train_, NB_CLASSES) y_test_ = to_categorical(y_test_, NB_CLASSES) if not load_weights: model.fit( x_train_, y_train_, batch_size=batch_size, epochs=NB_EPOCH, verbose=VERBOSE, validation_split=VALIDATION_SPLIT) if weights_f: model.save_weights(weights_f) score = model.evaluate(x_test_, y_test_, verbose=VERBOSE) print_qstats(model) print("Test score:", score[0]) print("Test accuracy:", score[1]) def ParserArgs(): parser = argparse.ArgumentParser() parser.add_argument("-l", "--load_weight", default="0", help="""load weights directly from file. 
0 is to disable and train the network.""") parser.add_argument("-w", "--weight_file", default=None) a = parser.parse_args() return a if __name__ == "__main__": args = ParserArgs() lw = False if args.load_weight == "0" else True UseNetwork(args.weight_file, load_weights=lw) ================================================ FILE: examples/example_qoctave.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """QOctave example.""" import numpy as np import sys from tensorflow.keras import activations from tensorflow.keras import initializers import tensorflow.keras.backend as K from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import to_categorical from functools import partial from qkeras import * # pylint: disable=wildcard-import def create_model(): """use qocatve in network.""" kernel_initializer=initializers.he_normal(seed=42) x = x_in = Input(shape=(256, 256, 3)) # Block 1 high, low = QOctaveConv2D( 32, (3, 3), alpha=0.5, strides=(2, 2), padding='valid', kernel_initializer=kernel_initializer, bias_initializer="zeros", bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", 
acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block1_conv1')([x, None]) # Block 2 high, low = QOctaveConv2D( 64, (3, 3), alpha=0.4, strides=(2, 2), padding='same', kernel_initializer=kernel_initializer, bias_initializer="zeros", bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block2_conv1')([high, low]) # Block 3 high, low = QOctaveConv2D( 64, (3, 3), alpha=0.4, strides=(2, 2), padding='same', kernel_initializer=kernel_initializer, bias_initializer="zeros", bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv1')([high, low]) high, low = QOctaveConv2D( 32, (3, 3), alpha=0.4, strides=(1, 1), padding='same', kernel_initializer=kernel_initializer, bias_initializer='zeros', bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv2')([high, low]) high, low = QOctaveConv2D( 32, (3, 3), alpha=0.3, strides=(1, 1), padding='same', kernel_initializer=kernel_initializer, bias_initializer='zeros', bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv3')([high, low]) x, _ = QOctaveConv2D( 32, (3, 3), alpha=0.0, strides=(2, 2), 
padding='same', kernel_initializer=kernel_initializer, bias_initializer='zeros', bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv_down')([high, low]) # Upsample x = UpSampling2D(size=(2, 2), data_format="channels_last")(x) x = QConv2D( 2, (2, 2), strides=(1, 1), kernel_initializer=kernel_initializer, bias_initializer="ones", kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), padding="same", name="conv_up")( x) x = Activation("softmax", name="softmax")(x) output = x model = Model(x_in, output, name='qoctave_network') return model # Create the model def customLoss(y_true,y_pred): log1 = 1.5 * y_true * K.log(y_pred + 1e-9) * K.pow(1-y_pred, 2) log0 = 0.5 * (1 - y_true) * K.log((1 - y_pred) + 1e-9) * K.pow(y_pred, 2) return (- K.sum(K.mean(log0 + log1, axis = 0))) if __name__ == '__main__': model = create_model() model.compile(optimizer="Adam", loss=customLoss, metrics=['acc']) model.summary(line_length=100) print_qstats(model) ================================================ FILE: examples/example_ternary.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== from __future__ import absolute_import # Not necessary in a Python 3-only module from __future__ import division # Not necessary in a Python 3-only module from __future__ import print_function # Not necessary in a Python 3-only module from absl import app from absl import flags import matplotlib import numpy as np matplotlib.use('TkAgg') import matplotlib.pyplot as plt FLAGS = flags.FLAGS def _stochastic_rounding(x, precision, resolution, delta): """Stochastic_rounding for ternary. Args: x: precision: A float. The area we want to make this stochastic rounding. [delta-precision, delta] [delta, delta+precision] resolution: control the quantization resolution. delta: the undiscountinued point (positive number) Return: A tensor with stochastic rounding numbers. """ delta_left = delta - precision delta_right = delta + precision scale = 1 / resolution scale_delta_left = delta_left * scale scale_delta_right = delta_right * scale scale_2_delta = scale_delta_right - scale_delta_left scale_x = x * scale fraction = scale_x - scale_delta_left # print(precision, scale, x[0], np.floor(scale_x[0]), scale_x[0], fraction[0]) # we use uniform distribution random_selector = np.random.uniform(0, 1, size=x.shape) * scale_2_delta # print(precision, scale, x[0], delta_left[0], delta_right[0]) # print('x', scale_x[0], fraction[0], random_selector[0], scale_2_delta[0]) # rounddown = fraction < random_selector result = np.where(fraction < random_selector, scale_delta_left / scale, scale_delta_right / scale) return result def _ternary(x, sto=False): m = np.amax(np.abs(x), keepdims=True) scale = 2 * m / 3.0 thres = scale / 2.0 ratio = 0.1 if sto: sign_bit = np.sign(x) x = np.abs(x) prec = x / scale x = ( sign_bit * scale * _stochastic_rounding( x / scale, precision=0.3, resolution=0.01, # those two are all normalized. 
delta=thres / scale)) # prec + prec *ratio) # mm = np.amax(np.abs(x), keepdims=True) return np.where(np.abs(x) < thres, np.zeros_like(x), np.sign(x)) def main(argv): if len(argv) > 1: raise app.UsageError('Too many command-line arguments.') # x = np.arange(-3.0, 3.0, 0.01) # x = np.random.uniform(-0.01, 0.01, size=1000) x = np.random.uniform(-10.0, 10.0, size=1000) # x = np.random.uniform(-1, 1, size=1000) x = np.sort(x) tr = np.zeros_like(x) t = np.zeros_like(x) iter_count = 500 for _ in range(iter_count): y = _ternary(x) yr = _ternary(x, sto=True) t = t + y tr = tr + yr plt.plot(x, t/iter_count) plt.plot(x, tr/iter_count) plt.ylabel('mean (%s samples)' % iter_count) plt.show() if __name__ == '__main__': app.run(main) ================================================ FILE: experimental/lo/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Exports logic optimization module.""" from .utils import * # pylint: disable=wildcard-import from .receptive import model_to_receptive_field from .conv2d import optimize_conv2d_logic from .dense import optimize_dense_logic from .optimizer import run_rf_optimizer from .optimizer import run_abc_optimizer from .optimizer import mp_rf_optimizer_func from .table import load from .compress import Compressor from .generate_rf_code import * # __version__ = "0.5.0" ================================================ FILE: experimental/lo/compress.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Implements faster version of set on multiple strings.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class Compressor: """Implements a hierarchical set class with better performance than a set.""" def __init__(self, hash_only_input=False): self.n_dict = {} self.hash_only_input = hash_only_input def add_entry(self, table_in, table_out=""): """Adds entry (table_in, table_out) to the set.""" line = (table_in, table_out) if self.hash_only_input: h_line = hash(table_in) else: h_line = hash(line) if self.n_dict.get(h_line, None): self.n_dict[h_line] = self.n_dict[h_line].union([line]) else: self.n_dict[h_line] = set([line]) def has_entry(self, table_in, table_out=""): """Checks if table_in is already stored in the set.""" line = (table_in, table_out) if self.hash_only_input: h_line = hash(table_in) else: h_line = hash(line) if not self.n_dict.get(h_line, None): return None set_h_line = self.n_dict[h_line] for (ti, to) in set_h_line: if table_in == ti: return to return None def __call__(self): for key in self.n_dict: for line in self.n_dict[key]: yield line ================================================ FILE: experimental/lo/conv2d.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Implements convolutional (?, h, w, c) facing input layer optimization.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import multiprocessing as mp import os import shutil from .compress import Compressor import numpy as np import six from tensorflow.keras.models import Model from .utils import get_padding_value DEBUG = int(os.getenv("DEBUG", 0)) OG_IS_SYMBOLIC = 0 def parallel_index_table( p, ni, size, idx_height, idx_width, i_dict, o_dict, kernel, strides, padding, generate_pla): """Processes the table in parallel and use espresso to optimize it.""" print("... indexing table from {} to {} ({} => {})".format( ni, ni+size, p[0].shape, p[1].shape)) table_ins = [] table_ous = [] table_set = Compressor(hash_only_input=True) if DEBUG: table_set_line = {} for n in range(size): # we need to traverse the outputs to compute the input coordinates for ho in idx_height: min_hi = strides[0]*ho - 2*padding[0] max_hi = strides[0]*ho - 2*padding[0] + kernel[0] if min_hi < 0 or max_hi > p[0].shape[0]: continue for wo in idx_width: min_wi = strides[1]*wo - 2*padding[1] max_wi = strides[1]*wo - 2*padding[1] + kernel[1] if min_wi < 0 or max_wi > p[0].shape[1]: continue i_values = p[0][n, min_hi:max_hi, min_wi:max_wi].flatten() # o_values has dimension (1, 1, C_O) o_values = p[1][n, ho, wo] # if we generate a pla entry, we care about a list of # bits. Otherwise, we care about a list of floating point # values. 
table_i = "".join([i_dict[v] for v in i_values]) table_o = "".join([o_dict[v] for v in o_values]) if generate_pla: table_s = "".join([str(v) for v in table_i]) bit_str = table_s else: table_s = ",".join([str(v) for v in table_i]) table_i = table_s bit_str = "".join(i_dict[v] for v in i_values) is_table_zero = bit_str != "0"*len(bit_str) if table_set.has_entry(table_s) and not is_table_zero: # if table is already stored, we do not store it again. # from time to time, we may want to check if we have found # diverging output values. if DEBUG: (table_o_old, (old_n, old_ho, old_wo)) = table_set_line[table_s] if table_o != table_o_old: print( "contradicting outputs n={} old_n={} out_p={} out={}".format( (n, ho, wo), (old_n, old_ho, old_wo), table_o_old, table_o)) print(" I:", table_s) print(" I:", i_values) print("<<<", table_o_old) print(">>>", table_o) return (None, None) continue # these are unique table entries table_ins.append(table_i) table_ous.append(table_o) # we store this information in order to be able to debug # and discard information. table_set.add_entry(table_s) if DEBUG: table_set_line[table_s] = (table_o, (n, ho, wo)) print("... 
indexing table from {} to {} completed".format(ni, ni+size)) return (table_ins, table_ous) def parallel_compress_output_table( filename, header, table_ins, table_ous, output_group, generate_pla, n_bits_og, o, o_bits): """Processes in parallel compression of table and writes it to a disk.""" f = open(filename, "w") f.write("".join(header)) c = Compressor() for n in range(len(table_ins)): for og in range(output_group): if output_group > 1: if generate_pla: if OG_IS_SYMBOLIC: og_l = ["0"] * n_bits_og og_l[n_bits_og - 1 - og] = "1" og_b = "".join(og_l) table_i_suffix = " " + og_b else: og_b = bin(og)[2:] table_i_suffix = " " + "0" * (n_bits_og - len(og_b)) + og_b else: table_i_suffix = "," + str(og) else: table_i_suffix = "" table_i = table_ins[n] + table_i_suffix table_o = table_ous[n][(o+og)*o_bits:(o+og+1)*o_bits] if generate_pla: c.add_entry(table_i + " " + table_o) else: c.add_entry(table_i + "," + str(table_o[0])) for line in c(): f.write("{}\n".format(line[0])) if generate_pla: f.write(".e\n") f.close() print("... file {} generated".format(filename)) def optimize_conv2d_logic( model, i_name, o_name, x_train, i_dict=None, o_dict=None, kernel=None, strides=None, padding=None, output_group=1, samples=2000, randomize=None, generate_pla=True, prefix=""): """Generates table for logic synthesis for conv2d or conv2d-like shape. Generates table in either espresso format or csv format to be optimized for logic synthesis. The parameters kernel, strides and padding usually do not require any values, unless we want to embed maxpooling layer or multiple convolutional layers between i_name and o_name. In that case, we require the user to compute the proper kernel, strides, and padding that will correspond to the combined layer, as Keras and tensorflow do not provide a way to compute the receptive field between two layers. Arguments: model: Keras model i_name: name of convolutional layer (input to this layer must be quantized). o_name: name of quantized output layer. 
x_train: training set to be used to dump table. i_dict: dictionary of floating point values to encoding for inputs. o_dict: dictionary of floating point values to encoding for outputs. kernel: kernel size, to be specified if we want to override convolution kernel. strides: strides, to be specified if we want to override first convolution strides. padding: padding, to be specified if we want to override first convolution padding. output_group: by default, we compute one PE per channel output. The user can override that by specifying how many output channels should be bundled into the same PE. samples: how many images from x_train should be sampled when generating the tables. randomize: if specified, it should be the number of coordinates within the same image we will use to derive the convolution table. generate_pla: if true, we generate table in pla format. Otherwise, we generate a csv file. prefix: prefix name to create directory. Returns: list of files generated. """ # if no i_dict or no o_dict, we do not know how to encode, so we generate # csv file. 
if not i_dict or not o_dict: generate_pla = False # extract layer from i_name and o_name i_layer = model.get_layer(i_name) o_layer = model.get_layer(o_name) # if kernel is not specified, use the kernel size from i_layer if not kernel: kernel = i_layer.kernel_size # if strides is not specified, use the strides from i_layer if not strides: strides = i_layer.strides # if padding is not specified, use the padding from i_layer if not padding: padding = i_layer.padding # for conv2d, we want a list for kernel, strides and padding if not isinstance(kernel, list) and not isinstance(kernel, tuple): kernel = [kernel, kernel] if not isinstance(strides, list) and not isinstance(strides, tuple): strides = [strides, strides] if not isinstance(padding, list) and not isinstance(padding, tuple): padding = [padding, padding] # compute the padding value padding[0] = get_padding_value(padding[0], kernel[0]) padding[1] = get_padding_value(padding[1], kernel[1]) # resample inputs skip = min(2000, samples) indexes = np.array(range(x_train.shape[0])) np.random.shuffle(indexes) x_train = x_train[indexes[:samples]] # we want to create a smaller model that from inputs generate # i_layer.output + o_layer.output tensors, so that we can predict # its values. outputs = [] x = i_layer.input y = o_layer.output if not isinstance(x, list): x = [x] outputs = x + [y] mo = Model(inputs=model.inputs, outputs=outputs) p = mo.predict(x_train) # in csv mode, each entry has "1" value, for PLA, # we encode the floating point into multiple bits. if not generate_pla: i_bits = 1 # i_dict = {v:v for v in i_dict.keys()} else: i_bits = len(six.next(six.itervalues(i_dict))) if not generate_pla: o_bits = 1 # o_dict = {v:v for v in o_dict.keys()} else: o_bits = len(six.next(six.itervalues(o_dict))) # if randomize is specified, we will sample sqrt(randomize) # from each image, as the conv2d performs the filter everywhere # in the image. 
Because the same image may contain a lot of # reduntant information, we may want to restrict the number of # samples. if randomize: idx_height = np.random.choice( p[-1].shape[1], int(np.round(np.sqrt(randomize)))) idx_width = np.random.choice( p[-1].shape[2], int(np.round(np.sqrt(randomize)))) else: idx_height = range(p[-1].shape[1]) idx_width = range(p[-1].shape[2]) # this is just to inspect that the inputs and outputs are really quantized. print("inputs:") for i in range(len(x)): print(i, np.min(p[i]), np.max(p[i])) print("outputs:") print(np.min(p[-1]), np.max(p[-1])) # i_size and o_size are the channel sizes of the inputs and outputs o_size = y.shape[-1] i_size = p[0].shape[-1] if generate_pla: suffix = "pla" else: suffix = "csv" prefix = prefix + "/" if prefix else "" # lets try to remove the directory and create a new one try: shutil.rmtree(prefix + i_layer.name + "." + suffix) except OSError: pass try: os.makedirs(prefix + i_layer.name + "." + suffix) except OSError: pass table_ins = list() table_ous = list() print("...indexing inputs") # for each image in sampled x_train # on Intel processors, mp.cpu_count() returns number of threads number_of_processes = mp.cpu_count() // 2 pool = mp.Pool(number_of_processes) results = [] for n in range(0, x_train.shape[0], skip): res = pool.apply_async( parallel_index_table, args=((p[0][n:n+skip], p[1][n:n+skip]), n, skip, idx_height, idx_width, i_dict, o_dict, kernel, strides, padding, generate_pla)) results.append(res) pool.close() pool.join() all_pools = [res.get(timeout=1) for res in results] table_ins = sum([ap[0] for ap in all_pools], []) table_ous = sum([ap[1] for ap in all_pools], []) # input and output size ni = len(table_ins[0]) no = len(table_ous[0]) print("... 
generating tables {} outputs, {} entries".format( o_size, len(table_ins))) # this step should be very fast files = [] if OG_IS_SYMBOLIC: if output_group > 1: n_bits_og = output_group else: n_bits_og = 1 else: if output_group == 2: n_bits_og = 1 else: n_bits_og = int(np.ceil(np.log2(output_group))) # sometimes linux get very grumpy with too many files opened. # let's limit to 20. number_of_processes = min(20, mp.cpu_count() // 2) pool = mp.Pool(number_of_processes) for o in range(0, o_size, output_group): filename = "{}{}.{}/{}_{}.raw.{}".format( prefix, i_name, suffix, i_name, o, suffix) files.append(filename) header = [] if generate_pla: header.append(".i {}\n".format(ni + n_bits_og)) header.append(".o {}\n".format(no // o_size)) header.append(".type fr\n") if OG_IS_SYMBOLIC and output_group > 1: header.append(".mv {} {} {} {}\n".format( 3, ni, n_bits_og, no // o_size)) # let's generate some labels header.append(".ob " + " ".join([ "o_" + str(o) + "_" + str(o_bits - 1 - v) for v in range(o_bits)]) + "\n") i_names = [] # name is i____bit assert ni == (i_size * kernel[0] * kernel[1] * i_bits) for channel in range(i_size): for row in range(kernel[0]): for col in range(kernel[1]): for bit in range(i_bits): i_names.append("i_{}_{}_{}_{}".format( channel, row, col, (i_bits - 1 - bit))) # if we are grouping multiple channels, these will be the inputs for c in range(n_bits_og): i_names.append("og_{}".format(n_bits_og - 1 - c)) header.append(".ilb " + " ".join(i_names) + "\n") pool.apply_async( parallel_compress_output_table, args=((filename, header, table_ins, table_ous, output_group, generate_pla, n_bits_og, o, o_bits))) pool.close() pool.join() return files ================================================ FILE: experimental/lo/dense.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implements dense (?, features) facing input layer optimization.

Dumps truth tables (espresso PLA format or csv) for a quantized dense or
flattened layer so they can be optimized by logic-synthesis tools.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import multiprocessing as mp
import os
import shutil

from .compress import Compressor
import numpy as np
import six
from tensorflow.keras.models import Model

DEBUG = int(os.getenv("DEBUG", 0))

# When true, the output-group selector is emitted as a one-hot (symbolic)
# field instead of a binary-encoded index.
OG_IS_SYMBOLIC = 0


def parallel_index_table(p, ni, size, i_dict, o_dict, generate_pla):
  """Indexes one slice of the prediction table; meant to run in a worker.

  Args:
    p: pair (inputs, outputs) of numpy arrays for this slice.
    ni: starting sample index (only used for log messages).
    size: number of samples in this slice.
    i_dict: maps floating point input values to their bit encoding.
    o_dict: maps floating point output values to their bit encoding.
    generate_pla: if true, entries are encoded as PLA bit strings;
      otherwise as comma separated csv values.

  Returns:
    Tuple (table_ins, table_ous) of unique encoded input/output rows, or
    (None, None) in DEBUG mode when contradicting outputs are detected.
  """
  print("... indexing table from {} to {} ({} => {})".format(
      ni, ni+size, p[0].shape, p[1].shape))
  table_ins = []
  table_ous = []
  # hash_only_input: dedup on the input pattern only.
  table_set = Compressor(hash_only_input=True)
  if DEBUG:
    table_set_line = {}
  for n in range(size):
    i_values = p[0][n].flatten()
    o_values = p[1][n].flatten()
    # if we generate a pla entry, we care about a list of
    # bits. Otherwise, we care about a list of floating point
    # values.
    table_i = "".join([i_dict[v] for v in i_values])
    table_o = "".join([o_dict[v] for v in o_values])
    if generate_pla:
      table_s = "".join([str(v) for v in table_i])
      bit_str = table_s
    else:
      table_s = ",".join([str(v) for v in table_i])
      table_i = table_s
      bit_str = "".join(str(i_dict[v]) for v in i_values)
    # NOTE(review): despite its name, is_table_zero is True when bit_str is
    # NOT all zeros, so duplicates are only skipped for the all-zero pattern.
    # This looks inverted (either the name or the condition below) — confirm
    # intended dedup behavior.
    is_table_zero = bit_str != "0"*len(bit_str)
    if table_set.has_entry(table_s) and not is_table_zero:
      # if table is already stored, we do not store it again.
      # from time to time, we may want to check if we have found
      # diverging output values.
      if DEBUG:
        (table_o_old, old_n) = table_set_line[table_s]
        if table_o != table_o_old:
          print("contradicting outputs n={} old_n={} out_p={} out={}".format(
              n, old_n, table_o_old, table_o))
          print(" I:", table_s)
          print(" I:", i_values)
          print("<<<", table_o_old)
          print(">>>", table_o)
          return (None, None)
      continue
    # these are unique table entries
    table_ins.append(table_i)
    table_ous.append(table_o)
    # we store this information in order to be able to debug
    # and discard information.
    table_set.add_entry(table_s)
    if DEBUG:
      table_set_line[table_s] = (table_o, n)
  print("... indexing table from {} to {} completed".format(ni, ni+size))
  return (table_ins, table_ous)


def parallel_compress_output_table(
    filename, header, table_ins, table_ous, output_group,
    generate_pla, n_bits_og, o, o_bits):
  """Compresses one output group's table and writes it to disk (worker).

  Args:
    filename: destination file path.
    header: list of header lines (PLA directives) written first.
    table_ins: encoded input rows.
    table_ous: encoded output rows (all output channels concatenated).
    output_group: number of output channels bundled into this PE.
    generate_pla: PLA format if true, csv otherwise.
    n_bits_og: number of bits used for the output-group selector field.
    o: index of the first output channel of this group.
    o_bits: number of encoded bits per output value.
  """
  f = open(filename, "w")
  f.write("".join(header))
  c = Compressor()
  for n in range(len(table_ins)):
    for og in range(output_group):
      if output_group > 1:
        if generate_pla:
          if OG_IS_SYMBOLIC:
            # one-hot selector for the channel within the group
            og_l = ["0"] * n_bits_og
            og_l[n_bits_og - 1 - og] = "1"
            og_b = "".join(og_l)
            table_i_suffix = " " + og_b
          else:
            # binary-encoded selector, zero-padded to n_bits_og
            og_b = bin(og)[2:]
            table_i_suffix = " " + "0"*(n_bits_og - len(og_b)) + og_b
        else:
          table_i_suffix = "," + str(og)
      else:
        table_i_suffix = ""
      table_i = table_ins[n] + table_i_suffix
      # slice out the bits belonging to output channel o+og
      table_o = table_ous[n][(o+og)*o_bits:(o+og+1)*o_bits]
      if generate_pla:
        c.add_entry(table_i + " " + table_o)
      else:
        c.add_entry(table_i + "," + str(table_o[0]))
  for line in c():
    f.write("{}\n".format(line[0]))
  if generate_pla:
    f.write(".e\n")
  f.close()


def optimize_dense_logic(
    model, i_name, o_name, x_train, i_dict, o_dict, output_group=1,
    samples=2000, generate_pla=True, prefix=""):
  """Generates table for logic synthesis for dense or flattened layer.

  Generates table in either espresso format or csv format to be optimized
  for logic synthesis.

  Arguments:
    model: Keras model.
    i_name: name of the layer whose (quantized) input feeds the table.
    o_name: name of quantized output layer.
    x_train: training set to be used to dump table.
    i_dict: dictionary of floating point values to encoding for inputs.
    o_dict: dictionary of floating point values to encoding for outputs.
    output_group: by default, we compute one PE per channel output. The user
      can override that by specifying how many output channels should be
      bundled into the same PE.
    samples: how many images from x_train should be sampled when generating
      the tables.
    generate_pla: if true, we generate table in pla format. Otherwise, we
      generate a csv file.
    prefix: prefix name to create a directory.

  Returns:
    list of files generated.
  """
  i_layer = model.get_layer(i_name)
  o_layer = model.get_layer(o_name)

  # resample inputs
  skip = min(2000, samples)
  indexes = np.array(range(x_train.shape[0]))
  np.random.shuffle(indexes)
  x_train = x_train[indexes[:samples]]

  # build a sub-model that predicts both the input tensor(s) of i_layer and
  # the output tensor of o_layer, so we can tabulate input->output pairs.
  outputs = []
  x = i_layer.input
  y = o_layer.output
  if not isinstance(x, list):
    x = [x]
  outputs = x + [y]
  mo = Model(inputs=model.inputs, outputs=outputs)
  p = mo.predict(x_train)

  # in csv mode, each entry has "1" value, for PLA,
  # we encode the floating point into multiple bits.
  if not generate_pla:
    i_bits = 1
    # i_dict = {v:v for v in i_dict.keys()}
  else:
    i_bits = len(six.next(six.itervalues(i_dict)))
  if not generate_pla:
    o_bits = 1
    # o_dict = {v:v for v in o_dict.keys()}
  else:
    o_bits = len(six.next(six.itervalues(o_dict)))

  # sanity print: inputs and outputs should be quantized ranges
  print("inputs:")
  for i in range(len(x)):
    print(i, np.min(p[i]), np.max(p[i]))
  print("outputs:")
  print(0, np.min(p[-1]), np.max(p[-1]))

  # o_size and i_size are the channel sizes of the outputs and inputs
  o_size = y.shape[-1]
  i_size = p[0].shape[-1]

  if generate_pla:
    suffix = "pla"
  else:
    suffix = "csv"

  prefix = prefix + "/" if prefix else ""

  # lets try to remove the directory and create a new one
  try:
    shutil.rmtree(prefix + i_layer.name + "." + suffix)
  except OSError:
    pass
  try:
    os.makedirs(prefix + i_layer.name + "." + suffix)
  except OSError:
    pass

  print("...indexing inputs")

  # for each image in sampled x_train
  # on Intel processors, mp.cpu_count() returns number of threads
  number_of_processes = mp.cpu_count() // 2
  pool = mp.Pool(number_of_processes)
  results = []
  for n in range(0, x_train.shape[0], skip):
    res = pool.apply_async(
        parallel_index_table,
        args=((p[0][n:n+skip], p[1][n:n+skip]), n, skip,
              i_dict, o_dict, generate_pla))
    results.append(res)
  pool.close()
  pool.join()
  # pool has joined, so results are ready; short timeout is safe here
  all_pools = [res.get(timeout=1) for res in results]
  table_ins = sum([ap[0] for ap in all_pools], [])
  table_ous = sum([ap[1] for ap in all_pools], [])

  # input and output size
  ni = len(table_ins[0])
  no = len(table_ous[0])

  print("... generating tables {} outputs, {} entries".format(
      o_size, len(table_ins)))

  # this step should be very fast
  files = []

  # number of bits of the output-group selector field
  if OG_IS_SYMBOLIC:
    if output_group > 1:
      n_bits_og = output_group
    else:
      n_bits_og = 1
  else:
    if output_group == 2:
      n_bits_og = 1
    else:
      n_bits_og = int(np.ceil(np.log2(output_group)))

  # sometimes linux get very grumpy with too many files opened.
  # let's limit to 20.
  number_of_processes = min(20, mp.cpu_count() // 2)
  pool = mp.Pool(number_of_processes)

  for o in range(0, o_size, output_group):
    filename = "{}{}.{}/{}_{}.raw.{}".format(
        prefix, i_name, suffix, i_name, o, suffix)
    files.append(filename)
    header = []
    if generate_pla:
      header.append(".i {}\n".format(ni + n_bits_og))
      header.append(".o {}\n".format(no // o_size))
      header.append(".type fr\n")
      if OG_IS_SYMBOLIC and output_group > 1:
        header.append(".mv {} {} {} {}\n".format(
            3, ni, n_bits_og, no // o_size))
      # let's generate some labels
      header.append(".ob " + " ".join([
          "o_" + str(o) + "_" + str(o_bits - 1 - v)
          for v in range(o_bits)]) + "\n")
      i_names = []
      # input label format: i_<feature>_<bit>
      assert ni == (i_size * i_bits)
      for feature in range(i_size):
        for bit in range(i_bits):
          i_names.append("i_{}_{}".format(
              feature, (i_bits - 1 - bit)))
      # if we are grouping multiple channels, these will be the inputs
      for c in range(n_bits_og):
        i_names.append("og_{}".format(n_bits_og - 1 - c))
      header.append(".ilb " + " ".join(i_names) + "\n")
    pool.apply_async(
        parallel_compress_output_table,
        args=((filename, header, table_ins, table_ous, output_group,
               generate_pla, n_bits_og, o, o_bits)))
  pool.close()
  pool.join()

  return files



================================================
FILE: experimental/lo/generate_rf_code.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# ==============================================================================
"""Generates expressions for random trees.

Emits HLS-friendly C++ (Catapult) or Verilog that evaluates sklearn random
forest regressors/classifiers as combinational logic.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import numpy as np

DEBUG = int(os.environ.get("DEBUG", 0))
PRINT_DEBUG = int(os.environ.get("PRINT_DEBUG", 0))


def gen_random_tree_regressor(
    tree, code, bits, o_bits, o_decimal_digits, o_is_neg, bdd, offset,
    is_cc=True):
  """Generates HLS friendly C++ code for random tree regressor.

  Generates HLS friendly C++ code for Catapult.

  Arguments:
    tree: decision tree regressor from SkLearn.
    code: list of code lines to be append to.
    bits: list containing number of bits for each of the inputs.
    o_bits: number of bits for output.
    o_decimal_digits: number of decimal digits (right of the decimal point
      of o_bits for approximation of regressor in RandomTreeRegressor.
    o_is_neg: True or 1 if output can be negative.
    bdd: we actually try to cache entries (i,v,n1,n0) entries so that
      if they appear again, we reuse previously computed nodes.
    offset: each variable created in this function call is incremented by
      offset.
    is_cc: if True, generates C++, else Verilog.

  Returns:
    Tuple containing last variable name and current number of variables.
  """
  # extract information from tree
  n_nodes = tree.node_count
  children_left = tree.children_left
  children_right = tree.children_right
  feature = tree.feature
  threshold = tree.threshold
  values = np.copy(tree.value)

  # output type of each generated node expression
  o_suffix = ""
  if DEBUG:
    o_type = "float"
  elif is_cc:
    o_type = "ac_fixed<{},{},{}>".format(
        o_bits + o_decimal_digits, o_bits + o_is_neg, o_is_neg)
  else:
    o_sign = " signed" if o_is_neg else ""
    if o_bits + o_decimal_digits > 1:
      o_suffix = "[{}:0]".format(o_bits + o_decimal_digits - 1)
    o_type = "wire" + o_sign + " " + o_suffix

  def round_digits(x, decimal_digits):
    """Rounds to decimal_digits to the right of the decimal point."""
    if DEBUG:
      return x
    factor = (1 << decimal_digits) * 1.0
    x = x * factor
    return np.round(x) / factor

  # mark leaves and quantize leaf values
  is_leaves = np.zeros(shape=n_nodes, dtype=bool)
  stack = [(0, -1)]
  while stack:
    node_id, parent_depth = stack.pop()
    if children_left[node_id] != children_right[node_id]:
      stack.append((children_left[node_id], parent_depth+1))
      stack.append((children_right[node_id], parent_depth+1))
    else:
      is_leaves[node_id] = True
      values[node_id] = round_digits(tree.value[node_id], o_decimal_digits)
      if (
          values[node_id].flatten()[0] != tree.value[node_id].flatten()[0]
          and DEBUG
      ):
        print(node_id, values[node_id].flatten()[0],
              tree.value[node_id].flatten()[0])

  v_name = {}
  n_vars = offset
  # NOTE(review): this clobbers the bdd argument passed by the caller, so the
  # node cache is never shared across trees — confirm whether intentional.
  bdd = {}

  def round_value_to_int(x):
    # emit the quantized leaf constant in the target language's syntax
    v = hex(int(np.round(x * (1 << (o_decimal_digits)))))
    if is_cc:
      if DEBUG:
        return str(x)
      else:
        return x  #v + " /* {} */".format(x)
    else:
      return (
          str(o_bits + o_decimal_digits) + "'h" + v[2:] +
          " /* {} */".format(x)
      )

  if is_leaves[0]:
    # degenerate tree: a single leaf, emit one constant assignment
    v_name[0] = round_value_to_int(values[0].flatten()[0])
    code.append(" {} n_{} = {};".format(o_type, n_vars, v_name[0]))
    last_var = "n_{}".format(n_vars)
    n_vars += 1
  else:
    # walk nodes bottom-up so child expressions exist before parents use them
    for i in range(n_nodes-1, -1, -1):
      if is_leaves[i]:
        continue
      if v_name.get(children_left[i], None) is not None:
        n1 = v_name[children_left[i]]
      elif is_leaves[children_left[i]]:
        n1 = round_value_to_int(values[children_left[i]].flatten()[0])
        v_name[children_left[i]] = n1
      else:
        # internal child should already be named; reaching here is a bug
        n1 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_left[i]] = n1
        raise ValueError((children_left[i], n1, is_leaves[children_left[i]]))
      if v_name.get(children_right[i], None) is not None:
        n0 = v_name[children_right[i]]
      elif is_leaves[children_right[i]]:
        n0 = round_value_to_int(values[children_right[i]].flatten()[0])
        v_name[children_right[i]] = n0
      else:
        n0 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_right[i]] = n0
        raise ValueError((children_right[i], n0, is_leaves[children_right[i]]))
      if v_name.get(i, None) is not None:
        n = v_name[i]
        last_var = v_name[i]
      elif bdd.get((feature[i], threshold[i], n1, n0), None) is not None:
        # identical (feature, threshold, children) already emitted: reuse it
        n = bdd[(feature[i], threshold[i], n1, n0)]
        v_name[i] = n
        last_var = n
      elif n1 == n0:
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n1
        v_name[i] = n1
        last_var = n1
      else:
        n = "n_" + str(n_vars)
        n_vars += 1
        v_name[i] = n
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n
        t = int(threshold[i])
        if bits[feature[i]] == 1:
          # single-bit input: a "<= 0" test is just the inverted bit
          if t == 0:
            n1, n0 = n0, n1
          code.append(
              " {} {} = (i_{}) ? {} : {}; // x_{} {}".format(
                  o_type, v_name[i], feature[i], n1, n0, i, threshold[i]))
        else:
          code.append(
              " {} {} = (i_{} <= {}) ? {} : {}; // x_{} {}".format(
                  o_type, v_name[i], feature[i], t, n1, n0, i, threshold[i]))
        last_var = v_name[i]

  return (last_var, n_vars)


def entry_to_hex(entry, max_value, size, is_cc):
  """Converts class instance to hexa number."""
  # pack the per-class counts into one integer, base (max_value+1)
  e_vector = [np.power(max_value+1, i) for i in range(len(entry)-1, -1, -1)]
  entry = np.array(entry)
  v = hex(np.sum(entry * e_vector))
  if is_cc:
    return v
  else:
    return str(size) + "'h" + v[2:] + " /* {} */".format(entry)


def gen_random_tree_classifier(
    tree, code, bits, bdd, max_value, values_rom, offset, is_cc=True):
  """Generates C++ or Verilog friendly code for random tree classifier.

  Generates HLS Catapult friendly code or RTL in Verilog for random tree
  classifier from SkLearn.

  Arguments:
    tree: RandomTreeClassifier from sklearn.
    code: list of strings containing code generated.
    bits: list containing number of bits for each of the inputs.
    bdd: we actually try to cache entries (i,v,n1,n0) entries so that
      if they appear again, we reuse previously computed nodes.
    max_value: random tree classifiers returns vector of classes with the
      number of instances found in the terminal leaf node. This variable
      specifies a clipping factor for each class type so that we have a
      bounded problem to synthesize.
    values_rom: to save space in classifier, we store class values in
      values_rom.
    offset: each variable created in this function call is incremented by
      offset.
    is_cc: if True, generates C++ code; otherwise, Verilog.

  Returns:
    Tuple containing last variable name and current number of variables.
  """
  # extract information from tree
  n_nodes = tree.node_count
  children_left = tree.children_left
  children_right = tree.children_right
  feature = tree.feature
  threshold = tree.threshold
  values = {}

  is_leaves = np.zeros(shape=n_nodes, dtype=bool)
  stack = [(0, -1)]

  rom_l = []
  # for wide leaf vectors it is cheaper to index a rom than inline constants
  use_rom = max_value >= 7
  n_classes = len(tree.value[0].flatten())
  max_bits = int(np.ceil(np.log2(max_value + 1)))

  while stack:
    node_id, parent_depth = stack.pop()
    if children_left[node_id] != children_right[node_id]:
      stack.append((children_left[node_id], parent_depth+1))
      stack.append((children_right[node_id], parent_depth+1))
    else:
      # is leaf node
      is_leaves[node_id] = True
      # get tree node output
      p_input_tuple = tree.value[node_id].flatten().astype(np.int32)
      max_input_value = np.max(p_input_tuple)
      min_input_value = np.min(p_input_tuple)
      # if max_value == 1, only keep top ones
      if max_value == 1:
        input_tuple = (p_input_tuple == max_input_value).astype(np.int32)
        tree.value[node_id] = (tree.value[node_id] == max_input_value).astype(
            tree.value[node_id].dtype)
      else:
        # if max_value <= 3:
        # SKLearn classifier computes probability for each entry instead of
        # suming them all. We should do the same.
        max_input_value = np.sum(p_input_tuple)
        min_input_value = 0
        # Just update tree.value to number so that we can compare accuracy of
        # quantization later.
        # NOTE(review): divides by max_input_value; an all-zero leaf vector
        # would make this 0/0 — confirm sklearn guarantees non-empty leaves.
        tree.value[node_id] = np.round(
            max_value * (tree.value[node_id] - min_input_value) /
            (max_input_value - min_input_value))
        input_tuple = tree.value[node_id].flatten()
      input_tuple = tuple(list(input_tuple.astype(np.int32)))
      # stores values in rom - we will use rom to store values if use_rom is
      # true.
      if values_rom.get(input_tuple, None) is None:
        values_rom[input_tuple] = len(values_rom)
        rom_l.append(input_tuple)
        if DEBUG:
          print(values_rom[input_tuple], input_tuple)
      if use_rom:
        values[node_id] = values_rom[input_tuple]
      else:
        values[node_id] = entry_to_hex(
            input_tuple, max_value, max_bits * n_classes, is_cc)

  # t_bits: entry type
  # l_bits: table line type
  if use_rom:
    t_bits = int(np.ceil(np.log2(len(values_rom))))
    l_bits = max_bits * n_classes
  else:
    t_bits = max_bits * n_classes

  # we only store the index here, as we read from a rom
  if is_cc:
    if DEBUG:
      t_type = "int"
    else:
      t_type = "ac_int<{},false>".format(t_bits)
  else:
    t_type = "wire [{}:0]".format(t_bits-1)

  v_name = {}
  n_vars = offset
  # NOTE(review): clobbers the bdd argument (same pattern as the regressor) —
  # the cross-call cache is never used; confirm intended.
  bdd = {}

  if is_leaves[0]:
    # degenerate tree: single leaf
    v_name[0] = t_type + "(" + str(values[0]) + ")"
    code.append(" {} n_{} = {};".format(
        t_type, n_vars, values[0]))
    last_var = "n_{}".format(n_vars)
    n_vars += 1
  else:
    # bottom-up emission; see gen_random_tree_regressor for the same scheme
    for i in range(n_nodes-1, -1, -1):
      if is_leaves[i]:
        continue
      if v_name.get(children_left[i], None) is not None:
        n1 = v_name[children_left[i]]
      elif is_leaves[children_left[i]]:
        if is_cc:
          n1 = t_type + "(" + str(values[children_left[i]]) + ")"
        else:
          n1 = str(values[children_left[i]])
        v_name[children_left[i]] = n1
      else:
        n1 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_left[i]] = n1
        raise ValueError((children_left[i], n1, is_leaves[children_left[i]]))
      if v_name.get(children_right[i], None) is not None:
        n0 = v_name[children_right[i]]
      elif is_leaves[children_right[i]]:
        if is_cc:
          n0 = t_type + "(" + str(values[children_right[i]]) + ")"
        else:
          n0 = str(values[children_right[i]])
        v_name[children_right[i]] = n0
      else:
        n0 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_right[i]] = n0
        raise ValueError((children_right[i], n0, is_leaves[children_right[i]]))
      if v_name.get(i, None) is not None:
        n = v_name[i]
        last_var = v_name[i]
      elif bdd.get((feature[i], threshold[i], n1, n0), None) is not None:
        n = bdd[(feature[i], threshold[i], n1, n0)]
        v_name[i] = n
        last_var = n
      elif n1 == n0:
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n1
        v_name[i] = n1
        last_var = n1
      else:
        n = "n_" + str(n_vars)
        n_vars += 1
        v_name[i] = n
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n
        t = int(threshold[i])
        if bits[feature[i]] == 1:
          if t == 0:
            n1, n0 = n0, n1
          code.append(
              " {} {} = (i_{}) ? {} : {}; // x_{} {}".format(
                  t_type, v_name[i], feature[i], n1, n0, i, threshold[i]))
        else:
          code.append(
              " {} {} = (i_{} <= {}) ? {} : {}; // x_{} {}".format(
                  t_type, v_name[i], feature[i], t, n1, n0, i, threshold[i]))
        last_var = v_name[i]

  if use_rom:
    # emit the rom holding the packed leaf vectors and index it by last_var
    if is_cc:
      if DEBUG:
        l_type = "int"
      else:
        l_type = "ac_int<{},false>".format(l_bits)
      code.append(" {} {}_rom[{}]".format(l_type, last_var, len(values_rom))
                  + " {")
      for i in range(len(values_rom)):
        code_s = " " + entry_to_hex(rom_l[i], max_value, l_bits, is_cc)
        if i < len(values_rom)-1:
          code_s = code_s + ","
        code.append(code_s)
      code.append(" };")
    else:
      l_type = "wire [{}:0]".format(l_bits - 1)
      code.append(" function [{}:0] {}_rom;".format(l_bits-1, last_var))
      code.append(" input [{}:0] address;".format(t_bits-1))
      code.append(" begin")
      code.append(" case (address)")
      for i in range(len(values_rom)):
        code.append(" {}'d{}: {}_rom = {};".format(
            l_bits, i, last_var,
            entry_to_hex(rom_l[i], max_value, l_bits, is_cc)))
      code.append(" default: {}_rom = 0;".format(last_var))
      code.append(" endcase")
      code.append(" end")
      code.append(" endfunction")
    code.append(" {} v_{} = {}_rom[{}];".format(
        l_type, last_var, last_var, last_var))
    last_var = "v_" + last_var

  return last_var, n_vars


def gen_random_forest(
    rf, name, bits, is_neg, o_bits, o_is_neg, is_regressor=True,
    is_top_level=False, is_cc=True):
  """Generates HLS based C++ or SystemVerilog code for random forest.

  Args:
    rf: fitted sklearn random forest (estimators_ are traversed).
    name: name of the generated top-level function/module.
    bits: per-input bit widths (numpy array).
    is_neg: per-input signedness flags.
    o_bits: number of output bits.
    o_is_neg: True/1 if the output can be negative.
    is_regressor: emit regressor (averaging) vs classifier (voting) logic.
    is_top_level: mark the generated entity as the HLS top-level design.
    is_cc: if True generate C++ for Catapult, else Verilog.

  Returns:
    list of generated source lines (header + per-tree code + footer).
  """
  # TODO(nunescoelho): need to take care of multiple outputs for classifier.
  #   we can get better result if we do not look at the winning classifier,
  #   but sum how many of them appear in each classifier for leaf nodes.
  bdd = {}
  values_rom = {}
  offset = 0
  code = []

  # MAX_BITS bounds the per-class leaf counts (classifier) and sets the
  # number of fractional digits (regressor).
  max_value = (1 << int(os.environ.get("MAX_BITS",1))) - 1
  decimal_digits = int(os.environ.get("MAX_BITS", 5))
  assert max_value > 0

  # emit one expression tree per estimator; o_list collects each tree's
  # final output variable name
  o_list = []
  for i in range(len(rf.estimators_)):
    tree = rf.estimators_[i].tree_
    code.append(" //----- TREE {}".format(i))
    if is_regressor:
      last_var, offset = gen_random_tree_regressor(
          tree, code, bits, o_bits, decimal_digits, o_is_neg, bdd, offset,
          is_cc)
    else:
      values_rom = {}
      last_var, offset = gen_random_tree_classifier(
          tree, code, bits, bdd, max_value, values_rom, offset, is_cc)
    o_list.append(last_var)

  if is_cc:
    # NOTE(review): the "#include " literals below appear truncated — the
    # original header names (in angle brackets) were likely lost in an
    # HTML-ish extraction step; recover them from repository history.
    header = [
        "#include ",
        "#include ",
        "#include ",
        "using namespace std;",
        "//#define _PRINT_DEBUG_",
        "#define PB(n) cout << #n << \":\" << n << endl;",
        "#define PS(n) \\",
        " cout << #n << \":\" << n.to_double() << \" \"; \\",
        " for(int i=n.width-1; i>=0; i--) cout << n[i]; cout << endl;"
    ]
    if DEBUG:
      # float reference implementation of round-half-to-even
      # NOTE(review): the two literals after "truncf(x + 0.5);" are joined by
      # implicit string concatenation (missing comma), so "}" lands on the
      # same emitted line — harmless C but probably unintended.
      header = header + [
          "static inline float round_even(float x) {",
          " int x_int = truncf(x);",
          " float x_dec = x - x_int;",
          " if ((x_dec == 0.5) && (x_int % 2 == 0)) {",
          " return truncf(x);",
          " } else {",
          " return truncf(x + 0.5);"
          " }",
          "}"
      ]
      if is_top_level:
        header.append("#pragma hls_design top")
      # NOTE(review): o_bits is passed but the format string has only two
      # placeholders — the extra argument is silently ignored.
      header.append("void {}(int in[{}], int &out)".format(
          name, np.sum(bits), o_bits) + " {")
    else:
      # fixed-point round-half-to-even over the ac_fixed accumulator
      n_bits = int(np.ceil(np.log2(len(o_list))))
      header = header + [
          "static inline ac_int<{},{}> round_even(ac_fixed<{},{},{}> x)".format(
              o_bits, o_is_neg, n_bits + o_bits + decimal_digits,
              n_bits + o_bits + o_is_neg, o_is_neg
          ) + " {",
          " bool x_int_is_even = x[{}] == 0;".format(decimal_digits + n_bits),
          " bool x_frac_is_0_5 = x[{}] && (x.slc<{}>(0) == 0);".format(
              n_bits + decimal_digits-1, n_bits + decimal_digits-1),
          " if (x_frac_is_0_5 && x_int_is_even) {",
          " return x.slc<{}>({});".format(o_bits, n_bits + decimal_digits),
          " } else {",
          " ac_int<{},{}> r = x.slc<{}>({}) + 1;".format(
              o_bits + 1, o_is_neg, o_bits + 1, n_bits + decimal_digits - 1),
          " return r.slc<{}>(1);".format(o_bits + 1),
          #" return (x + ac_fixed<{},{},{}>({})).slc<{}>({});".format(
          #    n_bits + o_bits + decimal_digits, n_bits + o_bits + o_is_neg,
          #    o_is_neg, 1<<(n_bits+decimal_digits-1),
          #    o_bits, n_bits + decimal_digits),
          #    #o_is_neg, len(o_list)/2, o_bits, n_bits + decimal_digits),
          " }",
          "}"
      ]
      if is_top_level:
        header.append("#pragma hls_design top")
      header.append("void {}(ac_int<{},0> in, ac_int<{},{}> &out)".format(
          name, np.sum(bits), o_bits, o_is_neg) + " {")
  else:
    # Verilog module header with a round_even function
    n_bits = int(np.ceil(np.log2(len(o_list))))
    i_decl = " input [{}:0] in;".format(np.sum(bits)-1)
    o_sign = "signed " if o_is_neg else ""
    o_decl = " output " + o_sign + "[{}:0] out;".format(o_bits-1)
    header = [
        "module " + name + "(in, out);",
        i_decl,
        o_decl,
        "",
        " function {}[{}:0] round_even;".format(o_sign, o_bits),
        " input {}[{}:0] x;".format(
            o_sign, n_bits + o_bits + decimal_digits - 1),
        " reg x_int_is_even;",
        " reg x_frac_is_0_5;",
        " reg {}[{}:0] round_sum;".format(o_sign, o_bits + 1),
        " begin",
        " x_int_is_even = x[{}] == 0;".format(decimal_digits + n_bits),
        " x_frac_is_0_5 = x[{}] && (x[{}:0] == 0);".format(
            n_bits + decimal_digits-1, n_bits + decimal_digits - 2),
        " if (x_frac_is_0_5 && x_int_is_even)",
        " round_even = x[{}:{}];".format(
            n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits),
        " else",
        " begin",
        " round_sum = x[{}:{}] + 1;".format(
            n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits - 1),
        " round_even = round_sum[{}:1];".format(o_bits + 1),
        " end",
        #" round_even = (x + {})[{}:{}];".format(
        #    #(1 << (n_bits + decimal_digits - 1)),
        #    n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits),
        " end",
        " endfunction"
    ]

  # unpack the flat input bus into one named variable per feature
  all_bits = np.sum(bits)
  sum_i = 0
  for i in range(bits.shape[0]):
    if is_cc:
      if bits[i] > 1:
        if DEBUG:
          header.append(" int i_{} = in[{}];".format(i, i))
        else:
          header.append(" ac_int<{},{}> i_{} = in.slc<{}>({});".format(
              bits[i], is_neg[i], i, bits[i], sum_i))
      else:
        header.append(" bool i_{} = in[{}];".format(i, sum_i))
    else:
      if bits[i] == 1:
        header.append(" wire i_{} = in[{}];".format(i, all_bits - sum_i - 1))
      else:
        # NOTE(review): width spec uses bits[i] (not bits[i]-1) and the slice
        # bounds mix little/big-endian bit orderings — verify against a
        # generated netlist before trusting the Verilog path.
        header.append(" wire i_{}[{}:0] = in[{}:{}];".format(
            i, bits[i], sum_i + bits[i] - 1, all_bits - sum_i - 1))
    sum_i += bits[i]

  footer = []

  if is_regressor:
    # regressor: sum all tree outputs and round-to-even the average
    n_bits = int(np.ceil(np.log2(len(o_list))))
    assert 1 << n_bits == len(o_list)
    if is_cc:
      if DEBUG:
        tmp_type = "float"
      else:
        tmp_type = "ac_fixed<{},{},{}>".format(
            n_bits + o_bits + decimal_digits,
            n_bits + o_bits + o_is_neg, o_is_neg)
      avg_o = " {} o_tmp = {};".format(tmp_type, " + ".join(o_list))
      # rnd_o = " o_tmp += {}({});".format(tmp_type, len(o_list)/2)
      if DEBUG:
        out = " out = round_even(o_tmp / {});".format(len(o_list))
      else:
        out = " out = round_even(o_tmp);"
      footer.append(" #ifdef _PRINT_DEBUG_")
      for o_name in o_list:
        footer.append(" PS({});".format(o_name))
      footer.append(" #endif")
      closing = "}"
    else:
      tmp_sign = "signed " if o_is_neg else ""
      avg_o = " wire " + tmp_sign + "[{}:0] o_tmp = {};".format(
          n_bits + o_bits + decimal_digits - 1, " + ".join(o_list))
      for n in o_list:
        footer.append(" // always @({}) $display(\"{} = %f (%b)\", {} / 32.0, {});".format(n,n,n,n))
      footer.append(" // always @(o_tmp) $display(\"o_tmp = %b\", o_tmp);")
      out = " assign out = round_even(o_tmp);"
      closing = "endmodule"
    footer = footer + [avg_o, out, closing]
  else:
    # classifier: per-class vote counters followed by an argmax
    assert not o_is_neg
    footer = []
    o_suffix = ""
    if DEBUG:
      o_type = "int"
    elif is_cc:
      o_type = "ac_int<{},{}>".format(o_bits, o_is_neg)
    else:
      o_sign = " signed" if o_is_neg else ""
      o_suffix = "[{}:0]".format(o_bits)
      o_type = "wire" + o_sign + " " + o_suffix
    if is_cc:
      n_classes = 1 << o_bits
      max_bits = int(np.ceil(np.log2(max_value + 1)))
      log2_o_list = int(np.ceil(np.log2(len(o_list))))
      if DEBUG:
        log2_o_type = "int"
      else:
        log2_o_type = "ac_int<{},false>".format(log2_o_list + max_bits)
      sum_v = (
          " {} sum[{}] = ".format(
              log2_o_type, 1 << o_bits) + "{" +
          ",".join("0" * (1 << o_bits)) + "};"
      )
      footer = [sum_v]
      for o_name in o_list:
        # accumulate each class's packed count from every tree output
        for i in range(n_classes):
          if DEBUG:
            footer.append(" sum[{}] += ({} >> {}) & {};".format(
                i, o_name, (n_classes - i) * max_bits - max_bits,
                hex((1 << max_bits) - 1)))
          else:
            footer.append(" sum[{}] += {}.slc<{}>({});".format(
                i, o_name, max_bits, (n_classes - i) * max_bits - max_bits))
        debug_print = []
        for i in range(n_classes):
          debug_print.append("{}.slc<{}>({}).to_string(AC_DEC)".format(
              o_name, max_bits, (n_classes - i) * max_bits - max_bits))
        footer_s = (
            " cout << \"{} \" <<".format(o_name) +
            " << \" \" << ".join(debug_print) + " << endl;"
        )
        footer.append(" #ifdef _PRINT_DEBUG_")
        footer.append(footer_s)
        footer.append(" #endif")
      # argmax over the per-class sums
      footer.append(" {} max_tmp = sum[0];".format(log2_o_type))
      footer.append(" {} max_id = 0;".format(o_type))
      footer.append(" for(int i=1; i<{}; i++)".format(1 << o_bits))
      footer.append(
          " if (sum[i] >= max_tmp) { max_tmp = sum[i]; max_id = i; }")
      out = " out = max_id;"
      footer.append(out)
      footer += ["}"]
    else:
      n_classes = 1 << o_bits
      max_bits = int(np.ceil(np.log2(max_value + 1)))
      log2_o_list = int(np.ceil(np.log2(len(o_list))))
      log2_o_type = "wire [{}:0]".format(log2_o_list + max_bits)
      footer = []
      for i in range(n_classes):
        code_s = " {} sum_{} = ".format(log2_o_type, i)
        code_term = []
        for o_name in o_list:
          code_term.append("{}[{}:{}]".format(
              o_name, (n_classes - i) * max_bits,
              (n_classes - i) * max_bits - max_bits))
        code_s += " + ".join(code_term) + ";"
        footer.append(code_s)
        footer.append(" // always @(sum_{}) $display(\"sum_{} = %d\", sum_{});".format(
            i, i, i))
      footer.append(" reg [{}:0] max_tmp;".format(
          log2_o_list + max_bits - 1))
      footer.append(" reg [{}:0] max_id;".format(o_bits-1))
      footer.append(" integer i;")
      footer.append(" always @(" + " or ".join(
          ["sum_" + str(i) for i in range(n_classes)]) + ")")
      footer.append(" begin")
      footer.append(" max_tmp = sum_0; max_id = 0;")
      for i in range(1, n_classes):
        footer.append(
            " if (sum_{} >= max_tmp) begin max_tmp = sum_{}; max_id = {}; end".format(
                i, i, i))
      footer.append(" end")
      footer.append(" assign out = max_id;")
      footer.append("endmodule")

  return header + code + footer


def gen_testbench_sv(rf, name, bits, is_neg, o_bits, o_is_neg, x, y, p, code):
  """Appends a Verilog testbench to code; writes x.rom/y.rom/p.rom files.

  The testbench feeds each row of x to the generated module and counts the
  outputs matching either the label (y) or the float-model prediction (p).
  """
  code.append("module tb;")
  x_0, x_1 = x.shape
  # NOTE(review): x_0_log2 is computed but never used.
  x_0_log2 = int(np.ceil(np.log2(x_0)))
  code.append("reg [{}:0] x_rom[{}:0];".format(x_1-1, x_0-1))
  code.append("initial $readmemb(\"x.rom\", x_rom, 0, {});".format(x_0-1))
  with open("x.rom", "w") as f:
    for i in range(len(x)):
      f.write("".join([str(int(v)) for v in x[i]]) + "\n")
  o_sign = "signed " if o_is_neg else ""
  o_type = o_sign + "[{}:0]".format(o_bits - 1)
  code.append("reg {} y_rom[{}:0];".format(o_type,x_0-1))
  code.append("reg {} p_rom[{}:0];".format(o_type,x_0-1))
  with open("y.rom","w") as f:
    for i in range(len(y)):
      f.write(hex(int(y[i]))+ "\n")
  with open("p.rom","w") as f:
    for i in range(len(y)):
      f.write(hex(int(p[i]))+ "\n")
  code.append("initial $readmemh(\"y.rom\", y_rom, 0, {});".format(x_0-1))
  code.append("initial $readmemh(\"p.rom\", p_rom, 0, {});".format(x_0-1))
  code.append("integer i;")
  code.append("integer cnt;")
  code.append("reg [{}:0] in;".format(x_1-1))
  code.append("wire {} out;".format(o_type))
  code.append("{} {}(in, out);".format(name, name))
  code.append("initial")
  code.append("begin")
  code.append(" cnt = 0;")
  # NOTE(review): this read of x_rom[i] happens before i is initialized by
  # the for loop below — likely a leftover line.
  code.append(" in = x_rom[i];")
  code.append(" for (i=0; i<{}; i=i+1)".format(x_0))
  code.append(" begin")
  code.append(" in = x_rom[i];")
  code.append(" #1000;")
  code.append(" if (p_rom[i] != out && y_rom[i] != out)")
  code.append(" begin")
  code.append(" $display(\"%d: %b y=%d p=%d -> %d\", i, x_rom[i], y_rom[i], p_rom[i], out);")
  code.append(" end")
  code.append(" else")
  code.append(" begin")
  code.append(" cnt = cnt + 1;")
  code.append(" end")
  code.append(" end")
  code.append(" $display(\"acc = %f\", 100.0 * cnt / {});".format(x_0))
  code.append("end")
  code.append("endmodule")


def gen_testbench_cc(rf, name, bits, is_neg, o_bits, o_is_neg, x, y, p, code):
  """Appends a C++ testbench to code, embedding x/y/p as literal arrays.

  Counts outputs that match either the label (y) or the float-model
  prediction (p) and prints the resulting accuracy.
  """
  code.append("int x[{}][{}] = ".format(*x.shape) + "{")
  for i in range(len(x)):
    code_s = " {" + ",".join([str(int(v)) for v in x[i]]) + "}"
    if i < len(x) - 1:
      code_s = code_s + ","
    code.append(code_s)
  code.append("};")
  code_s = (
      "int y[{}] = ".format(y.shape[0]) + "{" +
      ",".join([str(int(v)) for v in y]) + "};"
  )
  code.append(code_s)
  code_s = (
      "int p[{}] = ".format(p.shape[0]) + "{" +
      ",".join([str(int(v)) for v in p]) + "};"
  )
  code.append(code_s)
  code.append("int main()")
  code.append("{")
  code.append(" double acc = 0.0;")
  if DEBUG:
    code.append(" int in[{}];".format(x.shape[1]))
    code.append(" int out;")
  else:
    code.append(" ac_int<{},0> in;".format(x.shape[1]))
    code.append(" ac_int<{},{}> out;".format(o_bits, o_is_neg))
  code.append(" for (int i=0; i<{}; i++)".format(x.shape[0]) + "{")
  code.append(" for (int j=0; j<{}; j++) in[j] = x[i][j];".format(
      x.shape[1]))
  code.append(" {}(in, out);".format(name))
  code.append(" if (p[i] != out && y[i] != out) {")
  code.append(" cout << i << \": \";")
  code.append(" for (int j=0; j<{}; j++) cout << in[j];".format(
      x.shape[1]))
  if DEBUG:
    code.append(" cout << \" y=\" << y[i] << \" p=\" << p[i] << \" \" << out << endl;")
    code.append(" }")
    code.append(" acc += (y[i] == out);")
  else:
    code.append(" cout << \" y=\" << y[i] << \" p=\" << p[i] << \" \" << out.to_int() << endl;")
    code.append(" #ifdef _PRINT_DEBUG_")
    code.append(" exit(1);")
    code.append(" #endif")
    code.append(" }")
    code.append(" acc += (y[i] == out.to_int());")
  code.append(" }")
  code.append(" cout << \"acc = \" << 100.0 * acc / {} << endl;".format(
      x.shape[0]))
  code.append("}")



================================================
FILE: experimental/lo/optimizer.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Implements random forest or logic otimizer function.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import multiprocessing as mp import os import pickle import random import shutil import subprocess import sys import time import warnings import numpy as np import six from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble import RandomForestRegressor from .compress import Compressor from .generate_rf_code import gen_random_forest from .table import load def file_compress(fin, fout): """Compresses table using hash set.""" c = Compressor() n_lines = 0 for line in open(fin): n_lines += 1 line = line.strip() c.add_entry(line) f = open(fout, "w") n_compressed = 0 for line in c(): n_compressed += 1 f.write(line + "\n") f.close() print("... 
random forrest for {} reduced from {} to {} entries".format( os.path.basename(fin), n_lines, n_compressed)) def mp_rf_optimizer_func(fn_tuple): """Executes in parallel creation of random forrest creation.""" fn, flags, file_suffix = fn_tuple n_trees = flags["n_trees"] is_regressor = flags["is_regressor"] sample_size = flags["sample_size"] n_features = flags["n_features"] max_depth = flags["max_depth"] if not file_suffix: file_suffix = "none" path_split = fn.split("/") path = "/".join(path_split[:-1]) + "/" fn_split = path_split[-1].split(".") # o_file = path + ".".join(fn_split[0:-2] + [fn_split[-1]]) cv_file = path + ".".join(fn_split[0:-2] + [file_suffix]) rfb_file = path + ".".join(fn_split[0:-2] + ["rb", "bin"]) # let's compress the table first to make the job easier for random forest. # compression can usually achieve a ratio of 50x or more. # compress(fn, o_file) train = load(fn) n_features = "auto" if not n_features else float(n_features) # min_size = 1 if max_depth: max_depth = int(max_depth) print("... 
creating random forrest for " + os.path.basename(fn) + " with " + str(sample_size) + " samples") if is_regressor: rf = RandomForestRegressor( n_estimators=n_trees, max_depth=max_depth, # min_samples_split=2, # min_samples_leaf=min_size, max_features=n_features, # max_leaf_nodes=100, # oob_score=True, # warm_start=True, bootstrap=True, random_state=42, n_jobs=1) else: rf = RandomForestClassifier( n_estimators=n_trees, max_depth=max_depth, # min_samples_split=2, # min_samples_leaf=min_size, max_features=n_features, # max_leaf_nodes=100, # oob_score=True, # warm_start=True, bootstrap=True, random_state=42, n_jobs=1) if sample_size and train.shape[0] >= 10000: sample_size = int(sample_size) np.random.seed(42) idx = np.random.choice(train.shape[0], train.shape[0], replace=False) x = train[idx[sample_size:], 0:-1] y = train[idx[sample_size:], -1] x_test = train[idx[0:sample_size], 0:-1] y_test = train[idx[0:sample_size], -1] else: x = train[:, 0:-1] y = train[:, -1] x_test = x y_test = y estimators = [] with warnings.catch_warnings(): warnings.simplefilter("ignore") rf.fit(x, y) func_name = fn_split[0] bits = np.ceil( np.log2( np.abs( np.amax(x, axis=0) - np.amin(x, axis=0) + 1))).astype(np.int32) is_neg = (np.amin(x, axis=0) < 0).astype(np.int8) o_bits = np.ceil( np.log2( np.abs( np.amax(y, axis=0) - np.amin(y, axis=0) + 1))).astype(np.int32) o_is_neg = (np.amin(y, axis=0) < 0).astype(np.int8) rf.bits = bits rf.is_neg = is_neg rf.o_bits = o_bits rf.o_is_neg = o_is_neg code = gen_random_forest( rf, func_name, bits, is_neg, o_bits, o_is_neg, is_regressor=is_regressor, is_top_level=False, is_cc=file_suffix == "cc") open(cv_file, "w").write("\n".join(code)) p = 1.0 * np.round(rf.predict(x_test)) dy = np.max(train[:, -1]) - np.min(train[:, -1]) error = np.sum(np.abs(y_test - p)) / (1.0 * p.shape[0] * dy) score = np.sum(y_test == p) / p.shape[0] print("y:", np.max(y_test), y_test[0:30].astype(np.int32)) print("p:", np.max(p), p[0:30].astype(np.int32)) print("... 
model {} with score of {:.2f}% and error of {:.2f}%".format( func_name, 100.0*score, 100.0*error)) print("... saving model in {}".format(rfb_file)) pickle.dump(rf, open(rfb_file, "wb")) return rfb_file def mp_abc_optimizer_func(fn): """Performs espresso and abc optimization on a single espresso input.""" fn_split = fn.split(".") o_file = ".".join(fn_split[0:-2] + [fn_split[-1]]) v_file = ".".join(fn_split[0:-2] + ["v"]) b_file = ".".join(fn_split[0:-2] + ["blif"]) print("...running espresso in " + fn) espresso_flags = os.environ.get("ESPRESSO_FLAGS", "-Dexpand") cmd = "espresso {} {} > {}".format(fn, espresso_flags, o_file) output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) output = output.strip() if output: print(output) sys.stdout.flush() # check if network is empty for line in open(o_file): line = line.strip() if line[0:2] == ".p": terms = int(line[2:]) # empty : espresso optimized away all the logic if terms == 0: shutil.copyfile(fn, o_file) break print("...running abc in " + o_file) abc_flags = os.environ.get("ABC_FLAGS", "") abc_flags_list = abc_flags.split(";") if abc_flags else [] abc_cmds_list = ( ["read_pla " + o_file] + abc_flags_list + ["strash", "dc2", "strash", "if -K 3", "write_verilog " + v_file, "write_blif " + b_file ]) abc_cmds = ";".join(abc_cmds_list) cmd = "abc -c '" + abc_cmds + "'" output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) output = output.strip() if output: print(output) sys.stdout.flush() print("...generated " + v_file) def run_abc_optimizer(files): """Implements logic optimizer using espresso/abc.""" # intel processors sometimes return number of threads, not processors cpus = mp.cpu_count() // 2 start_time = time.time() pool = mp.Pool(cpus) pool.map(mp_abc_optimizer_func, files) pool.close() print("Optimizer ran in {} seconds.".format(time.time() - start_time)) def run_rf_optimizer(files, flags, file_suffix="cc"): """Implements random forest main optimizer.""" # intel processors 
sometimes return number of threads, not processors cpus = mp.cpu_count() // 2 start_time = time.time() pool = mp.Pool(cpus) pool.map(mp_rf_optimizer_func, zip( files, [flags]*len(files), [file_suffix]*len(files))) pool.close() print("Optimizer ran in {} seconds.".format(time.time() - start_time)) # generates header file # .../.../.../conv2d_0_m.csv/conv2d_0_m_0.csv # # returns conv2d_0_m for module_name module_name = files[0].split("/")[-2].split(".")[0] path_split = files[0].split("/") path = "/".join(path_split[:-1]) + "/" fn_split = path_split[-1].split(".") rfb_file = path + ".".join(fn_split[0:-2] + ["rb", "bin"]) rf = pickle.load(open(rfb_file, "rb")) f = open(path + module_name + "." + file_suffix, "w") if file_suffix == "cc": f.write("#include \n\n") modules = [] for fn in files: path_split = fn.split("/") path = "/".join(path_split[:-1]) + "/" fn_split = path_split[-1].split(".") v_file = ".".join(fn_split[0:-2] + [file_suffix]) func_name = fn_split[0] if file_suffix == "v": f.write("'include \"" + v_file + "\"\n") else: f.write("#include \"" + v_file + "\"\n") modules.append(func_name) f.write("\n\n") if file_suffix == "v": f.write("module " + module_name + "(") f.write("input [" + str(np.sum(rf.bits)-1) + ":0] in, ") o_sign = " signed " if rf.o_is_neg else "" f.write("output " + o_sign + "[" + str(len(modules)*rf.o_bits-1) + ":0] out);\n") else: f.write("void " + module_name + "(") f.write("ac_int<" + str(np.sum(rf.bits)) + ",false> in, ") f.write("ac_int<" + str(len(modules)*rf.o_bits) + "," + ("true" if rf.o_is_neg else "false") + "> &out)\n") f.write("{\n") for o in range(len(modules)): if file_suffix == "v": f.write(" wire " + ("signed " if rf.o_is_neg else "") + "[" + str(rf.bits[-1]-1) + ":0] " "o_" + str(o) + ";\n") f.write(" " + modules[o] + "(in, o_" + str(o) + ");\n") f.write(" assign out[" + str(rf.o_bits*(o+1)-1) + ":" + str(rf.bits[-1]*o) + "] = o_" + str(o) + ";\n") else: f.write(" ac_int<" + str(rf.o_bits) + "," + ("true" if rf.o_is_neg 
else "false") + "> o_" + str(o) + "; " + modules[o] + "(in, o_" + str(o) + "); out.set_slc<" + str(rf.o_bits) + ">(" + str(rf.o_bits*o) + "," + "o_" + str(o) + ");\n") if file_suffix == "cc": f.write("}") f.close() ================================================ FILE: experimental/lo/random_forest/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from .utils import load from .utils import load_csv from .utils import load_pla # from .random_forest import RandomForest # from .random_tree import RandomTree ================================================ FILE: experimental/lo/random_forest/gen_random_tree.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Generates expressions for random trees.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from sklearn.tree import DecisionTreeClassifier from sklearn.tree import DecisionTreeRegressor def gen_random_tree_cc(tree): n_nodes = tree.node_count children_left = tree.children_left children_right = tree.children_right feature = tree.feature threshold = tree.threshold node_depth = np.zeros(shape=n_nodes, dtype=np.int64) is_leaves = np.zeros(shape=n_nodes, dtype=bool) stack = [(0, -1)] while (len(stack) > 0): node_id, parent_depth = stack.pop() node_depth[node_id] = parent_depth + 1 if children_left[node_id] != children_right[node_id]: stack.append((chidren_left[node_id], parent_depth+1)) stack.append((children_right[node_id], parent_depth+1)) else: is_leaves[node_id] = True for i in range(n_nodes): if is_leaves[i]: print("{}n_{} leaf node.".format(" "*node_depth[i], i)) else: print("{}n_{} (i_{} <= {}) ? n_{} : n_{}".format( " "*node_depth[i], i, feature[i], threshold[i], children_left[i], children_right[i])) ================================================ FILE: experimental/lo/random_forest/parser.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Parses PLA format usig ply.""" from ply import yacc from ply import lex import numpy as np _1 = 1 _0 = 2 _X = 3 _U = 0 NOT = {_0: _1, _1: _0, _X: _U, _U: _U} class PLA: def __init__(self): self.pla_i = [] self.pla_o = [] pla = PLA() tokens = [ "I", "O", "MV", "ILB", "OB", "P", "L", "E", "TYPE", "SYMBOL", "NUMBER", "NEWLINE" ] t_ignore = " \t|" t_I = r"\.[iI]" t_O = r"\.[oO]" t_MV = r"\.[mM][vV]" t_ILB = r"\.[iI][lL][bB]" t_OB = r"\.[oO][bB]" t_P = r"\.[pP]" t_L = r"\.[lL]" t_E = r"\.[eE]" t_TYPE = r"\.type" t_SYMBOL = r"[a-zA-Z_][a-zA-Z0-9_\<\>\-\$]*" def t_NUMBER(t): r"[\d\-]+" return t def t_NEWLINE(t): r"\n+" t.lexer.lineno += t.value.count("\n") return t def t_error(t): print("Illegal character '{}'".format(t.value)) t.lexer.skip(1) lex.lex() def p_pla(p): """pla : pla_declarations pla_table pla_end""" def p_pla_declarations(p): """pla_declarations : pla_declarations pla_declaration | pla_declaration""" def p_pla_declaration(p): """pla_declaration : I NUMBER NEWLINE | O NUMBER NEWLINE | P NUMBER NEWLINE | MV number_list NEWLINE | ILB symbol_list NEWLINE | OB symbol_list NEWLINE | L NUMBER symbol_list NEWLINE | TYPE SYMBOL NEWLINE """ token = p[1].lower() if token == ".i": pla.ni = int(p[2]) elif token == ".o": pla.no = int(p[2]) elif token == ".mv": pla.mv = [int(v) for v in p[2]] elif token == ".ilb": pla.ilb = p[2] elif token == ".ob": pla.ob = p[2] elif token == ".l": pla.label = p[2] elif token == ".type": pla.set_type = p[2] def p_pla_table(p): """pla_table : pla_table number_symbol_list NEWLINE | number_symbol_list NEWLINE""" if len(p[1:]) == 3: line = "".join(p[2]) else: line = "".join(p[1]) assert hasattr(pla, "ni") and hasattr(pla, "no") # right now we only process binary functions line = [_1 if v == "1" else _0 if v == "0" else _X for v in line] pla.pla_i.append(line[0:pla.ni]) pla.pla_o.append(line[pla.ni:]) def p_pla_end(p): """pla_end : E opt_new_line""" pass def 
p_opt_new_line(p): """opt_new_line : NEWLINE | """ pass def p_number_list(p): """number_list : number_list NUMBER | NUMBER """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_symbol_list(p): """symbol_list : symbol_list SYMBOL | SYMBOL """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_symbol_list(p): """number_symbol_list : number_symbol_list number_or_symbol | number_or_symbol """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_or_symbol(p): """number_or_symbol : NUMBER | SYMBOL """ p[0] = p[1] def p_error(p): print("Error text at {}".format(p)) #p.value)) yacc.yacc() def get_tokens(fn): lex.input("".join(open(fn).readlines())) return lex.token def parse(fn): yacc.parse("".join(open(fn).readlines())) pla.pla_i = np.array(pla.pla_i) pla.pla_o = np.array(pla.pla_o) return pla ================================================ FILE: experimental/lo/random_forest/random_forest.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Creates a random forest to generate hardware for it."""

import numpy as np
import pickle
import os

from .random_tree import RandomTree


def fit_parallel(max_depth, min_size, sample, mask_stuck_at_values):
  # Fits a single RandomTree; kept as a free function so it can be handed to
  # a process pool for parallel training.
  tree = RandomTree(max_depth, min_size)
  tree.fit(sample, mask_stuck_at_values)
  return tree


class RandomForest:
  """Ensemble of RandomTrees with Verilog/C++ (ac_int) code generation."""

  def __init__(
      self, max_depth, min_size, n_trees, use_mean=False, sample_size=None):
    # max_depth / min_size: per-tree growth limits passed to RandomTree.
    # n_trees: ensemble size.
    # use_mean: if True, predict_row averages tree outputs (regression
    #   style); otherwise it takes the majority vote.
    # sample_size: optional per-tree subsample size; None uses the full set.
    self.max_depth = max_depth
    self.min_size = min_size
    self.use_mean = use_mean
    self.sample_size = sample_size
    self.n_trees = n_trees
    # Filled in by fit(); None until then.
    self.inputs = None
    self.bits = None
    self.is_neg = None
    self.trees = None

  @staticmethod
  def save(model, filename):
    """Saves model to disk."""
    print("... saving model in {}".format(filename))
    f = open(filename, "wb")
    pickle.dump(model, f)
    f.close()

  @staticmethod
  def load(filename):
    """Loads model from disk."""
    # NOTE(review): pickle.load on an untrusted file can execute arbitrary
    # code; only load models you produced yourself.
    print("... loading model from {}".format(filename))
    f = open(filename, "rb")
    random_forest = pickle.load(f)
    f.close()
    return random_forest

  def subsample(self, dataset):
    """Subsamples dataset if we do not want to use entire dataset."""
    # Bootstrap sample: drawn with replacement.
    sample_idx = np.random.choice(
        dataset.shape[0], self.sample_size, replace=True)
    sample = dataset[sample_idx,...]
    return sample

  def fit(self, dataset, verbose=False):
    """Fits random tree to model."""
    # Last column of `dataset` is the target; the rest are the inputs.
    self.inputs = dataset.shape[1]-1
    # Per-column bit widths derived from each column's dynamic range.
    self.bits = np.ceil(
        np.log2(
            np.abs(
                np.amax(dataset, axis=0) -
                np.amin(dataset, axis=0)))).astype(np.int32)
    self.is_neg = (np.amin(dataset, axis=0) < 0).astype(np.int8)
    self.trees = []
    for i in range(self.n_trees):
      if verbose:
        print("... creating tree {}".format(i))
      # as subsample is an expensive operation, we will only perform it if it
      # reduces the dataset substantially
      if self.sample_size and self.sample_size < 0.3 * dataset.shape[0]:
        if verbose:
          print("... generated subsample of size {}".format(self.sample_size))
        sample = self.subsample(dataset)
      else:
        sample = dataset
      self.trees.append(fit_parallel(
          self.max_depth, self.min_size, sample, True))

  def predict_row(self, row):
    """Predicts output for single row."""
    result = [tree.predict_row(row) for tree in self.trees]
    if self.use_mean:
      # Regression-style aggregation: rounded mean of the tree outputs.
      return int(np.round(np.mean(result)))
    else:
      # Classification-style aggregation: majority vote.
      return max(set(result), key=result.count)

  def predict(self, data):
    """Predicts class based on data."""
    assert self.trees is not None
    return np.array([self.predict_row(data[i]) for i in range(data.shape[0])])

  def gen_code(self, filename, func_name):
    """Generates code for model."""
    assert self.bits is not None
    # One {var: expression} dict per tree; n_vars offsets variable names so
    # they stay unique across trees.
    vd_list = []
    n_vars = 0
    for tree in self.trees:
      vd_list.append(tree.gen_code(n_vars))
      n_vars += len(vd_list[-1])
    # checks the type by the suffix
    is_v = filename.split(".")[-1] == "v"
    assert self.inputs
    f = open(filename, "w")
    i_bits = np.sum(self.bits[:-1])
    o_bits = self.bits[-1]
    o_sign = self.is_neg[-1]
    if is_v:
      f.write("module {}(input [{}:0] i, output [{}:0] o);\n".format(
          func_name, i_bits-1, o_bits-1))
    else:
      # NOTE(review): header name appears missing after "#include" — possibly
      # lost in extraction (ac_int header expected); confirm against original.
      f.write("#include\n\n")
      f.write("void {}(ac_int<{},false> i, ac_int<{},{}> &o)\n".format(
          func_name, i_bits, o_bits, o_sign))
      f.write("{\n")
    # write function headline
    # Emits one named slice variable i_k per input column, wrapping the
    # generated line at ~70 characters and restarting the declaration when
    # the bit width or sign of the column changes.
    s_in_line = []
    i_bits = self.bits[0]
    i_sign = self.is_neg[0]
    if is_v:
      i_datatype = " wire {}[{}:0] ".format(
          "signed " if i_sign else "", i_bits-1)
    else:
      i_datatype = " ac_int<{},{}> ".format(i_bits, i_sign)
    len_s = len(i_datatype)
    for i in range(self.inputs):
      if is_v:
        s = (
            "i_" + str(i) + " = " +
            "i[" + str(i_bits*(i+1)-1) + ":" + str(i_bits*i) + "]"
        )
      else:
        s = (
            "i_" + str(i) + " = " +
            "i.slc<" + str(i_bits) + ">(" + str(i_bits*i) + ")"
        )
      if (
          len_s + len(s) + 2 > 70 or
          i_bits != self.bits[i] or
          i_sign != self.is_neg[i]
      ):
        f.write(i_datatype + ", ".join(s_in_line) + ";\n")
        s_in_line = []
        if is_v:
          i_datatype = " wire {}[{}:0] ".format(
              "signed " if i_sign else "", i_bits-1)
        else:
          i_datatype = " ac_int<{},{}> ".format(i_bits, i_sign)
        len_s = len(i_datatype)
      s_in_line.append(s)
      len_s += len(s) + 2
    if s_in_line:
      f.write(i_datatype + ", ".join(s_in_line) + ";\n")
    if is_v:
      o_datatype = " wire {}[{}:0] ".format(
          "signed " if o_sign else "", o_bits)
    else:
      o_datatype = " ac_int<{},{}> ".format(o_bits, o_sign)
    # Emits every per-tree node expression; o_list collects each tree's root
    # variable (the last one written per tree dict).
    o_list = []
    for i in range(len(vd_list)):
      for v in vd_list[i]:
        if is_v:
          f.write(o_datatype + v + " = " + vd_list[i][v] + ";\n")
        else:
          f.write(o_datatype + v + " = " + vd_list[i][v] + ";\n")
        f.write("\n")
        o_list.append(v)
    # The majority-vote combiner below is hand-written for up to 3 trees only.
    assert len(o_list) <= 3
    if is_v:
      f.write("  assign ")
    else:
      f.write("  ")
    if len(o_list) == 1:
      f.write("o = " + o_list[0] + ";")
    elif len(o_list) == 2:
      # Two trees: if they agree use either; otherwise average them.
      cond = "( " + o_list[0] + " == " + o_list[1] + " ) "
      n1 = o_list[0]
      n0 = "( ( " + " + ".join(o_list) + " ) >> 1 )"
      f.write("o = " + cond + "? " + n1 + ": " + n0)
    elif len(o_list) == 3:
      # Three trees: any pairwise agreement wins; otherwise take the median.
      cond = (
          "( " +
          "( " + " == ".join(o_list[0:2]) + " )?" + o_list[0] + ":" +
          "( " + " == ".join(o_list[1:]) + " )?" + o_list[1] + ":" +
          "( " + " == ".join([o_list[0], o_list[2]]) + " )?" +
          o_list[0] + ":" +
          "( " + " < ".join(o_list[0:2]) + " ) ?" +
          "( ( " + " < ".join(o_list[1:]) + " ) ?" + o_list[1] + ":" +
          o_list[2] + " ) : " +
          "( ( " + " < ".join([o_list[0], o_list[2]]) + " ) ?" +
          o_list[0] + ":" + o_list[2] + " )"
      )
      f.write("o = " + cond + ";\n")
    if is_v:
      f.write("endmodule")
    else:
      f.write("}")
    f.close()

================================================ FILE: experimental/lo/random_forest/random_tree.py ================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Implements Random Forest for quantized netlist.""" from csv import reader from math import sqrt import os import pprint from random import seed from random import randrange import sys import numpy as np from .parser import parse, _X, _0, _1 class RandomTree: def __init__(self, max_depth, min_size): self.min_size = min_size self.max_depth = max_depth self.n_features = None def split_into_groups(self, index, value, dataset): mask_l = dataset[:, index] < value mask_r = np.logical_not(mask_l) left = dataset[mask_l,...] right = dataset[mask_r,...] return left, right def gini_index(self, groups, classes): # count all samples at split point n_instances = float(sum([len(group) for group in groups])) # sum weighted Gini index for each group gini = 0.0 for group in groups: size = float(len(group)) # avoid divide by zero if size == 0: continue score = 0.0 # score the group based on the score for each class for class_val in classes: p = np.array([np.sum(group[:, -1] == class_val) / size for class_val in classes]) score += np.sum(np.power(p, 2)) # weight the group score by its relative size gini += (1.0 - score) * (size / n_instances) return gini def select_best_split(self, dataset): class_values = list(set(list(dataset[:,-1].flatten()))) b_index, b_value, b_score, b_groups = 9999, 9999, 9999, None # because several of the entries may be don't cares, we will select the # whole set and restrict to only the ones that are not don't cares features = list( np.random.choice(len(dataset[0])-1, self.n_features, p=self.probs, replace=False)) for index in features: assert self.mask[index] == True b_values = list(set(list(dataset[:, index]))) for b in b_values: groups = self.split_into_groups(index, b, dataset) gini = self.gini_index(groups, class_values) if gini < b_score: b_index, b_value, b_score, 
b_groups = index, b, gini, groups return {'index': b_index, 'value': b_value, 'groups': b_groups} def select_terminal(self, group): outcomes = list(group[:,-1].flatten()) return max(set(outcomes), key=outcomes.count) def split_node(self, node, depth): left, right = node['groups'] del(node['groups']) # check for a no split if left.shape[0] == 0: node['left'] = node['right'] = self.select_terminal(right) return elif right.shape[0] == 0: node['left'] = node['right'] = self.select_terminal(left) return # check for max depth if depth >= self.max_depth: node['left'], node['right'] = (self.select_terminal(left), self.select_terminal(right)) return # process left child if len(set(list( left[:, -1].flatten()))) == 1 or left.shape[0] <= self.min_size: node['left'] = self.select_terminal(left) else: node['left'] = self.select_best_split(left) self.split_node(node['left'], depth + 1) # process right child if len(set(list( right[:, -1].flatten()))) == 1 or right.shape[0] <= self.min_size: node['right'] = self.select_terminal(right) else: node['right'] = self.select_best_split(right) self.split_node(node['right'], depth+1) def create_mask(self, dataset): self.mask = np.amin(dataset, axis=0) != np.amax(dataset, axis=0) def fit(self, dataset, mask_stuck_at_values=False): if mask_stuck_at_values: self.create_mask(dataset) else: self.mask = np.ones(dataset.shape[1]) self.probs = self.mask[:-1].astype(np.float32) / np.sum(self.mask[:-1]) if not self.n_features: self.n_features = int(np.sqrt(dataset.shape[1] - 1)) self.root = self.select_best_split(dataset) self.split_node(self.root, 1) def predict_internal(self, node, data): if data[node['index']] < node['value']: if isinstance(node['left'], dict): return self.predict_internal(node['left'], data) else: return node['left'] else: if isinstance(node['right'], dict): return self.predict_internal(node['right'], data) else: return node['right'] def predict_row(self, row): return self.predict_internal(self.root, row) def predict(self, 
data): return np.array(self.predict_row(data[i]) for i in range(data.shape[0])) def gen_code_internal(self, node, var_dict, n_offset): # traverse left cond = '( i_' + str(node['index']) + ' < ' + str(node['value']) + ' )' if isinstance(node['left'], dict): n0 = self.gen_code_internal(node['left'], var_dict, n_offset) else: n0 = str(node['left']) if isinstance(node['right'], dict): n1 = self.gen_code_internal(node['right'], var_dict, n_offset) else: n1 = str(node['right']) index = len(var_dict) + n_offset r = 'n_' + str(index) stmt = cond + '? ' + n0 + ' : ' + n1 var_dict[r] = stmt return r def gen_code(self, n_offset=0): var_dict = {} self.gen_code_internal(self.root, var_dict, n_offset) return var_dict ================================================ FILE: experimental/lo/random_forest/utils.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Reads and processes tables of PLAs and CSVs."""

from csv import reader
from math import sqrt
import os
import pprint
from random import seed
from random import randrange
import sys

import numpy as np

from .parser import parse, _X, _0, _1


def str_column_to_float(dataset, column):
  """Converts string column to float."""
  for record in dataset:
    record[column] = float(record[column].strip())


def str_column_to_int(dataset, column):
  """Converts string column to int."""
  for record in dataset:
    record[column] = int(record[column].strip())


def str_column_to_number(dataset, column):
  """Converts output to integer if possible or float."""
  unique = set(record[column] for record in dataset)
  lookup = {}
  is_symbolic = False
  for value in unique:
    # Try the narrowest numeric interpretation first.
    try:
      lookup[value] = int(value)
    except ValueError:
      try:
        lookup[value] = float(value)
      except ValueError:
        # Not a number at all: fall back to symbolic class ids.
        is_symbolic = True
        break
  if is_symbolic:
    # Best we can do is to assign unique numbers to the classes.
    lookup = {value: i for i, value in enumerate(unique)}
  # Rewrite the column through the lookup table.
  for record in dataset:
    record[column] = lookup[record[column]]
  return lookup


def load_csv(filename):
  """Loads CSV file."""
  dataset = []
  with open(filename, 'r') as csv_file:
    for row in reader(csv_file):
      if row:
        dataset.append(row)
  n_columns = len(dataset[0])
  # Inputs become ints; the output column may be int, float or symbolic.
  for col in range(n_columns - 1):
    str_column_to_int(dataset, col)
  str_column_to_number(dataset, n_columns - 1)
  return np.array(dataset)


def load_pla(filename):
  """Loads PLA file."""
  pla = parse(filename)
  rows = []
  for in_bits, out_bits in zip(pla.pla_i, pla.pla_o):
    # Don't-care inputs (_X) are treated the same as 0.
    in_row = [1 if v == _1 else 0 for v in in_bits]
    width = len(out_bits)
    # Pack the output bit-vector (MSB first) into a single integer.
    out_val = sum((1 << (width - 1 - k))
                  for k in range(width) if out_bits[k] == _1)
    rows.append(in_row + [out_val])
  return np.array(rows)
def load(filename): """Loads and decides if we will load PLA or CSV file based on suffix.""" suffix_split = filename.split(".") if suffix_split[-1] == "pla": print("... loading pla") dataset = load_pla(filename) else: dataset = load_csv(filename) return dataset ================================================ FILE: experimental/lo/receptive.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import math from .utils import get_padding_value def print_rf(layer_name, x): print("Layer {}:".format(layer_name)) print( "\theight/width: {}\n\tstride: {}\n\teq_kernel_size: {}\n\tstart: {}\n".format( *x) ) def rf_computation_for_layer(layer, layer_in): k, s, p = layer n_in, j_in, r_in, start_in = layer_in n_out = int(math.floor((n_in + 2*p - k)/s)) + 1 if s == 1 and p == 1: n_out = n_in actual_p = (n_out-1)*s - n_in + k p_r = math.ceil(actual_p/2) p_l = math.floor(actual_p/2) j_out = j_in * s r_out = r_in + (k-1)*j_in start_out = start_in + (int((k-1)/2) - p_l) * j_in return n_out, j_out, r_out, start_out def model_to_receptive_field(model, i_name, o_name): layers_h = [] layers_w = [] i_layer = model.get_layer(i_name) o_layer = model.get_layer(o_name) # right now this only works for sequential layers i_index = 
model.layers.index(i_layer) o_index = model.layers.index(o_layer) for i in range(i_index, o_index+1): k_h, k_w = (1, 1) s_h, s_w = (1, 1) p_h, p_w = (0, 0) if hasattr(model.layers[i], "kernel_size"): kernel = model.layers[i].kernel_size if isinstance(kernel, int): kernel = [kernel, kernel] k_h, k_w = kernel[0], kernel[1] if hasattr(model.layers[i], "strides"): strides = model.layers[i].strides if isinstance(strides, int): strides = [strides, strides] s_h, s_w = strides[0], strides[1] if hasattr(model.layers[i], "padding"): padding = model.layers[i].padding if isinstance(padding, str): padding = [padding, padding] p_h = get_padding_value(padding[0], k_h) p_w = get_padding_value(padding[1], k_w) layers_h.append((k_h, s_h, p_h)) layers_w.append((k_w, s_w, p_w)) x_h = (i_layer.input.shape[1], 1, 1, 0.5) x_w = (i_layer.input.shape[2], 1, 1, 0.5) for l_h, l_w in zip(layers_h, layers_w): x_h = rf_computation_for_layer(l_h, x_h) x_w = rf_computation_for_layer(l_w, x_w) strides = (x_h[1], x_w[1]) kernel = (x_h[2], x_w[2]) padding = ("valid", "valid") return (strides, kernel, padding) ================================================ FILE: experimental/lo/table/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== from .utils import load from .utils import load_csv from .utils import load_pla ================================================ FILE: experimental/lo/table/parser.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Parses PLA format usig ply.""" from ply import yacc from ply import lex import numpy as np _1 = 1 _0 = 2 _X = 3 _U = 0 NOT = {_0: _1, _1: _0, _X: _U, _U: _U} class PLA: def __init__(self): self.pla_i = [] self.pla_o = [] pla = PLA() tokens = [ "I", "O", "MV", "ILB", "OB", "P", "L", "E", "TYPE", "SYMBOL", "NUMBER", "NEWLINE" ] t_ignore = " \t|" t_I = r"\.[iI]" t_O = r"\.[oO]" t_MV = r"\.[mM][vV]" t_ILB = r"\.[iI][lL][bB]" t_OB = r"\.[oO][bB]" t_P = r"\.[pP]" t_L = r"\.[lL]" t_E = r"\.[eE]" t_TYPE = r"\.type" t_SYMBOL = r"[a-zA-Z_][a-zA-Z0-9_\<\>\-\$]*" def t_NUMBER(t): r"[\d\-]+" return t def t_NEWLINE(t): r"\n+" t.lexer.lineno += t.value.count("\n") return t def t_error(t): print("Illegal character '{}'".format(t.value)) t.lexer.skip(1) lex.lex() def p_pla(p): """pla : pla_declarations pla_table pla_end""" def p_pla_declarations(p): """pla_declarations : pla_declarations pla_declaration | pla_declaration""" def p_pla_declaration(p): """pla_declaration : I NUMBER NEWLINE | O NUMBER NEWLINE | P NUMBER NEWLINE | MV number_list 
NEWLINE | ILB symbol_list NEWLINE | OB symbol_list NEWLINE | L NUMBER symbol_list NEWLINE | TYPE SYMBOL NEWLINE """ token = p[1].lower() if token == ".i": pla.ni = int(p[2]) elif token == ".o": pla.no = int(p[2]) elif token == ".mv": pla.mv = [int(v) for v in p[2]] elif token == ".ilb": pla.ilb = p[2] elif token == ".ob": pla.ob = p[2] elif token == ".l": pla.label = p[2] elif token == ".type": pla.set_type = p[2] def p_pla_table(p): """pla_table : pla_table number_symbol_list NEWLINE | number_symbol_list NEWLINE""" if len(p[1:]) == 3: line = "".join(p[2]) else: line = "".join(p[1]) assert hasattr(pla, "ni") and hasattr(pla, "no") # right now we only process binary functions line = [_1 if v == "1" else _0 if v == "0" else _X for v in line] pla.pla_i.append(line[0:pla.ni]) pla.pla_o.append(line[pla.ni:]) def p_pla_end(p): """pla_end : E opt_new_line""" pass def p_opt_new_line(p): """opt_new_line : NEWLINE | """ pass def p_number_list(p): """number_list : number_list NUMBER | NUMBER """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_symbol_list(p): """symbol_list : symbol_list SYMBOL | SYMBOL """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_symbol_list(p): """number_symbol_list : number_symbol_list number_or_symbol | number_or_symbol """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_or_symbol(p): """number_or_symbol : NUMBER | SYMBOL """ p[0] = p[1] def p_error(p): print("Error text at {}".format(p)) #p.value)) yacc.yacc() def get_tokens(fn): lex.input("".join(open(fn).readlines())) return lex.token def parse(fn): yacc.parse("".join(open(fn).readlines())) pla.pla_i = np.array(pla.pla_i) pla.pla_o = np.array(pla.pla_o) return pla ================================================ FILE: experimental/lo/table/utils.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except 
in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Reads and processes tables of PLAs and CSVs.""" from csv import reader from csv import QUOTE_NONNUMERIC from math import sqrt import os import pprint from random import seed from random import randrange import sys import numpy as np from .parser import parse, _X, _0, _1 def str_column_to_float(dataset, column): """Converts string column to float.""" for row in dataset: row[column] = float(row[column].strip()) def str_column_to_int(dataset, column, d_values): """Converts string column to int.""" for row in dataset: v = int(row[column].strip()) row[column] = v if not d_values else d_values[v] def str_column_to_number(dataset, column): """Converts output to integer if possible or float.""" class_values = [row[column] for row in dataset] unique = set(class_values) lookup = dict() is_symbolic = False for value in unique: try: # try int first lookup[value] = int(value) except ValueError: try: # if it fails, try float lookup[value] = float(value) except ValueError: # if it fails, it is symbolic is_symbolic = True break # best we an do is to assign unique numbers to the classes if is_symbolic: for i, value in enumerate(unique): lookup[value] = i # convert output to unique number for row in dataset: row[column] = lookup[row[column]] return lookup def int2bin(v, bits): str_v = format((v & ((1< 0 and sign: if mode == "bin": b_str = bin(-b & ((1 << n_bits) - 1))[2:] else: # mode == "dec" b_str = str(-b) o_dict[-v] = b_str if sign: v = (1.0 * (1 << (bits - sign))) 
* (1 << ibits) / (1 << bits) if mode == "bin": b_str = bin(-(1 << (bits - sign)) & ((1 << bits) - 1))[2:] else: b_str = str(-(1 << (bits - sign))) o_dict[-v] = b_str return o_dict def get_quantized_po2_dict( bits, max_exp, sign=False, make_smaller_zero=True, mode="bin"): """Returns map from floating values to bit encoding.""" # if make_smaller_zero we will make sure smaller number is 000...0 # mode = "bin" |-> make_smaller_zero assert mode != "bin" or make_smaller_zero o_dict = {} if max_exp > 0: v = 1.0 if mode == "bin": b_str = "0" * bits else: b_str = "1" o_dict[v] = b_str if sign: v = -1.0 if mode == "bin": b_str = "1" + "0"*(bits-sign) else: b_str = "-1" o_dict[v] = b_str for b in range(1, 1<<(bits - sign - 1)): v = np.power(2.0, -b) if mode == "bin": b_sign = "0" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str if b <= max_exp: v = np.power(2.0, b) if mode == "bin": b_str = bin(b)[2:] b_str = b_sign + "0"*(bits - sign - len(b_str)) + b_str else: b_str = str(v) o_dict[v] = b_str if sign: v = -np.power(2.0, -b) if mode == "bin": b_sign = "1" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str if b <= max_exp: v = -np.power(2.0, b) if mode == "bin": b_str = bin(b)[2:] b_str = b_sign + "0"*(bits - sign - len(b_str)) + b_str else: b_str = str(v) o_dict[v] = b_str b = 1 << (bits - sign - 1) v = np.power(2.0, -b) if mode == "bin": b_sign = "0" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str smaller_mask = b_str if sign: v = -np.power(2.0, -b) if mode == "bin": b_sign = "1" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str def invert_bit(bit, mask): """Inverts bits if mask is 1.""" if mask == "0": return bit else: return "0" if bit == "1" else "1" if mode == "bin": if make_smaller_zero: for v in 
o_dict: o_dict[v] = "".join( invert_bit(bit, mask_bit) for bit, mask_bit in zip(o_dict[v], smaller_mask)) else: keys_sorted = list(sorted(o_dict.keys())) if make_smaller_zero: min_positive_key = min([abs(v) for v in keys_sorted]) min_positive_index = keys_sorted.index(min_positive_key) else: min_positive_index = 0 for i, k in enumerate(keys_sorted): o_dict[k] = str(i - min_positive_index) return o_dict def get_ternary_dict(mode="bin"): """Returns map from floating values to bit encoding.""" if mode == "bin": return {-1.0: "11", 0.0: "00", 1.0: "01"} else: return {-1.0: "-1", 0.0: "0", 1.0: "1"} def get_binary_dict(symmetric=False, mode="bin"): """Returns map from floating values to bit encoding.""" if mode == "bin": if symmetric: return {-1.0: "10", 1.0: "01"} else: return {0.0: "0", 1.0: "1"} else: if symmetric: return {-1.0: "-1", 1.0: "1"} else: return {0.0: "0", 1.0: "1"} ================================================ FILE: notebook/AutoQKeras.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "QC9sVuNrzT-f" }, "source": [ "# Introduction\n", "\n", "In this notebook, we show how to quantize a model using AutoQKeras.\n", "\n", "As usual, let's first make sure we are using Python 3." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 51 }, "colab_type": "code", "executionInfo": { "elapsed": 926, "status": "ok", "timestamp": 1591840345558, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "0sY-O2IfzdB3", "outputId": "1c5a4e7a-1003-4b56-a30a-ca6bc196f18b" }, "outputs": [], "source": [ "import sys\n", "print(sys.version)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "6V7FxYH0zfY0" }, "source": [ "Now, let's load some packages we will need to run AutoQKeras." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "wuVqOAcbz3Go" }, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import json\n", "import pprint\n", "import numpy as np\n", "import six\n", "import tempfile\n", "import tensorflow.compat.v2 as tf\n", "# V2 Behavior is necessary to use TF2 APIs before TF2 is default TF version internally.\n", "tf.enable_v2_behavior()\n", "from tensorflow.keras.optimizers import *\n", "\n", "from qkeras.autoqkeras import *\n", "from qkeras import *\n", "from qkeras.utils import model_quantize\n", "from qkeras.qtools import run_qtools\n", "from qkeras.qtools import settings as qtools_settings\n", "\n", "from tensorflow.keras.utils import to_categorical\n", "import tensorflow_datasets as tfds\n", "\n", "print(\"using tensorflow\", tf.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's define `get_data` and `get_model` as you may not have stand alone access to examples directory inside autoqkeras." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_data(dataset_name, fast=False):\n", " \"\"\"Returns dataset from tfds.\"\"\"\n", " ds_train = tfds.load(name=dataset_name, split=\"train\", batch_size=-1)\n", " ds_test = tfds.load(name=dataset_name, split=\"test\", batch_size=-1)\n", "\n", " dataset = tfds.as_numpy(ds_train)\n", " x_train, y_train = dataset[\"image\"].astype(np.float32), dataset[\"label\"]\n", "\n", " dataset = tfds.as_numpy(ds_test)\n", " x_test, y_test = dataset[\"image\"].astype(np.float32), dataset[\"label\"]\n", "\n", " if len(x_train.shape) == 3:\n", " x_train = x_train.reshape(x_train.shape + (1,))\n", " x_test = x_test.reshape(x_test.shape + (1,))\n", "\n", " x_train /= 256.0\n", " x_test /= 256.0\n", "\n", " x_mean = np.mean(x_train, axis=0)\n", "\n", " x_train -= x_mean\n", " x_test -= x_mean\n", "\n", " nb_classes = np.max(y_train) + 1\n", " y_train = to_categorical(y_train, nb_classes)\n", " y_test = to_categorical(y_test, nb_classes)\n", "\n", " print(x_train.shape[0], \"train samples\")\n", " print(x_test.shape[0], \"test samples\")\n", " return (x_train, y_train), (x_test, y_test)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.initializers import *\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.optimizers import *\n", "\n", "class ConvBlockNetwork(object):\n", " \"\"\"Creates Convolutional block type of network.\"\"\"\n", "\n", " def __init__(\n", " self,\n", " shape,\n", " nb_classes,\n", " kernel_size,\n", " filters,\n", " dropout_rate=0.0,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " kernel_initializer=\"he_normal\",\n", " bias_initializer=\"zeros\",\n", " use_separable=False,\n", " use_xnornet_trick=False,\n", " all_conv=False\n", " ):\n", " \"\"\"Creates class.\n", "\n", " Args:\n", " shape: shape of inputs.\n", " 
nb_classes: number of output classes.\n", " kernel_size: kernel_size of network.\n", " filters: sizes of filters (if entry is a list, we create a block).\n", " dropout_rate: dropout rate if > 0.\n", " with_maxpooling: if true, use maxpooling.\n", " with_batchnorm: with BatchNormalization.\n", " kernel_initializer: kernel_initializer.\n", " bias_initializer: bias and beta initializer.\n", " use_separable: if \"dsp\", do conv's 1x3 + 3x1. If \"mobilenet\",\n", " use MobileNet separable convolution. If False or \"none\", perform single\n", " conv layer.\n", " use_xnornet_trick: use bn+act after max pool to enable binary\n", " to avoid saturation to largest value.\n", " all_conv: if true, implements all convolutional network.\n", " \"\"\"\n", " self.shape = shape\n", " self.nb_classes = nb_classes\n", " self.kernel_size = kernel_size\n", " self.filters = filters\n", " self.dropout_rate = dropout_rate\n", " self.with_maxpooling = with_maxpooling\n", " self.with_batchnorm = with_batchnorm\n", " self.kernel_initializer = kernel_initializer\n", " self.bias_initializer = bias_initializer\n", " self.use_separable = use_separable\n", " self.use_xnornet_trick = use_xnornet_trick\n", " self.all_conv = all_conv\n", "\n", " def build(self):\n", " \"\"\"Builds model.\"\"\"\n", " x = x_in = Input(self.shape, name=\"input\")\n", " for i in range(len(self.filters)):\n", " if len(self.filters) > 1:\n", " name_suffix_list = [str(i)]\n", " else:\n", " name_suffix_list = []\n", " if not isinstance(self.filters[i], list):\n", " filters = [self.filters[i]]\n", " else:\n", " filters = self.filters[i]\n", " for j in range(len(filters)):\n", " if len(filters) > 1:\n", " name_suffix = \"_\".join(name_suffix_list + [str(j)])\n", " else:\n", " name_suffix = \"_\".join(name_suffix_list)\n", " if self.use_separable == \"dsp\":\n", " kernels = [(1, self.kernel_size), (self.kernel_size, 1)]\n", " else:\n", " kernels = [(self.kernel_size, self.kernel_size)]\n", " for k, kernel in 
enumerate(kernels):\n", " strides = 1\n", " if (\n", " not self.with_maxpooling and j == len(filters)-1 and\n", " k == len(kernels)-1\n", " ):\n", " strides = 2\n", " if self.use_separable == \"dsp\":\n", " kernel_suffix = (\n", " \"\".join([str(k) for k in kernel]) + \"_\" + name_suffix)\n", " elif self.use_separable == \"mobilenet\":\n", " depth_suffix = (\n", " \"\".join([str(k) for k in kernel]) + \"_\" + name_suffix)\n", " kernel_suffix = \"11_\" + name_suffix\n", " else:\n", " kernel_suffix = name_suffix\n", " if self.use_separable == \"mobilenet\":\n", " x = DepthwiseConv2D(\n", " kernel,\n", " padding=\"same\", strides=strides,\n", " use_bias=False,\n", " name=\"conv2d_dw_\" + depth_suffix)(x)\n", " if self.with_batchnorm:\n", " x = BatchNormalization(name=\"conv2d_dw_bn_\" + depth_suffix)(x)\n", " x = Activation(\"relu\", name=\"conv2d_dw_act_\" + depth_suffix)(x)\n", " kernel = (1, 1)\n", " strides = 1\n", " x = Conv2D(\n", " filters[j], kernel,\n", " strides=strides, use_bias=not self.with_batchnorm,\n", " padding=\"same\",\n", " kernel_initializer=self.kernel_initializer,\n", " bias_initializer=self.bias_initializer,\n", " name=\"conv2d_\" + kernel_suffix)(x)\n", " if not (\n", " self.with_maxpooling and self.use_xnornet_trick and\n", " j == len(filters)-1 and k == len(kernels)-1\n", " ):\n", " if self.with_batchnorm:\n", " x = BatchNormalization(\n", " beta_initializer=self.bias_initializer,\n", " name=\"bn_\" + kernel_suffix)(x)\n", " x = Activation(\"relu\", name=\"act_\" + kernel_suffix)(x)\n", " if self.with_maxpooling:\n", " x = MaxPooling2D(2, 2, name=\"mp_\" + name_suffix)(x)\n", " # this is a trick from xnornet to enable full binary or ternary\n", " # networks to be after maxpooling.\n", " if self.use_xnornet_trick:\n", " x = BatchNormalization(\n", " beta_initializer=self.bias_initializer,\n", " name=\"mp_bn_\" + name_suffix)(x)\n", " x = Activation(\"relu\", name=\"mp_act_\" + name_suffix)(x)\n", " if self.dropout_rate > 0:\n", " x = 
Dropout(self.dropout_rate, name=\"drop_\" + name_suffix)(x)\n", "\n", " if not self.all_conv:\n", " x = Flatten(name=\"flatten\")(x)\n", " x = Dense(\n", " self.nb_classes,\n", " kernel_initializer=self.kernel_initializer,\n", " bias_initializer=self.bias_initializer,\n", " name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " else:\n", " x = Conv2D(\n", " self.nb_classes, 1, strides=1, padding=\"same\",\n", " kernel_initializer=self.kernel_initializer,\n", " bias_initializer=self.bias_initializer,\n", " name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", "\n", " model = Model(inputs=[x_in], outputs=[x])\n", "\n", " return model\n", "\n", "\n", "def get_model(dataset):\n", " \"\"\"Returns a model for the demo of AutoQKeras.\"\"\"\n", " if dataset == \"mnist\":\n", " model = ConvBlockNetwork(\n", " shape=(28, 28, 1),\n", " nb_classes=10,\n", " kernel_size=3,\n", " filters=[16, 32, 48, 64, 128],\n", " dropout_rate=0.2,\n", " with_maxpooling=False,\n", " with_batchnorm=True,\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " ).build()\n", "\n", " elif dataset == \"fashion_mnist\":\n", " model = ConvBlockNetwork(\n", " shape=(28, 28, 1),\n", " nb_classes=10,\n", " kernel_size=3,\n", " filters=[16, [32]*3, [64]*3],\n", " dropout_rate=0.2,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " use_separable=\"mobilenet\",\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " use_xnornet_trick=True\n", " ).build()\n", "\n", " elif dataset == \"cifar10\":\n", " model = ConvBlockNetwork(\n", " shape=(32, 32, 3),\n", " nb_classes=10,\n", " kernel_size=3,\n", " filters=[16, [32]*3, [64]*3, [128]*3],\n", " dropout_rate=0.2,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " use_separable=\"mobilenet\",\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " use_xnornet_trick=True\n", " 
).build()\n", "\n", " elif dataset == \"cifar100\":\n", " model = ConvBlockNetwork(\n", " shape=(32, 32, 3),\n", " nb_classes=100,\n", " kernel_size=3,\n", " filters=[16, [32]*3, [64]*3, [128]*3, [256]*3],\n", " dropout_rate=0.2,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " use_separable=\"mobilenet\",\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " use_xnornet_trick=True\n", " ).build()\n", "\n", " model.summary()\n", "\n", " return model" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "uXsGtqRcN7fY" }, "source": [ "`AutoQKeras` has some examples on how to run with `mnist`, `fashion_mnist`, `cifar10` and `cifar100`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 51 }, "colab_type": "code", "executionInfo": { "elapsed": 18554, "status": "ok", "timestamp": 1591840377936, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "lB8CBTehz9FK", "outputId": "09f791cf-8db5-40c5-b17d-89d433308716" }, "outputs": [], "source": [ "DATASET = \"mnist\"\n", "(x_train, y_train), (x_test, y_test) = get_data(DATASET)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "bk4rOks2OIbW" }, "source": [ "Before we create the model, let's see if we can perform distributed training." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 206 }, "colab_type": "code", "executionInfo": { "elapsed": 304, "status": "ok", "timestamp": 1591840378251, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "EMbYcKb-wMOc", "outputId": "22e85769-4659-4212-ccdb-4b00be2fcefe" }, "outputs": [], "source": [ "physical_devices = tf.config.list_physical_devices()\n", "for d in physical_devices:\n", " print(d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 14553, "status": "ok", "timestamp": 1591840392823, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "eMVill0TxUuG", "outputId": "97c07213-fdce-4eed-9af7-cc51393cd996" }, "outputs": [], "source": [ "has_tpus = np.any([d.device_type == \"TPU\" for d in physical_devices])\n", "\n", "if has_tpus:\n", " TPU_WORKER = 'local'\n", "\n", " resolver = tf.distribute.cluster_resolver.TPUClusterResolver(\n", " tpu=TPU_WORKER, job_name='tpu_worker')\n", " if TPU_WORKER != 'local':\n", " tf.config.experimental_connect_to_cluster(resolver, protocol='grpc+loas')\n", " tf.tpu.experimental.initialize_tpu_system(resolver)\n", " strategy = tf.distribute.experimental.TPUStrategy(resolver)\n", " print('Number of devices: {}'.format(strategy.num_replicas_in_sync))\n", "\n", " cur_strategy = strategy\n", "else:\n", " cur_strategy = tf.distribute.get_strategy()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "6FIAmXgOOPWg" }, "source": [ "Now we can create the model with the distributed strategy in place if TPUs are available. We have some test models that we can use, or you can build your own models. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 977 }, "colab_type": "code", "executionInfo": { "elapsed": 1149, "status": "ok", "timestamp": 1591840393983, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "P0_-26kwxZiQ", "outputId": "bf2828fe-2968-4d7d-82e7-0e2b87f063ae" }, "outputs": [], "source": [ "with cur_strategy.scope():\n", " model = get_model(DATASET)\n", " custom_objects = {}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "jok7tJq1OVuJ" }, "source": [ "Let's see the accuracy on a unquantized model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 360 }, "colab_type": "code", "executionInfo": { "elapsed": 10292, "status": "ok", "timestamp": 1591840404285, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "nvFSJpeDxmWZ", "outputId": "ceac171d-2357-4d2a-ecbe-6c2775bc2a94" }, "outputs": [], "source": [ "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " model.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " model.fit(x_train, y_train, epochs=10, batch_size=2048, steps_per_epoch=29, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "pKArZ2VwQlph" }, "source": [ "For `mnist`, we should get 99% validation accuracy, and for `fashion_mnist`, we should get around 86% of validation accuracy. Let's get a metric for high-level estimation of energy of this model. 
\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 413, "status": "ok", "timestamp": 1591840404708, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "AlIk3gtFS6iJ", "outputId": "780a9c28-6234-49ff-9a85-e52bf00a5c59" }, "outputs": [], "source": [ " reference_internal = \"fp32\"\n", " reference_accumulator = \"fp32\"\n", "\n", " q = run_qtools.QTools(\n", " model,\n", " # energy calculation using a given process\n", " # \"horowitz\" refers to 45nm process published at\n", " # M. Horowitz, \"1.1 Computing's energy problem (and what we can do about\n", " # it), \"2014 IEEE International Solid-State Circuits Conference Digest of\n", " # Technical Papers (ISSCC), San Francisco, CA, 2014, pp. 10-14, \n", " # doi: 10.1109/ISSCC.2014.6757323.\n", " process=\"horowitz\",\n", " # quantizers for model input\n", " source_quantizers=[quantized_bits(8, 0, 1)],\n", " is_inference=False,\n", " # absolute path (including filename) of the model weights\n", " # in the future, we will attempt to optimize the power model\n", " # by using weight information, although it can be used to further\n", " # optimize QBatchNormalization.\n", " weights_path=None,\n", " # keras_quantizer to quantize weight/bias in un-quantized keras layers\n", " keras_quantizer=reference_internal,\n", " # keras_quantizer to quantize MAC in un-quantized keras layers\n", " keras_accumulator=reference_accumulator,\n", " # whether calculate baseline energy\n", " for_reference=True)\n", " \n", "# caculate energy of the derived data type map.\n", "energy_dict = q.pe(\n", " # whether to store parameters in dram, sram, or fixed\n", " weights_on_memory=\"sram\",\n", " # store activations in dram or sram\n", " activations_on_memory=\"sram\",\n", " # minimum sram size in number of bits. 
Let's assume a 16MB SRAM.\n", " min_sram_size=8*16*1024*1024,\n", " # whether load data from dram to sram (consider sram as a cache\n", " # for dram. If false, we will assume data will be already in SRAM\n", " rd_wr_on_io=False)\n", "\n", "# get stats of energy distribution in each layer\n", "energy_profile = q.extract_energy_profile(\n", " qtools_settings.cfg.include_energy, energy_dict)\n", "# extract sum of energy of each layer according to the rule specified in\n", "# qtools_settings.cfg.include_energy\n", "total_energy = q.extract_energy_sum(\n", " qtools_settings.cfg.include_energy, energy_dict)\n", "\n", "pprint.pprint(energy_profile)\n", "print()\n", "print(\"Total energy: {:.2f} uJ\".format(total_energy / 1000000.0))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-eDXxDSUVJ2m" }, "source": [ "During the computation, we had a dictionary that outlines the energy per layer (`energy_profile`), and total energy (`total_energy`). The reader should remember that `energy_profile` may need additional filtering as implementations will fuse some\n", "layers. When we compute the `total_energy`, we consider an approximation that some layers will be fused to compute the final energy number. For example, a convolution layer followed by an activation layer will be fused into a single layer so that the output of the convolution layer is not used.\n", "\n", "You have to remember that our high-level model for energy has several assumptions:\n", "\n", "The energy of a layer is estimated as `energy(layer) = energy(input) + energy(parameters) + energy(MAC) + energy(output)`.\n", "\n", "1) Reading inputs, parameters and outputs consider only _compulsory_ accesses, i.e. first access to the data, which is independent of the hardware architecture. 
If you remember _The 3 C's of Caches_ (https://courses.cs.washington.edu/courses/cse410/99au/lectures/Lecture-10-18/tsld035.htm) other types of accesses will depend on the accelerator architecture.\n", "\n", "2) For the multiply-and-add (MAC) energy estimation, we only consider the energy to compute the MAC, but not any other type energy. For example, in a real accelerator, you have registers, glue logic, pipeline logic that will affect the overall energy profile of the device.\n", "\n", "Although this model is simple and provides an initial estimate on what to expect, it has high-variance with respect to actual energy numbers you will find in practice, especially with respect to different architectural implementations.\n", "\n", "We assume that the real energy `Energy(layer)` is a linear combination of the high-level energy model, i.e.`Energy(layer) = k1 * energy(layer) + k2`, where `k1` and `k2` are constants that depend on the architecture of the accelerator. One can think of `k1` as the factor that accounts for the additional storage to keep the model running, and `k2` as the additional always on logic that is required to perform the operations. If we compare the energy of two implementations with different quantizations of the same layer, let's say `layer1` and `layer2`, `Energy(layer1) > Energy(layer2)` holds true iff `energy(layer1) > energy(layer2)` for the same architecture, but for different architectures, this will not be true in general.\n", "\n", "Despite its limitations to predict a single energy number, this model is quite good to compare the energy of two different models, or different types of quantizations, when we restrict it to a single architecture, and that's how we use it here." 
] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Hr1FL8wVSy-q" }, "source": [ "# Quantizing a Model With `AutoQKeras`\n", "\n", "To quantize this model with `AutoQKeras`, we need to define the quantization for kernels, biases and activations; forgiving factors and quantization strategy.\n", "\n", "Below we define which quantizers are allowed for kernel, bias, activations and linear. Linear is a proxy that we use to capture `Activation(\"linear\")` to apply quantization without applying a non-linear operation. In some networks, we found that this trick may be necessary to better represent the quantization space.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "vSsEwDr_yRG4" }, "outputs": [], "source": [ "quantization_config = {\n", " \"kernel\": {\n", " \"binary\": 1,\n", " \"stochastic_binary\": 1,\n", " \"ternary\": 2,\n", " \"stochastic_ternary\": 2,\n", " \"quantized_bits(2,1,1,alpha=1.0)\": 2,\n", " \"quantized_bits(4,0,1,alpha=1.0)\": 4,\n", " \"quantized_bits(8,0,1,alpha=1.0)\": 8,\n", " \"quantized_po2(4,1)\": 4\n", " },\n", " \"bias\": {\n", " \"quantized_bits(4,0,1)\": 4,\n", " \"quantized_bits(8,3,1)\": 8,\n", " \"quantized_po2(4,8)\": 4\n", " },\n", " \"activation\": {\n", " \"binary\": 1,\n", " \"ternary\": 2,\n", " \"quantized_relu_po2(4,4)\": 4,\n", " \"quantized_relu(3,1)\": 3,\n", " \"quantized_relu(4,2)\": 4,\n", " \"quantized_relu(8,2)\": 8,\n", " \"quantized_relu(8,4)\": 8,\n", " \"quantized_relu(16,8)\": 16\n", " },\n", " \"linear\": {\n", " \"binary\": 1,\n", " \"ternary\": 2,\n", " \"quantized_bits(4,1)\": 4,\n", " \"quantized_bits(8,2)\": 8,\n", " \"quantized_bits(16,10)\": 16\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "GmW_xaAvZo4D" }, "source": [ "Now let's define how to apply quantization. In the simplest form, we specify how many bits for kernels, biases and activations by layer types. 
Note that the entry `BatchNormalization` needs to be specified here, as we only quantize layer types specified by these patterns. For example, a `Flatten` layer is not quantized as it does not change the data type of its inputs." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "emTRLIZmR-P7" }, "outputs": [], "source": [ "limit = {\n", " \"Dense\": [8, 8, 4],\n", " \"Conv2D\": [4, 8, 4],\n", " \"DepthwiseConv2D\": [4, 8, 4],\n", " \"Activation\": [4],\n", " \"BatchNormalization\": []\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-iu5gFNhaLNE" }, "source": [ "Here, we are specifying that we want to use at most 4 bits for weights and activations, and at most 8 bits for biases in convolutional and depthwise convolutions, but we allow up to 8 bits for kernels in dense layers." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "ZUMQGEIDblSa" }, "source": [ "Let's define now the forgiving factor. We will consider energy minimization as a goal as follows. Here, we are saying that we allow 8% reduction in accuracy for a 2x reduction in energy, both reference and trials have parameters and activations on SRAM, both reference model and quantization trials do not read/write from DRAM on I/O operations, and we should consider both experiments to use SRAMs with minimum tensor sizes (commonly called distributed SRAM implementation).\n", "\n", "We also need to specify the quantizers for the inputs. In this case, we want to use `int8` as source quantizers. Other possible types are `int16`, `int32`, `fp16` or `fp32`, besides `QKeras` quantizer types.\n", "\n", "Finally, to be fair, we want to compare our quantization against fixed-point 8-bit inputs, outputs, activations, weights and biases, and 32-bit accumulators.\n", "\n", "Remember that a `forgiving factor` forgives a drop in a metric such as `accuracy` if the gains of the model are much bigger than the drop. 
For example, it corresponds to the sentence *we allow $\\tt{delta}\\%$ reduction in accuracy if the quantized model has $\\tt{rate} \\times$ smaller energy than the original model*, being a multiplicative factor to the metric. It is computed by $1 + \\tt{delta} \\times \\log_{\\tt{rate}}(\\tt{stress} \\times \\tt{reference\\_cost} / \\tt{trial\\_cost})$." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "kS31TuZ-aKb1" }, "outputs": [], "source": [ "goal = {\n", " \"type\": \"energy\",\n", " \"params\": {\n", " \"delta_p\": 8.0,\n", " \"delta_n\": 8.0,\n", " \"rate\": 2.0,\n", " \"stress\": 1.0,\n", " \"process\": \"horowitz\",\n", " \"parameters_on_memory\": [\"sram\", \"sram\"],\n", " \"activations_on_memory\": [\"sram\", \"sram\"],\n", " \"rd_wr_on_io\": [False, False],\n", " \"min_sram_size\": [0, 0],\n", " \"source_quantizers\": [\"int8\"],\n", " \"reference_internal\": \"int8\",\n", " \"reference_accumulator\": \"int32\"\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-QzyWPA-dCxm" }, "source": [ "There are a few more things we need to define. Let's bundle them on a dictionary and pass them to `AutoQKeras`. We will try a maximum of 10 trials (`max_trials`) just to limit the time we will spend finding the best quantization here. Please note that this parameter is not valid if you are running in `hyperband` mode.\n", "\n", "`output_dir` is the directory where we will store our results. Since we are running on a colab, we will let `tempfile` chooce a directory for us.\n", "\n", "`learning_rate_optimizer` allows `AutoQKeras` to change the optimization function and the `learning_rate` to try to improve the quantization results. Since it is still experimental, it may be the case that in some cases it will get worse results. 
\n", "\n", "Because we are tuning filters as well, we should set `transfer_weights` to `False` as the trainable parameters will have different shapes.\n", "\n", "In `AutoQKeras` we have three modes of operation: `random`, `bayesian` and `hyperband`. I recommend the user to refer to `KerasTuner` (https://keras-team.github.io/keras-tuner/) for a complete description of them.\n", "\n", "`tune_filters` can be set to `layer`, `block` or `none`. If `tune_filters` is `block`, we change the filters by the same amount for all layers being quantized in the trial. If `tune_filters` is `layer`, we will possibly change the number of filters for each layer independently. Finally, if `tune_filters` is `none`, we will not perform filter tuning.\n", "\n", "Together with `tune_filters`, `tune_filter_exceptions` allows the user to specify by a regular expression which filters we should not perform filter tuning, which is especially good for the last layers of the network.\n", "\n", "Filter tuning is a very important feature of `AutoQKeras`. When we deep quantize a model, we may need less or more filters for each layer (and you can guess we do not know a priori how many filters we will need for each layer). Let me give you a rationale behind this.\n", "\n", "- **less filters**: let us assume we have two set of filter coefficients we want quantize: $[-0.3, 0.2, 0.5, 0.15]$ and $[-0.5, 0.4, 0.1, 0.65]$. If we apply a $\\tt{binary}$ quantizer with $\\tt{scale} = \\big\\lceil \\log_2(\\frac{\\sum |w|}{N}) \\big\\rceil$, where $w$ are the filter coefficients and $N$ is the number of coefficients, we will end up with the same filter $\\tt{binary}([-0.3, 0.2, 0.5, 0.15]) = \\tt{binary}([-0.5, 0.4, 0.1, 0.65]) = [-1,1,1,1] \\times 0.5$. 
In this case we are assuming the $\\tt{scale}$ is a power-of-2 number so that it can be efficiently implemented by a shift operation;\n", "\n", "- **more filters**: it is clear that quantization will drop information (just look at the example above) and deep quantization will drop more information, so to recover some of the boundary regions in layers that perform feature extraction, we may need to add more filters to the layer when we quantize it.\n", "\n", "We do not want to quantize the `softmax` layer, which is the last layer of the network. In `AutoQKeras`, you can specify the indexes that you want to perform quantization by specifying the corresponding index of the layer in `Keras`, i.e. if you can get the layer as `model.layers[i]` in `Keras`, `i` is the index of the layer.\n", "\n", "Finally, for data parallel distributed training, we should pass the strategy in `distribution_strategy` to `KerasTuner`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 54 }, "colab_type": "code", "executionInfo": { "elapsed": 297, "status": "ok", "timestamp": 1591840405963, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "2-fyACb2dIAN", "outputId": "a180fa3f-8cc3-4f70-ce70-c05c28f88d1e" }, "outputs": [], "source": [ "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": cur_strategy,\n", " # first layer is input, layer two layers are softmax and flatten\n", " \"layer_indexes\": range(1, len(model.layers) - 1),\n", " \"max_trials\": 20\n", "}\n", "\n", "print(\"quantizing layers:\", [model.layers[i].name for i in 
run_config[\"layer_indexes\"]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 471192, "status": "ok", "timestamp": 1591840877167, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "MxlZFpa3fBv2", "outputId": "4d339846-1832-4a79-89b3-c9c4944dd47a" }, "outputs": [], "source": [ "autoqk = AutoQKeras(model, metrics=[\"acc\"], custom_objects=custom_objects, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "LW_qN8-lOwL0" }, "source": [ "Now, let's see which model is the best model we got.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 3961, "status": "ok", "timestamp": 1591840881173, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "1L7KivAoffaL", "outputId": "f44b07a3-027d-4d69-9864-b3670815c407" }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "RB2xBRhJiwoh" }, "source": [ "We got here >90% reduction in energy when compared to 8-bit tensors and 32-bit accumulators. Remember that our original number was 3.3 uJ for fp32. The end model has 11 nJ for the quantized model as opposed to 204 nJ for the 8-bit original quantized model. As these energy numbers are from high-level energy models, you should remember to consider the relations between them, and not the actual numbers." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Wy0zcqvQoBnb" }, "source": [ "Let's train this model to see how much accuracy we can get of it." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 71353, "status": "ok", "timestamp": 1591840952535, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "_ipZSEfgoGdb", "outputId": "b184269d-1161-417a-e1ae-e852dc451561" }, "outputs": [], "source": [ "qmodel.load_weights(\"qmodel.h5\")\n", "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Fr95jcPROz7p" }, "source": [ "One of problems of trying to quantize the whole thing in one shot is that we may end up with too many choices to make, which will make the entire search space very high. In order to reduce the search space, `AutoQKeras` has two methods to enable users to cope with the explosion of choices." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9zc7ZrnbPIJA" }, "source": [ "## Grouping Layers to Use the Same Choice\n", "\n", "In this case, we can provide regular expressions to `limit` to specify layer names that should be grouped together. In our example, suppose we want to group convolution layers (except the first one) and all activations except the last one to use the same quantization.\n", "\n", "For the first convolution layer, we want to limit the quantization types to fewer choices as the input is already an 8-bit number. The last activation will be fed to a feature classifier layer, so we may leave it with more bits. Because our `dense` is actually a `Conv2D` operation, we will enable 8-bits for the weights by layer name. \n", "\n", "We first need to look at the names of the layers for this. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 428 }, "colab_type": "code", "executionInfo": { "elapsed": 301, "status": "ok", "timestamp": 1591840952867, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "w-d8nhG0pJF0", "outputId": "6529b630-f382-4e2a-94ef-ba3d9e3f875c" }, "outputs": [], "source": [ "pprint.pprint([layer.name for layer in model.layers])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "32Enp890pU_4" }, "source": [ "Convolution layers for `mnist` have names specified as `conv2d_[01234]`. Activation layers have names specified as `act_[01234]`. So, we can create the following regular expressions to reduce the search space in our model.\n", "\n", "Please note that layer class names always select different quantizers, so the user needs to specify a pattern for layer names if he/she wants to use the same quantization for the group of layers.\n", "\n", "You can see here another feature of the limit. You can specify the maximum number of bits, or cherry pick which quantizers you want to try for a specific layer if instead of the maximum number of bits you specify a list of quantizers fron `quantization_config`." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "Y5XItp95PHW6" }, "outputs": [], "source": [ "limit = {\n", " \"Dense\": [8, 8, 4],\n", " \"Conv2D\": [4, 8, 4],\n", " \"DepthwiseConv2D\": [4, 8, 4],\n", " \"Activation\": [4],\n", " \"BatchNormalization\": [],\n", "\n", " \"^conv2d_0$\": [\n", " [\"binary\", \"ternary\", \"quantized_bits(2,1,1,alpha=1.0)\"],\n", " 8, 4\n", " ],\n", " \"^conv2d_[1234]$\": [4, 8, 4],\n", " \"^act_[0123]$\": [4],\n", " \"^act_4$\": [8],\n", " \"^dense$\": [8, 8, 4]\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "EJs1L-jIie7w" }, "outputs": [], "source": [ "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": cur_strategy,\n", " \"layer_indexes\": range(1, len(model.layers) - 1),\n", " \"max_trials\": 40\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 993665, "status": "ok", "timestamp": 1591841947161, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "c7eSwXyijhzc", "outputId": "6c76a21f-cbb3-4bc5-b899-b02c28821b78" }, "outputs": [], "source": [ "autoqk = AutoQKeras(model, metrics=[\"acc\"], custom_objects=custom_objects, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "7sYp8Z2pnLi1" }, "source": [ "Let's see the reduction now." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 7109, "status": "ok", "timestamp": 1591841954308, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "yj826gNhjsfK", "outputId": "2e7f17d7-794e-44f6-d23a-452759727a53" }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "eXMcqxLAnY8t" }, "source": [ "Let's train this model for more time to see how much we can get in accuracy." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 68145, "status": "ok", "timestamp": 1591842022471, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "qpT8QgkJnQPa", "outputId": "61e711db-6187-4047-dae8-9ce2d093f56c" }, "outputs": [], "source": [ "qmodel.load_weights(\"qmodel.h5\")\n", "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "gAV6Kw0QoODq" }, "source": [ "## Quantization by Blocks\n", "\n", "In the previous section, we enforced that all decisions were the same in order to reduce the number of options to quantize a model. 
\n", "\n", "Another approach is still to allow models to have each block of layers to makde their own choice, but quantizing the blocks sequentially, either from inputs to outputs, or by quantizing higher energy blocks first.\n", "\n", "The rationale for this method is that if we quantize the blocks one by one, and assuming that each block has $N$ choices, and $B$ blocks, we end up trying $N B$ options, instead of $N^B$ choices. The reader should note that this is an approximation as there is no guarantee that we will obtain the best quantization possible.\n", "\n", "Should you do sequential from inputs to outputs or starting from the block that has the highest impact?\n", "\n", "If you have a network like ResNet, and if you want to do filter tuning, you need to block the layers by the resnet definition of a block, i.e. including full identity or convolutional blocks, and quantize the model from inputs to outputs, so that you can preserve at each stage the number of channels for the residual block. \n", "\n", "In order to perform quantization by blocks, you need to specify two other parameters in our `run_config`. `blocks` is a list of regular expressions of the groups you want to quantize. If a layer does not match the block pattern, it will not be quantized. `schedule_block` specifies the mode for block quantization scheduling. It can be `sequential` or `cost` if you want to schedule first the blocks by decreasing cost size (energy or bits).\n", "\n", "In this model, there are a few optimizations that we perform automatically. First, we dynamically reduce the learning rate of the blocks that we have already quantized as setting them to not-trainable does not seem to work, so we still allow them to train, but at a slower pace. In addition, we try to dynamically adjust the learning rate for the layer we are trying to quantize as opposed to the learning rate of the unquantized layers. 
Finally, we transfer the weights of the models we have already quantized whenever we can do (if the shapes remain the same). \n", "\n", "Regardless on how we schedule the operations, we amortize the nubmer of trials for the cost of the block (energy or bits with respect to the total energy or number of bits of the network).\n", "\n", "Instead of invoking `AutoQKeras` now, we will invoke `AutoQKeras` scheduler." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "NUz4A6SKnhUf" }, "outputs": [], "source": [ "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": cur_strategy,\n", " \"layer_indexes\": range(1, len(model.layers) - 1),\n", " \"max_trials\": 40,\n", "\n", " \"blocks\": [\n", " \"^.*_0$\",\n", " \"^.*_1$\",\n", " \"^.*_2$\",\n", " \"^.*_3$\",\n", " \"^.*_4$\",\n", " \"^dense\"\n", " ],\n", " \"schedule_block\": \"cost\"\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JWJiZZ9vsORJ" }, "source": [ "Because specifying regular expressions is error prone, we recommend that you first try to run `AutoQKerasScheduler` in debug mode to print the blocks." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 737 }, "colab_type": "code", "executionInfo": { "elapsed": 395, "status": "ok", "timestamp": 1591842023212, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "uSOxKQGwsqf2", "outputId": "18647e4f-ef7a-4c6a-aeb8-0c9c2039fdbb" }, "outputs": [], "source": [ "pprint.pprint([layer.name for layer in model.layers])\n", "autoqk = AutoQKerasScheduler(model, metrics=[\"acc\"], custom_objects=custom_objects, debug=True, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "TQPUKPZhC_SI" }, "source": [ "All blocks seem to be fine. Let's find the best quantization now." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 1938883, "status": "ok", "timestamp": 1591843962106, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "sXt-cRKvDEaL", "outputId": "36db3217-86ff-4425-ee12-f637a4fc1841" }, "outputs": [], "source": [ "autoqk = AutoQKerasScheduler(model, metrics=[\"acc\"], custom_objects=custom_objects, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 291 }, "colab_type": "code", "executionInfo": { "elapsed": 396, "status": "ok", "timestamp": 1591843962540, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "ArdGbsXFDK-I", "outputId": "43730cd5-93fc-4838-c49a-1f3f4151fa54" }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 69779, "status": "ok", "timestamp": 1591844032332, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "RHGb6YHFEgtV", "outputId": "5578ce49-1ee9-4063-deab-1b3db9f4b66b" }, "outputs": [], "source": [ "qmodel.load_weights(\"qmodel.h5\")\n", "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab": {}, "colab_type": "code", "id": "fJCkMdAcjnoh" }, "source": [ "Perfect! You have learned how to perform automatic quantization using AutoQKeras with QKeras." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "collapsed_sections": [], "last_runtime": { "build_target": "//learning/deepmind/dm_python:dm_notebook3_tpu", "kind": "private" }, "name": "AutoQKeras.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 1 } ================================================ FILE: notebook/CodebookQuantization.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the 
License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Codebook based quantization\n", "\n", "Codebook based quantizaion is a non-uniform quantization technique that maps each weight or activation value to the index of a value in the codebook. This allows us to compress weights/activations even further with neglibible loss in performance. We will demonstrate this by training an object classification model and applying codebook quantization to the activation with the most values." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.regularizers import *\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.optimizers import *\n", "from tensorflow.keras.datasets import *\n", "from tensorflow.keras.utils import to_categorical\n", "\n", "from qkeras import *\n", "from qkeras.codebook import *\n", "\n", "\n", "def get_data(name, sample_size=1.0):\n", " (x_train, y_train), (x_test, y_test) = globals()[name].load_data()\n", "\n", " if len(x_train.shape) == 3:\n", " x_train = x_train.reshape(x_train.shape + (1,))\n", " x_test = x_test.reshape(x_test.shape + (1,))\n", "\n", " x_train = x_train.astype(\"float32\")\n", " x_test = x_test.astype(\"float32\")\n", "\n", " mean = np.mean(x_train,axis=(0,1,2,3))\n", " std = np.std(x_train,axis=(0,1,2,3))\n", " x_train = (x_train-mean)/(std+1e-7)\n", " x_test = (x_test-mean)/(std+1e-7)\n", "\n", " y_train_c = to_categorical(y_train, 
np.max(y_train) + 1)\n", " y_test_c = to_categorical(y_test, np.max(y_test) + 1)\n", "\n", " if sample_size != 1.0:\n", " indexes = np.asarray(range(x_train.shape[0]))\n", " np.random.shuffle(indexes)\n", " indexes = indexes[:int(x_train.shape[0] * sample_size)]\n", "\n", " x_train = x_train[indexes]\n", " y_train_c = y_train_c[indexes]\n", "\n", " return (x_train, y_train_c), (x_test, y_test_c)\n", "\n", "\n", "def get_model(\n", " name, X_train, y_train, X_test, y_test,\n", " blocks=[[32], [64], [128]],\n", " quantizer_list=[\n", " \"quantized_relu_po2(4,4)\",\n", " \"quantized_relu_po2(4,4)\"\n", " ],\n", " use_stochastic_rounding=0,\n", " l1v=None,\n", " epochs=10,\n", " load_weights=True):\n", "\n", " if l1v is None:\n", " l1v = [0.0] * len(blocks)\n", "\n", " X_shape = X_train.shape[1:]\n", " x_i = x = Input(X_shape)\n", "\n", " for b, block in enumerate(blocks):\n", " # we are assuming we want to quantize the block that has sparsity\n", " # so let's add dropout to the next layer\n", "\n", " if b >= 1 and l1v[b-1] != 0.0:\n", " x = Dropout(0.3, name=f\"drop{b}\")(x)\n", "\n", " for i in range(len(block)):\n", " x = QConv2D(\n", " block[i], kernel_size=(3,3), strides=(2,2), padding=\"same\",\n", " kernel_quantizer=f\"quantized_bits(4, use_stochastic_rounding={use_stochastic_rounding})\",\n", " bias_quantizer=f\"quantized_po2(4, use_stochastic_rounding={use_stochastic_rounding})\",\n", " kernel_regularizer=l1(l1v[b]) if l1v[b] != 0.0 else None,\n", " name=f\"d{b}_{i}\")(x)\n", " if i != len(block) - 1:\n", " if quantizer_list[b] in [\"linear\", \"relu\", \"softmax\", \"sigmoid\"]:\n", " x = Activation(quantizer_list[b], name=f\"a{b}_{i}\")(x)\n", " else:\n", " x = QActivation(quantizer_list[b], name=f\"a{b}_{i}\")(x)\n", " else:\n", " x = QBatchNormalization(name=f\"bn{b}_{i}\")(x)\n", " if b < len(blocks) - 1:\n", " if quantizer_list[b] in [\"linear\", \"relu\", \"softmax\", \"sigmoid\"]:\n", " x = Activation(quantizer_list[b], 
name=f\"a{b}_{len(block)-1}\")(x)\n", " else:\n", " x = QActivation(quantizer_list[b], name=f\"a{b}_{len(block)-1}\")(x)\n", " else:\n", " if len(block) > 0:\n", " x = QActivation(f\"quantized_relu(6,2, use_stochastic_rounding={use_stochastic_rounding})\", \n", " name=f\"a{b}_{len(block)-1}\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", " x = QDense(\n", " y_train.shape[1], name=f\"d{len(blocks)-1}_{len(block)}\")(x)\n", " x = Activation(\"softmax\", name=f\"a{len(blocks)-1}_{len(block)}\")(x)\n", "\n", " model = Model(inputs=x_i, outputs=x)\n", " model.summary()\n", "\n", " model.compile(loss=\"categorical_crossentropy\", optimizer=Adam(0.001), metrics=[\"acc\"])\n", "\n", " try:\n", " if load_weights and os.path.isfile(name + \".h5\"):\n", " print('Found file...')\n", " model.load_weights(name + \".h5\")\n", " else:\n", " model.fit(X_train, y_train, validation_data=(X_test, y_test),\n", " batch_size=128, epochs=epochs, verbose=2)\n", " model.save_weights(name + \".h5\")\n", " except:\n", " model.fit(X_train, y_train, validation_data=(X_test, y_test),\n", " batch_size=128, epochs=epochs, verbose=2)\n", " model.save_weights(name + \".h5\")\n", "\n", " return model\n", "\n", "\n", "name = \"cifar10\"\n", "(X_train, y_train), (X_test, y_test) = get_data(name, sample_size=1)\n", "model = get_model(\n", " name, X_train, y_train, X_test, y_test,\n", " blocks=[[32, 32], [64, 64], [128]],\n", " quantizer_list=[\"quantized_relu(6,2)\", \"quantized_relu(6,2)\"],\n", " epochs=50,\n", " load_weights=True\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from qkeras.codebook import *\n", "\n", "cb_tables, models, km_models = activation_compression(\n", " model, \n", " {'loss' : \"categorical_crossentropy\", 'metrics' : [\"acc\"]},\n", " [2], 3, \n", " X_train, y_train, \n", " X_test, y_test,\n", " sample_size=0.3\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "q = 
models[0].layers[-1].quantizer\n", "in_table, out_table = cb_tables[0]\n", "print(q)\n", "print('in_table:', in_table)\n", "print('out_table:', out_table)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "for i,x in enumerate(q.range()):\n", " print(f'{x:8}, {in_table[out_table[i]]:6}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Weight compression using codebook quantization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conv_weights = model.layers[1].weights[0].numpy()\n", "print(conv_weights.shape)\n", "quantizer = model.layers[1].kernel_quantizer_internal\n", "print(quantizer)\n", "axis = 3\n", "bits = 3\n", "index_table, codebook_table = weight_compression(\n", " conv_weights, \n", " bits, \n", " axis, \n", " quantizer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(codebook_table.shape)\n", "codebook_table[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(index_table.shape)\n", "index_table[:,:,:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_conv_weights = np.zeros(conv_weights.shape)\n", "for i in range(conv_weights.shape[axis]):\n", " new_conv_weights[:,:,:,i] = codebook_table[i][index_table[:,:,:,i]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_conv_weights[:,:,:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conv_weights[:,:,:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bias = model.layers[1].weights[1].numpy()\n", "model.layers[1].set_weights([new_conv_weights, bias])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"model.evaluate(X_test, y_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Py3", "language": "python", "name": "py3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: notebook/QKerasTutorial.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# QKeras Lab Book" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "__QKeras__ is a quantization extension to Keras that provides drop-in replacement for some of the Keras layers, especially the ones that creates parameters and activation layers, and perform arithmetic operations, so that we can quickly create a deep quantized version of Keras network.\n", "\n", "According to Tensorflow documentation, Keras is a high-level API to build and train deep learning models. 
It's used for fast prototyping, advanced research, and production, with three key advantages:\n", "\n", "- User friendly
\n", "Keras has a simple, consistent interface optimized for common use cases. It provides clear and actionable feedback for user errors.\n", "\n", "- Modular and composable
\n", "Keras models are made by connecting configurable building blocks together, with few restrictions.\n", "\n", "- Easy to extend
\n", "Write custom building blocks to express new ideas for research. Create new layers, loss functions, and develop state-of-the-art models.\n", "\n", "__QKeras__ is being designed to extend the functionality of Keras using Keras' design principle, i.e. being user friendly, modular and extensible, adding to it being \"minimally intrusive\" of Keras native functionality.\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Related Work\n", "\n", "__QKeras__ has been implemented based on the work of _\"B.Moons et al. - Minimum Energy Quantized Neural Networks\"_ , Asilomar Conference on Signals, Systems and Computers, 2017 and _“Zhou, S. et al. DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients,”_ but the framework should be easily extensible. The original code from QNN can be found below.\n", "\n", "https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\n", "\n", "__QKeras__ extends QNN by providing a richer set of layers (including SeparableConv2D, DepthwiseConv2D, ternary and stochastic ternary quantizations), besides some functions to aid the estimation for the accumulators and conversion from non-quantized to quantized networks. Finally, our main goal is ease of use, so we attempt to make QKeras layers a true drop-in replacement for Keras, so that users can easily exchange non-quantized layers for quantized ones."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Layers Implemented in QKeras\n", "\n", "The following layers have been implemented in __QKeras__.\n", "\n", "- __`QDense`__\n", "\n", "- __`QConv1D`__\n", "\n", "- __`QConv2D`__\n", "\n", "- __`QDepthwiseConv2D`__\n", "\n", "- __`QSeparableConv2D`__ (depthwise + pointwise expanded, extended from MobileNet SeparableConv2D implementation)\n", "\n", "- __`QActivation`__\n", "\n", "- __`QAveragePooling2D`__ (in fact, an AveragePooling2D stacked with a QActivation layer for quantization of the result, so this layer does not exist)\n", "\n", "- __`QBatchNormalization`__\n", "\n", "- __`QOctaveConv2D`__\n", "\n", "It is worth noting that not all functionality is safe at this time to be used with other high-level operations, such as with layer wrappers. For example, `Bidirectional` layer wrappers are used with RNNs. If this is required, we encourage users to use quantization functions invoked as strings instead of the actual functions as a way through this, but we may change that implementation in the future.\n", "\n", "__`QSeparableConv2D`__ is implemented as a depthwise + pointwise quantized expansion, which is extended from the `SeparableConv2D` implementation of MobileNet. With the exception of __`QBatchNormalization`__, if quantizers are not specified, no quantization is applied to the layer and it ends up behaving like the original unquantized layers. On the other hand, __`QBatchNormalization`__ has been implemented differently as if the user does not specify any quantizers as parameters, it uses a setup that has worked best when attempting to implement quantization efficiently in hardware and software, i.e. 
`gamma` and `variance` with po2 quantizers (as they become shift registers in an implementation, and with further constraining variance po2 quantizer to use quadratic approximation as we take the square root of the variance to obtain the standard deviation), `beta` using po2 quantizer to maintain the dynamic range aspect of the center parameter, and `mean` remaining unquantized, as it inherits the properties of the previous layer.\n", "\n", "Activation has been migrated to __`QActivation`__, although __QKeras__ also recognizes the activation parameter used in convolutional and dense layers.\n", "\n", "We have improved the setup of quantization as convolution, dense and batch normalization layers now notify the quantizers when the quantizers are used as internal parameters, so the user does not need to worry about setting up options that work best in `weights` and `bias` like `alpha` and `use_stochastic_rounding` (although users may override the automatic setup).\n", "\n", "Finally, in the current version, we have eliminated the need to set up the range of the quantizers like `kernel_range` in __`QDense`__. This is automatically computed internally at this point. Although we kept the parameters for backward compatibility, these parameters will be removed in the future." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Activation Layers and Quantizers Implemented in __QKeras__\n", "\n", "Quantizers and activation layers are treated interchangeably in __QKeras__. 
\n", "\n", "The list of quantizers and their parameters is listed below.\n", "\n", "- __`smooth_sigmoid(x)`__\n", "\n", "- __`hard_sigmoid(x)`__\n", "\n", "- __`binary_sigmoid(x)`__\n", "\n", "- __`smooth_tanh(x)`__\n", "\n", "- __`hard_tanh(x)`__\n", "\n", "- __`binary_tanh(x)`__\n", "\n", "- __`quantized_bits(bits=8, integer=0, symmetric=0, keep_negative=1, alpha=None, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`bernoulli(alpha=None, temperature=6.0, use_real_sigmoid=True)(x)`__\n", "\n", "- __`stochastic_ternary(alpha=None, threshold=None, temperature=8.0, use_real_sigmoid=True)(x)`__\n", "\n", "- __`ternary(alpha=None, threshold=None, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`stochastic_binary(alpha=None, temperature=6.0, use_real_sigmoid=True)(x)`__\n", "\n", "- __`binary(use_01=False, alpha=None, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`quantized_relu(bits=8, integer=0, use_sigmoid=0, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`quantized_ulaw(bits=8, integer=0, symmetric=0, u=255.0)(x)`__\n", "\n", "- __`quantized_tanh(bits=8, integer=0, symmetric=0, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`quantized_po2(bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False)(x)`__\n", "\n", "- __`quantized_relu_po2(bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False)(x)`__\n", "\n", "The __`stochastic_*`__ functions and __`bernoulli`__ rely on stochastic versions of the activation functions, so they are best suited for weights and biases. They draw a random number with uniform distribution from `sigmoid` of the input x, and the result is based on the expected value of the activation function. Please refer to the papers if you want to understand the underlying theory, or the documentation in qkeras/quantizers.py. 
The parameter `temperature` determines how steep the sigmoid function will behave, and the default values seem to work fine.\n", "\n", "As we lower the number of bits, rounding becomes problematic as it adds bias to the number system. Numpy attempts to reduce the effects of bias by rounding to even instead of rounding to infinity. Recent results (_\"Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, Pritish Narayanan; Deep Learning with Limited Numerical Precision_ [https://arxiv.org/abs/1502.02551]) suggested using stochastic rounding, which uses the fractional part of the number as a probability to round up or down. We can turn on stochastic rounding in some quantizers by setting `use_stochastic_rounding` to `True` in __`quantized_bits`__, __`binary`__, __`ternary`__, __`quantized_relu`__ and __`quantized_tanh`__, __`quantized_po2`__, and __`quantized_relu_po2`__. Please note that if one is considering an efficient hardware or software implementation, we should avoid setting this flag to `True` in activations as it may affect the efficiency of an implementation. In addition, as mentioned before, we already set this flag to `True` in some quantized layers when the quantizers are used as weights/biases.\n", "\n", "The parameter `bits` specifies the number of bits for the quantization, and `integer` specifies how many bits of `bits` are to the left of the decimal point. Finally, in our experience training networks with __`QSeparableConv2D`__, it is advisable to allocate more bits between the depthwise and the pointwise quantization, and both __`quantized_bits`__ and __`quantized_tanh`__ should use symmetric versions for weights and bias in order to properly converge and eliminate the bias.\n", "\n", "We have substantially improved the stochastic rounding implementation in __QKeras__ $>= 0.7$, and added a symbolic way to compute alpha in __`binary`__, __`stochastic_binary`__, __`ternary`__, __`stochastic_ternary`__, __`bernoulli`__ and __`quantized_bits`__. 
Right now, a scale and the threshold (for ternary and stochastic_ternary) can be computed independently of the distribution of the inputs, which is required when using these quantizers in weights.\n", "\n", "The main problem in using very small bit widths in large deep learning networks stem from the fact that weights are initialized with variance roughly $\\propto \\sqrt{1/\\tt{fanin}}$, but during the training the variance shifts outwards. If the smallest quantization representation (threshold in ternary networks) is smaller than $\\sqrt{1/\\tt{fanin}}$, we run the risk of having the weights stuck at 0 during training. So, the weights need to dynamically adjust to the variance shift from initialization to the final training. This can be done by scaling the quantization. \n", "\n", "Scale is computed using the formula $\\sum(\\tt{dot}(Q,x))/\\sum(\\tt{dot}(Q,Q))$ which is described in several papers, including _Mohammad Rastegari, Vicente Ordonez, Joseph Redmon, Ali Farhadi \"XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks\"_ [https://arxiv.org/abs/1603.05279]. Scale computation is computed for each output channel, making our implementation sometimes behaving like a mini-batch normalization adjustment. \n", "\n", "For __`ternary`__ and __`stochastic_ternary`__, we iterate between scale computation and threshold computation, as presented in _K. Hwang and W. Sung, \"Fixed-point feedforward deep neural network design using weights +1, 0, and −1,\" 2014 IEEE Workshop on Signal Processing Systems (SiPS), Belfast, 2014, pp. 1-6_ which makes the search for threshold and scale tolerant to different input distributions. This is especially important when we need to consider that the threshold shifts depending on the input distribution, affecting the scale as well, as pointed out by _Fengfu Li, Bo Zhang, Bin Liu, \"Ternary Weight Networks\"_ [https://arxiv.org/abs/1605.04711]. 
\n", "\n", "When computing the scale in these quantizers, if `alpha=\"auto\"`, we compute the scale as a floating point number. If `alpha=\"auto_po2\"`, we enforce the scale to be a power of 2, meaning that an actual hardware or software implementation can be performed by just shifting the result of the convolution or dense layer to the right or left by checking the sign of the scale (positive shifts left, negative shifts right), and taking the log2 of the scale. This behavior is compatible with shared exponent approaches, as it performs a shift adjustment to the channel.\n", "\n", "We have implemented a method for each quantizer called __`_set_trainable_parameter`__ that instructs __QKeras__ to set best options when this quantizer is used as a weight or for gamma, variance and beta in __`QBatchNormalization`__, so in principle, users should not worry about this.\n", "\n", "The following pictures show the behavior of __`binary`__ vs stochastic rounding in __`binary`__ vs __`stochastic_binary`__ (Figure 1) and __`ternary`__ vs stochastic rounding in __`ternary`__ and __`stochastic_ternary`__ (Figure 2). We generated a normally distributed input with mean 0.0 and standard deviation of 0.02, ordered the data, and ran the quantizer 1,000 times, averaging the result for each case. Note that because of scale, the output does not range from $[-1.0, +1.0]$, but from $[-\\tt{scale}, +\\tt{scale}]$.\n", "\n", "\n", "\"Binary
Figure 1: Behavior of binary quantizers
\n", "\n", "\"Ternary
Figure 2: Behavior of ternary quantizers
\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using QKeras\n", "\n", "__QKeras__ works by tagging all variables and weights/bias created by Keras as well as output of arithmetic layers by quantized functions. Quantized functions can be instantiated directly in __`QDense`__/__`QConv2D`__/__`QSeparableConv2D`__ functions, and they can be passed to __`QActivation`__, which act as a merged quantization and activation function.\n", "\n", "In order to successfully quantize a model, users need to replace layers that create variables (trainable or not) (`Dense`, `Conv2D`, etc) by their equivalent ones in __Qkeras__ (__`QDense`__, __`QConv2D`__, etc), and any layers that perform math operations need to be quantized afterwards.\n", "\n", "Quantized values are clipped between their maximum and minimum quantized representation (which may be different than $[-1.0, 1.0]$), although for `po2` type of quantizers, we still recommend the users to specify the parameter for `max_value`.\n", "\n", "An example of a very simple network is given below in Keras." 
] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import six\n", "import numpy as np\n", "import tensorflow.compat.v2 as tf\n", "\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.datasets import mnist\n", "from tensorflow.keras.utils import to_categorical" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def CreateModel(shape, nb_classes):\n", " x = x_in = Input(shape)\n", " x = Conv2D(18, (3, 3), name=\"conv2d_1\")(x)\n", " x = Activation(\"relu\", name=\"act_1\")(x)\n", " x = Conv2D(32, (3, 3), name=\"conv2d_2\")(x)\n", " x = Activation(\"relu\", name=\"act_2\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", " x = Dense(nb_classes, name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " \n", " model = Model(inputs=x_in, outputs=x)\n", "\n", " return model" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def get_data():\n", " (x_train, y_train), (x_test, y_test) = mnist.load_data()\n", " x_train = x_train.reshape(x_train.shape + (1,)).astype(\"float32\")\n", " x_test = x_test.reshape(x_test.shape + (1,)).astype(\"float32\")\n", "\n", " x_train /= 256.0\n", " x_test /= 256.0\n", "\n", " x_mean = np.mean(x_train, axis=0)\n", "\n", " x_train -= x_mean\n", " x_test -= x_mean\n", "\n", " nb_classes = np.max(y_train)+1\n", " y_train = to_categorical(y_train, nb_classes)\n", " y_test = to_categorical(y_test, nb_classes)\n", "\n", " return (x_train, y_train), (x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "(x_train, y_train), (x_test, y_test) = get_data()\n", "\n", "model = CreateModel(x_train.shape[1:], y_train.shape[-1])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", 
metrics=[\"accuracy\"])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "model.fit(x_train, y_train, epochs=3, batch_size=128, validation_data=(x_test, y_test), verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Great! it is relatively easy to create a network that converges in MNIST with very high test accuracy. The reader should note that we named all the layers as it will make it easier to automatically convert the network by name." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The corresponding quantized network is presented below." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from qkeras import *\n", "\n", "def CreateQModel(shape, nb_classes):\n", " x = x_in = Input(shape)\n", " x = QConv2D(18, (3, 3),\n", " kernel_quantizer=\"stochastic_ternary\", \n", " bias_quantizer=\"quantized_po2(4)\",\n", " name=\"conv2d_1\")(x)\n", " x = QActivation(\"quantized_relu(2)\", name=\"act_1\")(x)\n", " x = QConv2D(32, (3, 3), \n", " kernel_quantizer=\"stochastic_ternary\", \n", " bias_quantizer=\"quantized_po2(4)\",\n", " name=\"conv2d_2\")(x)\n", " x = QActivation(\"quantized_relu(2)\", name=\"act_2\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", " x = QDense(nb_classes,\n", " kernel_quantizer=\"quantized_bits(3,0,1)\",\n", " bias_quantizer=\"quantized_bits(3)\",\n", " name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " \n", " model = Model(inputs=x_in, outputs=x)\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "qmodel = CreateQModel(x_train.shape[1:], y_train.shape[-1])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.optimizers import Adam\n", "\n", 
"qmodel.compile(\n", " loss=\"categorical_crossentropy\",\n", " optimizer=Adam(0.0005),\n", " metrics=[\"accuracy\"])" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "qmodel.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test), verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You should note that we had to lower the learning rate and train the network for a longer time. On the other hand, the network should not involve any multiplications in the convolution layers, and very small multipliers in the dense layers.\n", "\n", "Please note that the last `Activation` was not changed to __`QActivation`__ as during inference we usually perform the operation `argmax` on the result instead of `softmax`.\n", "\n", "It seems it is a lot of code to write besides the main network, but in fact, this additional code is only specifying the sizes of the weights and the sizes of the outputs in the case of the activations. Right now, we do not have a way to extract this information from the network structure or problem we are trying to solve, and if we quantize a layer too much, we may end up not being able to recover from that later on."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Converting a Model Automatically\n", "\n", "In addition to the drop-in replacement of Keras functions, we have written the following function to assist anyone who wants to quantize a network.\n", "\n", "__`model_quantize(model, quantizer_config, activation_bits, custom_objects=None, transfer_weights=False)`__\n", "\n", "This function converts a non-quantized model (such as the one from `model` in the previous example) into a quantized version, by applying a configuration specified by the dictionary `quantizer_config`, and `activation_bits` specified for unnamed activation functions, with this parameter probably being removed in future versions.\n", "\n", "The parameter `custom_objects` specifies an object dictionary unknown to Keras, required when you copy a model with lambda layers, or customized layer functions, for example, and if `transfer_weights` is `True`, the returned model will have as initial weights the weights from the original model, instead of using random initial weights.\n", "\n", "The dictionary specified in `quantizer_config` can be indexed by a layer name or layer class name. In the example below, conv2d_1 corresponds to the first convolutional layer of the example, while QConv2D corresponds to the default behavior of two dimensional convolutional layers. The reader should note that right now we recommend using __`QActivation`__ with a dictionary to avoid the conversion of activations such as `softmax` and `linear`. In addition, although we could use the `activation` field in the layers, we do not recommend that. 
\n", "\n", "`{\n", " \"conv2d_1\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QConv2D\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QDense\": {\n", " \"kernel_quantizer\": \"quantized_bits(3,0,1)\",\n", " \"bias_quantizer\": \"quantized_bits(3)\"\n", " },\n", " \"act_1\": \"quantized_relu(2)\",\n", " \"QActivation\": { \"relu\": \"quantized_relu(2)\" }\n", "}`\n", "\n", "In the following example, we will quantize the model using a different strategy.\n" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "config = {\n", " \"conv2d_1\": {\n", " \"kernel_quantizer\": \"stochastic_binary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QConv2D\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QDense\": {\n", " \"kernel_quantizer\": \"quantized_bits(4,0,1)\",\n", " \"bias_quantizer\": \"quantized_bits(4)\"\n", " },\n", " \"QActivation\": { \"relu\": \"binary\" },\n", " \"act_2\": \"quantized_relu(3)\",\n", "}" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "from qkeras.utils import model_quantize\n", "\n", "qmodel = model_quantize(model, config, 4, transfer_weights=True)\n", "\n", "for layer in qmodel.layers:\n", " if hasattr(layer, \"kernel_quantizer\"):\n", " print(layer.name, \"kernel:\", str(layer.kernel_quantizer_internal), \"bias:\", str(layer.bias_quantizer_internal))\n", " elif hasattr(layer, \"quantizer\"):\n", " print(layer.name, \"quantizer:\", str(layer.quantizer))\n", "\n", "print()\n", "qmodel.summary()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [], "source": [ "qmodel.compile(\n", " loss=\"categorical_crossentropy\",\n", " optimizer=Adam(0.001),\n", " metrics=[\"accuracy\"])" ] }, { "cell_type": 
"code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "qmodel.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test), verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In addition to __`model_quantize`__, __QKeras__ offers the following additional utility functions.\n", "\n", "__`BinaryToThermometer(x, classes, value_range, with_residue=False, merge_with_channels, use_two_hot_encoding=False)`__\n", "\n", "This function converts a dense binary encoding of inputs to one-hot (with scales).\n", "\n", "Given input matrix `x` with values (for example) 0, 1, 2, 3, 4, 5, 6, 7, create a number of classes as follows:\n", "\n", "If classes=2, value_range=8, with_residue=0, a true one-hot representation is created, and the remaining bits are truncated, using one bit representation.\n", "\n", "`\n", "0 - [1,0] 1 - [1,0] 2 - [1,0] 3 - [1,0]\n", "4 - [0,1] 5 - [0,1] 6 - [0,1] 7 - [0,1]\n", "`\n", "\n", "If classes=2, value_range=8, with_residue=1, the residue is added to the one-hot class, and the class will use 2 bits (for the remainder) + 1 bit (for the one hot)\n", "\n", "`\n", "0 - [1,0] 1 - [1.25,0] 2 - [1.5,0] 3 - [1.75,0]\n", "4 - [0,1] 5 - [0,1.25] 6 - [0,1.5] 7 - [0,1.75]\n", "`\n", "\n", "The arguments of this function are as follows:\n", "\n", "`\n", "x: the input vector we want to convert. typically its dimension will be\n", " (B,H,W,C) for an image, or (B,T,C) or (B,C) for a 1D signal, where\n", " B=batch, H=height, W=width, C=channels or features, T=time for time\n", " series.\n", "classes: the number of classes to (or log2(classes) bits) to use of the\n", " values.\n", "value_range: max(x) - min(x) over all possible x values (e.g. 
for 8 bits,\n", " we would use 256 here).\n", "with_residue: if true, we split the value range into two sets and add\n", " the decimal fraction of the set to the one-hot representation for partial\n", " thermometer representation.\n", "merge_with_channels: if True, we will not create a separate dimension\n", " for the resulting matrix, but we will merge this dimension with\n", " the last dimension.\n", "use_two_hot_encoding: if true, we will distribute the weight between\n", " the current value and the next one to make sure the numbers will always\n", " be < 1.\n", "`\n", "\n", "__`model_save_quantized_weights(model, filename)`__\n", "\n", "This function saves the quantized weights in the model or writes the quantized weights in the file `filename` for production, as the weights during training are maintained non-quantized because of training. Typically, you should call this function before productizing the final model. The saved model is compatible with Keras for inference, so for power-of-2 quantization, we will not return `(sign, round(log2(weights)))`, but rather `(-1)**sign*2**(round(log2(weights)))`. We also return a dictionary containing the name of the layer and the quantized weights, and for power-of-2 quantizations, we will return `sign` and `round(log2(weights))` so that other tools can properly process that.\n", "\n", "__`load_qmodel(filepath, custom_objects=None, compile=True)`__\n", "\n", "Load quantized model from Keras's model.save() h5 file, where filepath is the path to the filename, custom_objects is an optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization, and compile instructs __QKeras__ to compile the model after reading it. If an optimizer was found as part of the saved model, the model is already compiled. Otherwise, the model is uncompiled and a warning will be displayed. 
When compile is set to `False`, the compilation is omitted without any warning.\n", "\n", "__`print_model_sparsity(model)`__\n", "\n", "Prints sparsity for the pruned layers in the model.\n", "\n", "__`quantized_model_debug(model, X_test, plot=False)`__\n", "\n", "Debugs and plots model weights and activations. It is usually useful to print weights, biases and activations for inputs and outputs when debugging a model. model contains the mixed quantized/unquantized layers for a model. We only print/plot activations and weights/biases for quantized models with the exception of Activation. X_test is the set of inputs we will use to compute activations, and we recommend that the user uses a subsample from the entire set he/she wants to debug. if plot is True, we also plot weights and activations (inputs/outputs) for each layer.\n", "\n", "__`extract_model_operations(model)`__\n", "\n", "As each operation depends on the quantization method for the weights/bias and on the quantization of the inputs, we estimate which operations are required for each layer of the quantized model. For example, inputs of a __`QDense`__ layer are quantized using __`quantized_relu_po2`__ and weights are quantized using __`quantized_bits`__, the matrix multiplication can be implemented as a barrel shifter + accumulator without multiplication operations. Right now, we return for each layer one of the following operations: `mult`, `barrel`, `mux`, `adder`, `xor`, and the sizes of the operator.\n", "\n", "We are currently refactoring this function and it may be substantially changed in the future.\n", "\n", "__`print_qstats(model)`__\n", "\n", "Prints statistics of number of operations per operation type and layer so that user can see how big the model is. 
This function utilizes __`extract_model_operations`__.\n", "\n", "An example of the output is presented below.\n", "\n", "`Number of operations in model:\n", " conv2d_0_m : 25088 (smult_4_8)\n", " conv2d_1_m : 663552 (smult_4_4)\n", " conv2d_2_m : 147456 (smult_4_4)\n", " dense : 5760 (smult_4_4)\n", "\n", "Number of operation types in model:\n", " smult_4_4 : 816768\n", " smult_4_8 : 25088`\n", "\n", "In this example, smult_4_4 stands for 4x4 bit signed multiplication and smult_4_8 stands for 8x4 signed multiplication.\n", "\n", "We are currently refactoring this function and it may be substantially changed in the future.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In the quantized network `qmodel`, let's print the statistics of the model and weights." ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "print_qstats(qmodel)" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "from qkeras.utils import quantized_model_debug\n", "\n", "quantized_model_debug(qmodel, x_test, plot=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Where the values in `conv2d_1 -4.6218 4.0295 ( -1.0000 1.0000) ( -0.5000 0.5000) a( 0.125000 0.500000)` correspond to min and max values of the output of the convolution layer, weight ranges (min and max), bias (min and max) and alpha (min and max)."
] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: notebook/QRNNTutorial.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "import tempfile\n", "\n", "import numpy as np\n", "import tensorflow.compat.v2 as tf\n", "tf.enable_v2_behavior()\n", "\n", "from tensorflow.keras.layers import Input, Dense, Embedding, SimpleRNN, GRU, LSTM, Bidirectional\n", "from tensorflow.keras.optimizers import *\n", "from tensorflow.keras.datasets import imdb\n", "from tensorflow.keras.preprocessing import sequence\n", "\n", "from qkeras.autoqkeras import *\n", "from qkeras import *\n", "\n", "print(\"using tensorflow\", tf.__version__)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "physical_devices = tf.config.list_physical_devices()\n", "for d in physical_devices:\n", " print(d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "try:\n", " device_name = os.environ['COLAB_TPU_ADDR']\n", " TPU_ADDRESS = 'grpc://' + device_name\n", " print('Found TPU at: {}'.format(TPU_ADDRESS))\n", " resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)\n", " tf.config.experimental_connect_to_cluster(resolver)\n", " # This is the TPU initialization code that has to be at the beginning.\n", " tf.tpu.experimental.initialize_tpu_system(resolver)\n", " print(\"All devices: \", tf.config.list_logical_devices('TPU'))\n", " strategy = tf.distribute.experimental.TPUStrategy(resolver) \n", "except KeyError:\n", " print('TPU not found')\n", " strategy = tf.distribute.get_strategy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.random.seed(12)\n", "tf.random.set_seed(12)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "max_features = 10000\n", "# cut texts after this number of words\n", "# (among top max_features most common words)\n", "maxlen 
= 100\n", "BATCH_SIZE = 1000\n", "SHUFFLE_BUFFER_SIZE = 25000\n", "\n", "print('Loading data...')\n", "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)\n", "print(len(x_train), 'train sequences')\n", "print(len(x_test), 'test sequences')\n", "\n", "print('Pad sequences (samples x time)')\n", "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)\n", "print('x_train shape:', x_train.shape)\n", "print('x_test shape:', x_test.shape)\n", "y_train = np.array(y_train)\n", "y_test = np.array(y_test)\n", "\n", "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", "test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", "\n", "train_dataset = train_dataset.batch(BATCH_SIZE).shuffle(SHUFFLE_BUFFER_SIZE)\n", "test_dataset = test_dataset.batch(BATCH_SIZE)\n", "\n", "train_dataset, test_dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using QKeras\n", "\n", "__QKeras__ works by tagging all variables and weights/bias created by Keras as well as output of arithmetic layers by quantized functions. 
Quantized functions can be instantiated directly in __`QSimpleRNN`__/__`QLSTM`__/__`QGRU`__/__`QBidirectional`__/__`QDense`__/__`QConv2D`__/__`QSeparableConv2D`__ functions, and they can be passed to __`QActivation`__, which act as a merged quantization and activation function.\n", "\n", "In order to successfully quantize a model, users need to replace layers that create variables (trainable or not) (`LSTM`, `Conv2D`, etc) by their equivalent ones in __QKeras__ (__`QLSTM`__/__`QDense`__, etc), and any layers that perform math operations need to be quantized afterwards.\n", "\n", "Quantized values are clipped between their maximum and minimum quantized representation (which may be different than $[-1.0, 1.0]$), although for `po2` type of quantizers, we still recommend the users to specify the parameter for `max_value`.\n", "\n", "An example of a very simple recurrent network is given below in Keras." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "units = 64\n", "embedding_dim = 64\n", "loss = 'binary_crossentropy'\n", "\n", "def create_model(batch_size=None):\n", " x = x_in = Input(shape=(maxlen,), batch_size=batch_size, dtype=tf.int32)\n", " x = Embedding(input_dim=max_features, output_dim=embedding_dim)(x)\n", " x = Activation('linear', name='embedding_act')(x)\n", " x = Bidirectional(LSTM(units))(x)\n", " x = Dense(1)(x)\n", " x = Activation('sigmoid')(x)\n", " model = tf.keras.Model(inputs=[x_in], outputs=[x])\n", " return model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " model = create_model(BATCH_SIZE)\n", " custom_objects = {}\n", " model.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])\n", "\n", "model.summary()\n", "print('Train...')\n", "model.fit(\n", " train_dataset,\n", " epochs=10,\n", " batch_size=BATCH_SIZE,\n", " 
validation_data=test_dataset,\n", " verbose=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Replacing with quantized layers" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "def create_qmodel(batch_size=None):\n", " x = x_in = Input(shape=(maxlen,), batch_size=batch_size, dtype=tf.int32)\n", " x = Embedding(input_dim=max_features, output_dim=embedding_dim)(x)\n", " x = QActivation('binary', name='embedding_act')(x)\n", " x = QLSTM(\n", " units,\n", " activation='quantized_tanh(4)',\n", " recurrent_activation='quantized_relu(4,0,1)',\n", " kernel_quantizer='stochastic_ternary(\"auto\")',\n", " recurrent_quantizer='quantized_bits(2,1,1,alpha=1.0)',\n", " bias_quantizer='quantized_bits(4,0,1)')(x)\n", " x = QDense(\n", " 1, \n", " kernel_quantizer=\"quantized_bits(4,0,1)\",\n", " bias_quantizer='quantized_bits(4,0,1)')(x)\n", " x = QActivation('sigmoid')(x)\n", " model = tf.keras.Model(inputs=[x_in], outputs=[x])\n", " return model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " qmodel = create_qmodel(BATCH_SIZE)\n", " custom_objects = {}\n", " qmodel.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])\n", "\n", "qmodel.summary()\n", "print('Train...')\n", "qmodel.fit(train_dataset,\n", " batch_size=BATCH_SIZE,\n", " epochs=10,\n", " verbose=2,\n", " validation_data=test_dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Converting a Model Automatically\n", "\n", "In addition to the drop-in replacement of Keras functions, we have written the following function to assist anyone who wants to quantize a network.\n", "\n", "__`model_quantize(model, quantizer_config, activation_bits, custom_objects=None, transfer_weights=False)`__\n", "\n", "This function converts an non-quantized model (such as the one from `model` 
in the previous example) into a quantized version, by applying a configuration specified by the dictionary `quantizer_config`, and `activation_bits` specified for unnamed activation functions, with this parameter probably being removed in future versions.\n", "\n", "The parameter `custom_objects` specifies object dictionary unknown to Keras, required when you copy a model with lambda layers, or customized layer functions, for example, and if `transfer_weights` is `True`, the returned model will have as initial weights the weights from the original model, instead of using random initial weights.\n", "\n", "The dictionary specified in `quantizer_config` can be indexed by a layer name or layer class name. In the example below, conv2d_1 corresponds to the first convolutional layer of the example, while QConv2D corresponds to the default behavior of two dimensional convolutional layers. The reader should note that right now we recommend using __`QActivation`__ with a dictionary to avoid the conversion of activations such as `softmax` and `linear`. In addition, although we could use `activation` field in the layers, we do not recommend that. 
\n", "\n", "`{\n", " \"conv2d_1\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QConv2D\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QDense\": {\n", " \"kernel_quantizer\": \"quantized_bits(3,0,1)\",\n", " \"bias_quantizer\": \"quantized_bits(3)\"\n", " },\n", " \"act_1\": \"quantized_relu(2)\",\n", " \"QActivation\": { \"relu\": \"quantized_relu(2)\" }\n", "}`\n", "\n", "In the following example, we will quantize the model using a different strategy.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bits = 4\n", "quantizer_config = {\n", " \"bidirectional\": {\n", " 'activation' : f\"quantized_tanh({bits})\",\n", " 'recurrent_activation' : f\"quantized_relu(4,0,1)\",\n", " 'kernel_quantizer' : f\"quantized_bits({bits}, alpha='auto')\",\n", " 'recurrent_quantizer' : f\"quantized_bits({bits}, alpha='auto')\",\n", " 'bias_quantizer' : f\"quantized_bits({bits}, alpha='auto')\",\n", " },\n", " \"dense\": {\n", " 'kernel_quantizer' : f\"quantized_bits({bits}), alpha='auto'\",\n", " 'bias_quantizer' : f\"quantized_bits({bits}), alpha='auto'\"\n", " },\n", " \"embedding_act\": f\"quantized_bits({bits}), alpha='auto'\",\n", "}\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " model = create_model(BATCH_SIZE)\n", " custom_objects = {}\n", " \n", " qmodel = model_quantize(model, quantizer_config, bits, custom_objects)\n", " qmodel.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])\n", " \n", "qmodel.summary()\n", "print('Train...')\n", "qmodel.fit(train_dataset,\n", " batch_size=BATCH_SIZE,\n", " epochs=10,\n", " verbose=2,\n", " validation_data=test_dataset)" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "## Quantizing a Model With `AutoQKeras`\n", "\n", "To quantize this model with `AutoQKeras`, we need to define the quantization for kernels, biases and activations; forgiving factors and quantization strategy.\n", "\n", "Below we define which quantizers are allowed for kernel, bias, activations and linear. Linear is a proxy that we use to capture `Activation(\"linear\")` to apply quantization without applying a non-linear operation. In some networks, we found that this trick may be necessary to better represent the quantization space.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " model = create_model(BATCH_SIZE)\n", " custom_objects = {}\n", " model.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "quantization_config = {\n", " \"kernel\": {\n", " \"stochastic_binary\": 1,\n", " \"stochastic_ternary\": 2,\n", " \"quantized_bits(4,0,1,alpha=1.0)\": 4,\n", " \"quantized_po2(4,1)\": 4\n", " },\n", " \"recurrent_kernel\": {\n", " \"stochastic_binary\": 1,\n", " \"stochastic_ternary\": 2,\n", " \"quantized_bits(4,0,1,alpha=1.0)\": 4,\n", " \"quantized_po2(4,1)\": 4\n", " \n", " },\n", " \"recurrent_activation\": {\n", " \"quantized_relu(4,0,1)\": 4 \n", " },\n", " \"bias\": {\n", " \"quantized_bits(4,0,1)\": 4,\n", " \"quantized_po2(4,1)\": 4\n", " },\n", " \"activation\" : {\n", " \"stochastic_ternary('auto')\": 2,\n", " \"quantized_tanh(4)\" : 4, \n", " \"quantized_relu_po2(4,1)\": 4,\n", " \"quantized_relu(4,2)\": 4,\n", " },\n", " \"linear\": { \n", " \"stochastic_ternary('auto')\" : 2,\n", " \"quantized_tanh(4)\" : 4, \n", " \"quantized_relu_po2(4,1)\": 4,\n", " \"quantized_relu(3,1)\": 3,\n", " \"quantized_relu(4,2)\": 4,\n", " }\n", "}\n", "\n", "limit = {\n", " \"Dense\": [4],\n", " 
\"Bidirectional\": [4],\n", " \"Activation\": [4],\n", " \"default\" : [4]*4\n", "}\n", "\n", "goal = {\n", " \"type\": \"bits\",\n", " \"params\": {\n", " \"delta_p\": 8.0,\n", " \"delta_n\": 8.0,\n", " \"rate\": 2.0,\n", " \"stress\": 1.0,\n", " \"input_bits\": 4,\n", " \"output_bits\": 4,\n", " \"ref_bits\": 4,\n", " \"config\": {\n", " \"default\": [\"parameters\", \"activations\"]\n", " }\n", " }\n", "}\n", "\n", "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": strategy,\n", "\n", " \"layer_indexes\": range(2, len(model.layers) - 1),\n", " \"max_trials\": 1000\n", "}\n", "\n", "print(\"quantizing layers:\", [model.layers[i].name for i in run_config[\"layer_indexes\"]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "autoqk = AutoQKeras(model, metrics=[\"acc\"], custom_objects={}, **run_config)\n", "autoqk.fit(\n", " train_dataset, \n", " validation_data=test_dataset, \n", " batch_size=BATCH_SIZE, \n", " epochs=10,\n", " verbose=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print_qmodel_summary(qmodel)\n", "print(get_quantization_dictionary(qmodel))" ] } ], "metadata": { "kernelspec": { "display_name": "Py3", "language": "python", "name": "py3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": 
"python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: qkeras/__init__.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Exports qkeras modules to quantizer package.""" # We use wildcard import for convenience at this moment, which will be later # refactored and removed. 
import tensorflow as tf from .b2t import * # pylint: disable=wildcard-import from .estimate import * # pylint: disable=wildcard-import from .qconv2d_batchnorm import QConv2DBatchnorm from .qconvolutional import * # pylint: disable=wildcard-import from .qdepthwise_conv2d_transpose import QDepthwiseConv2DTranspose from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm from .qlayers import * # pylint: disable=wildcard-import from .qmac import * # pylint: disable=wildcard-import from .qnormalization import * # pylint: disable=wildcard-import from .qoctave import * # pylint: disable=wildcard-import from .qpooling import * # pylint: disable=wildcard-import from .qrecurrent import * # pylint: disable=wildcard-import from .qseparable_conv2d_transpose import QSeparableConv2DTranspose #from .qtools.run_qtools import QTools #from .qtools.settings import cfg from .quantizers import * # pylint: disable=wildcard-import from .registry import * # pylint: disable=wildcard-import from .safe_eval import * # pylint: disable=wildcard-import assert tf.executing_eagerly(), "QKeras requires TF with eager execution mode on" __version__ = "0.9.0" ================================================ FILE: qkeras/autoqkeras/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Exports autoqkeras as a package.""" # We use wildcard import for convenience at this moment, which will be later # refactored and removed. from .autoqkeras_internal import * # pylint: disable=wildcard-import from .quantization_config import default_quantization_config # pylint: disable=line-too-long from .utils import * # pylint: disable=wildcard-import ================================================ FILE: qkeras/autoqkeras/autoqkeras_internal.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Implements support for auto-quantization.""" import collections import json import os import re import copy from absl import logging import keras_tuner as kt from keras_tuner import HyperModel from keras_tuner import BayesianOptimization from keras_tuner import Hyperband from keras_tuner import RandomSearch import numpy as np import six import tensorflow as tf import tensorflow.keras.backend as K from tensorflow.keras.metrics import binary_accuracy from tensorflow.keras.metrics import categorical_accuracy from tensorflow.keras.metrics import sparse_categorical_accuracy from qkeras.autoqkeras.forgiving_metrics import forgiving_factor # pylint: disable=line-too-long from qkeras.autoqkeras.forgiving_metrics import ForgivingFactor # pylint: disable=line-too-long from qkeras.autoqkeras.quantization_config import default_quantization_config # pylint: disable=line-too-long from qkeras.autoqkeras.utils import print_qmodel_summary from qkeras.utils import clone_model from qkeras.utils import model_quantize # AutoQKHyperModel is implemented on top of keras_tuner # It basically creates a quantized model based on some rules # and it computes a acc_delta that boosts the accuracy when # choosing smaller models. # Boosting function behaves like this. # We use the following formula to compute the decrease factor: # reference_size: number of parameters + activations of the model, # assuming an 8-bit implementation. # trial_size: number of parameters + activations of trial. # # 1) First, we compute how many times we decresed/increased the model # i = log(reference_size / trial_size) / log(rate) # # 2) Then, we use delta_p / delta_n if model is smaller/bigger # than reference model. 
# # delta = i * ( # (i < 0) * delta_n + (i >= 0) * delta_p # ) # # 3) the accuracy of the model (score) is adjusted by acc * delta # # The delta "boosts" the accuracy to allow worse model to be # chosen by hypermodel tuner. # REGISTERED_LAYERS = ["Dense", "Conv1D", "Conv2D", "DepthwiseConv2D", "SimpleRNN", "LSTM", "GRU", "Bidirectional", "Conv2DTranspose", "SeparableConv1D", "SeparableConv2D"] Q_LAYERS = list(map(lambda x : 'Q' + x, REGISTERED_LAYERS)) SEQUENCE_LAYERS = ["SimpleRNN", "LSTM", "GRU", "Bidirectional"] class AutoQKHyperModel(HyperModel): """Creates an hypermodel to attempt to quantize a reference model. Arguments: model: Model to be quantized. metrics: List of metrics to be used. custom_objects: Custom objects used by Keras during quantization. target: Secondary metric to chase during search ("bits" or "energy"). transfer_weights: if true, transfer weights from unquantized model. frozen_layers: if true, these layers will not be quantized but weights transferred from original model. activation_bits: parameter to be used by 'model_quantize'. limit: limit the number of bits in quantizers, specified as dictionary. tune_filters: one of "block", "layer", "none" for tuning entire network, each layer separately, or no tuning. tune_filters_exceptions: name of layers that will not be tuned. layer_indexes: we only quantize layers whose ids are in layer_indexes. learning_rate_optimizer: if true, we optimize learning rate along with other parameters. head_name: specify which head to calcuate score/trial-size from in autoqkeras quantization_config: dictionary containing configuration of quantizers for kernel, bias and activation. extend_model_metrics: If to append the trial size and score metrics to model metrics, which are used for AutoQKeras to determine the quality of a model. Returns: quantized model in trial and boosted accuracy function compiled into quantized model. 
""" def __init__( self, model, metrics, custom_objects=None, target=None, transfer_weights=False, frozen_layers=None, activation_bits=4, limit=None, tune_filters="none", tune_filters_exceptions=None, layer_indexes=None, learning_rate_optimizer=False, head_name=None, quantization_config=None, extend_model_metrics=True, ): self.model = model self.metrics = metrics self.custom_objects = custom_objects if custom_objects else {} self.target = target self.reference_size = self.target.get_reference(model) self.transfer_weights = transfer_weights self.frozen_layers = frozen_layers if frozen_layers else [] self.activation_bits = activation_bits self.head_name = head_name self.extend_model_metrics = extend_model_metrics # make sure we have at least 3 elements in list # first one for kernel, second one for bias and thid one for activations. # # limit is in the format, where default replaces missing values: # '{ # "Conv2D":[weight,bias,activation], # "RNN":[weight,bias,recurrent,activation], # "Dense":[weight,bias,activation], # "Activation":[activation] # "default": value # }' if limit is None: self.limit = {} else: self.limit = limit self.groups = {} assert isinstance(self.limit, dict) if self.limit.get("default", None) is None: default = 8 else: default = self.limit["default"] # make sure we have entries for every type of layer we process self._adjust_limit(default) print("Limit configuration:" + json.dumps(self.limit)) assert tune_filters in ["block", "layer", "none"] self.tune_filters = tune_filters self.tune_filters_exceptions = re.compile(tune_filters_exceptions) self.layer_indexes = layer_indexes self.learning_rate_optimizer = learning_rate_optimizer # load quantizer types for each type of quantizer if quantization_config is None: self.quantization_config = default_quantization_config else: self.quantization_config = quantization_config def _adjust_limit(self, default): """Makes sure limit has all the fields required.""" if isinstance(default, list): assert 3 <= 
len(default) <= 4 else: default = [default] * 3 # we consider that if name is not there, we will ignore the layer for name in REGISTERED_LAYERS: if name in self.limit: length = len(self.limit[name]) if length < 4 and name in SEQUENCE_LAYERS: assert len(default) == 4 self.limit[name] = self.limit[name] + default[length:] elif length < 3: # No recurrent limit needed for non recurrent layers self.limit[name] = self.limit[name] + default[length:2] + default[-1:] def _n(self, name, s_list): """Creates a unique name for the tuner.""" return name + "_".join([str(v) for v in s_list]) def _get_quantizer(self, hp, head, layer_name, layer_class_name, i_list=None, is_kernel=True, is_linear=False): """Gets a quantizer randomly for kernels/bias/activations.""" # first pick up which group we belong to. if not i_list: i_list = [] if is_linear: # linear quantizers field_name = "linear" kq = self.quantization_config["linear"] index = 0 q_list = list(kq.keys()) q_dict = kq elif "kernel" in head: # kernel quantizers field_name = "kernel" kq = self.quantization_config["kernel"] index = 0 q_list = list(kq.keys()) q_dict = kq elif "bias" in head: # bias quantizers field_name = "bias" bq = self.quantization_config["bias"] index = 1 q_list = list(bq.keys()) q_dict = bq elif "pointwise_kernel" in head: # limit is same as kernel # pointwise kernel quantizers field_name = "pointwise_kernel" kq = self.quantization_config["pointwise_kernel"] index = 2 q_list = list(kq.keys()) q_dict = kq elif "recurrent_kernel" in head: # limit is same as kernel # recurrent kernel quantizers field_name = "recurrent_kernel" kq = self.quantization_config["recurrent_kernel"] index = 2 q_list = list(kq.keys()) q_dict = kq elif "recurrent_activation" in head: # limit is same as kernel # recurrent activation quantizers field_name = "recurrent_activation" raq = self.quantization_config["recurrent_activation"] index = -1 q_list = list(raq.keys()) q_dict = raq else: # activation quantizers field_name = "activation" aq = 
self.quantization_config["activation"] index = -1 q_list = list(aq.keys()) q_dict = aq # we first we search for layer name. If it is not there, we switch to # layer class name. found_pattern = False name = layer_class_name count = -1 for i, pattern in enumerate(self.limit): if re.match(pattern, layer_name): found_pattern = True name = pattern count = i break # for partially quantized networks we may not have # the layer class name in the set. if name == layer_class_name and name not in self.limit: return None, -1 # groups is a dictionary that contains dictionary of the # patterns so that we can group everything together if found_pattern: if name in self.groups and index in self.groups[name]: return self.groups[name][index] # not there, let's use a different name for # the head and field head = "qk_group_" + str(count) + "_" + field_name head = name + "_" + field_name # limit group can be a list of quantizers or a # number that tells us maximum number of bits if isinstance(self.limit[name][index], list): # we assume this is a subset of the q_keys # entry in quantization_config will be like: # "Conv2D": [ ["q1", "q2", "q3"], ... ] # # we always assume this list is a subset of # the original list or we will raise an # error. q_list = self.limit[name][index] q_dict = { key: q_dict[key] for key in q_list } else: q_dict = { key: value for (key, value) in q_dict.items() if value <= self.limit[name][index] } q_list = list(q_dict.keys()) # didn't found a match in groups, create one. if len(q_list) == 1: q_name = hp.Fixed(self._n(head + "_quantizer", i_list), q_list[0]) else: q_name = hp.Choice(self._n(head + "_quantizer", i_list), q_list) if found_pattern: if name not in self.groups: self.groups[name] = {index: (q_name, q_dict[q_name])} else: self.groups[name][index] = (q_name, q_dict[q_name]) return (q_name, q_dict[q_name]) def quantize_model(self, hp): """Quantize model by hyperparameter search and extracting size schema.""" # configuration for quantization. 
q_dict = {} model = clone_model(self.model, self.custom_objects) fanin = [] filter_range = [0.5, 0.75, 1.0, 1.5, 2.0] # network_filters=hp.Choice(...) should only be defined if we are sure # current blocks has any layer that need filter sweep. # Otherwise, when no layer needs filter sweep and a hp variable is defined, # there will be uneffective trials that loop around the network # filter range, even though none of the filter sweep was ever applied to # any layers. Therfore, we use filter_sweep_enabled to mark if any layer # in current block needs filter sweep. kernel_quantizer_dict = {} filter_sweep_enabled = False for layer in model.layers: if layer.__class__.__name__ in REGISTERED_LAYERS: kernel_quantizer, bits = self._get_quantizer( hp, layer.name + "_kernel", layer.name, layer.__class__.__name__, is_kernel=True) kernel_quantizer_dict[layer.name] = (kernel_quantizer, bits) # kernel_quantizer is not None -> layer in the current block need # to be quantized if kernel_quantizer: if ( not filter_sweep_enabled and self.tune_filters in ["layer", "block"] and not self.tune_filters_exceptions.search(layer.name) and layer.__class__.__name__ in ["Dense", "Conv1D", "Conv2D", "Conv2DTranspose"] ): filter_sweep_enabled = True if layer.__class__.__name__ in SEQUENCE_LAYERS: recurrent_quantizer, _ = self._get_quantizer( hp, layer.name + "_recurrent_kernel", layer.name, layer.__class__.__name__, is_kernel=True) if layer.__class__.__name__ in ["SeparableConv1D", "SeparableConv2D"]: pointwise_quantizer, _ = self._get_quantizer( hp, layer.name + "_pointwise_kernel", layer.name, layer.__class__.__name__, is_kernel=True) if self.tune_filters == "block" and filter_sweep_enabled: network_filters = hp.Choice( "network_filters", values=filter_range, default=1.0 ) else: network_filters = 1.0 for layer_id, layer in enumerate(model.layers): # we can use these indexes to disable some layers, like the last # layer if self.layer_indexes is not None and layer_id not in self.layer_indexes: 
continue layer_d = {} if layer.__class__.__name__ in Q_LAYERS: weights = layer.get_weights()[0] if ( layer.get_quantizers()[0] and hasattr(layer.get_quantizers()[0], "bits") ): bits = layer.get_quantizers()[0].bits else: bits = 8 fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.) if layer.__class__.__name__ in REGISTERED_LAYERS: # difference between depthwise and the rest is just the name # of the kernel. if layer.__class__.__name__ in [ "DepthwiseConv2D", "SeparableConv1D", "SeparableConv2D" ]: kernel_name = "depthwise_quantizer" else: kernel_name = "kernel_quantizer" # sample kernel quantizer. (kernel_quantizer, bits) = kernel_quantizer_dict[layer.name] if not kernel_quantizer: continue # process fanin here if bits < 8: weights = layer.get_weights()[0] fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.) # we only want to do that if we are going to quantize layer if ( self.tune_filters in ["layer", "block"] and not self.tune_filters_exceptions.search(layer.name) and layer.__class__.__name__ in [ "Dense", "Conv1D", "Conv2D", "Conv2DTranspose", "SeparableConv1D", "SeparableConv2D" ] ): if self.tune_filters == "layer": layer_filters = hp.Choice( "network_filters_" + layer.name, values=filter_range, default=1.0 ) else: layer_filters = network_filters if layer.__class__.__name__ == "Dense": layer.units = max(int(layer.units * layer_filters), 1) elif layer.__class__.__name__ in [ "Conv1D", "Conv2D", "Conv2DTranspose", "SeparableConv1D", "SeparableConv2D" ]: layer.filters = max(int(layer.filters * layer_filters), 1) layer_d[kernel_name] = kernel_quantizer if layer.__class__.__name__ in SEQUENCE_LAYERS: layer_d['recurrent_quantizer'] = recurrent_quantizer if layer.__class__.__name__ in ["SeparableConv1D", "SeparableConv2D"]: layer_d['pointwise_quantizer'] = pointwise_quantizer if layer.__class__.__name__ in ["LSTM", "GRU", "Bidirectional"]: layer_d['recurrent_activation'], _ = self._get_quantizer( hp, layer.name + "_recurrent_activation", layer.name, 
layer.__class__.__name__, is_kernel=False) # if we use bias, sample quantizer. if layer.__class__.__name__ == "Bidirectional": layer_d["bias_quantizer"], bits = self._get_quantizer( hp, layer.name + "_bias", layer.name, layer.__class__.__name__, is_kernel=False) layer_d["activation"], bits = self._get_quantizer( hp, layer.name + "_activation", layer.name, layer.__class__.__name__, is_kernel=False) q_dict[layer.name] = layer_d else: if layer.use_bias: layer_d["bias_quantizer"], bits = self._get_quantizer( hp, layer.name + "_bias", layer.name, layer.__class__.__name__, is_kernel=False) # if activation is not linear/softmax we need to process it. if layer.activation is None: is_softmax = False is_linear = False else: if isinstance(layer.activation, six.string_types): is_softmax = layer.activation == "softmax" is_linear = layer.activation == "linear" else: is_softmax = layer.activation.__name__ == "softmax" is_linear = layer.activation.__name__ == "linear" if not is_softmax and not is_linear: layer_d["activation"], bits = self._get_quantizer( hp, layer.name + "_activation", layer.name, layer.__class__.__name__, is_kernel=False) q_dict[layer.name] = layer_d elif layer.__class__.__name__ in ["Reshape"]: # we cannot handle fine tuning filters per layer right now. assert self.tune_filters in ["none", "block"] # we need to make sure this pattern exists, this should only occur for # "scheduler", so the name will be complete and not a pattern. 
if ( self.tune_filters == "none" or layer.name not in self.limit or self.tune_filters_exceptions.search(layer.name) ): continue if K.image_data_format() == "channels_last": layer.target_shape = layer.target_shape[:-1] + ( min(int(layer.target_shape[-1] * network_filters), 1),) else: layer.target_shape = (int(layer.target_shape[0] * network_filters), ) + layer.target_shape[1:] elif layer.__class__.__name__ in ["Activation"]: if isinstance(layer.activation, six.string_types): is_linear = layer.activation == "linear" is_softmax = layer.activation == "softmax" else: is_linear = layer.activation.__name__ == "linear" is_softmax = layer.activation.__name__ == "softmax" # if it is a linear activation, we will notify the # quantizer we are searching for linear type of # quantizers if not is_softmax: activation, bits = self._get_quantizer( hp, layer.name + "_activation", layer.name, layer.__class__.__name__, is_kernel=False, is_linear=is_linear) if not activation: continue # look at documentation on model_quantize q_dict[layer.name] = activation elif layer.__class__.__name__ in self.limit: # mark it for conversion q_dict[layer.name] = {} else: for pattern in self.limit: if re.match(pattern, layer.name): q_dict[layer.name] = {} break q_model = model_quantize( model, q_dict, self.activation_bits, custom_objects=self.custom_objects, transfer_weights=self.transfer_weights) return q_model, fanin def build(self, hp): """Builds hyperparameterized quantized model.""" self.groups = {} # we are not using the fanin right now. q_model, _ = self.quantize_model(hp) # transfer weights from previous run as we know we will not if self.learning_rate_optimizer: # if learning_rate_optimizer, we try to transfer weights from previous run print("... 
freezing layers {}.".format(", ".join(self.frozen_layers))) for layer_name in self.frozen_layers: o_weights = self.model.get_layer(layer_name).get_weights() layer = q_model.get_layer(layer_name) # don't know if setting trainable to False is good or not yet # try to do "soft-freeze" by transferring weights. More experiments # needed before we decide what to do. # layer.trainable = False weights = layer.get_weights() # because we can be changing number of layers, we do not know # if we can really use some of the weights or not. equal_layer = True for w in range(len(o_weights)): if o_weights[w].shape != weights[w].shape: equal_layer = False break if equal_layer: layer.set_weights(o_weights) self.trial_size = self.target.get_trial(q_model) # we will use a boosted accuracy computation delta = self.target.delta() # by default, we use the first metric specified by the # user to be the target metric. if not self.metrics: score_metric = None elif isinstance(self.metrics, dict): if not self.head_name: # if head_name not provided, find the first metric from the dict score_key = list(self.metrics.keys())[0] else: # find the metric assoicated with the head_name score_key = self.head_name score_metric = self.metrics[score_key] if isinstance(score_metric, list): score_metric = score_metric[0] elif isinstance(self.metrics, list): score_metric = self.metrics[0] self.score = AutoQKHyperModel.adjusted_score( self, delta, score_metric) # some papers suggest that we use learning_rate * sqrt(fanin) / layer # we cannot do that right now, but we can definitely do that # if we are quantizing one layer at a time # # https://arxiv.org/pdf/1511.00363.pdf # we use the magic number to smooth out the average total_factor = self.target.get_total_factor() delta_lr = 1.0 + (total_factor < 0) * total_factor # we assume model has been compiled at least. 
lr = float(self.model.optimizer.lr.numpy()) # we assume that delta_lr can lower lr to accommodate # for more quantization # # if learning rate scheduler is used, we assume the callback to manage # learning rate. Just set it to constant. if self.learning_rate_optimizer: lr_range = list(lr * np.linspace(delta_lr, 1.1, 5)) lr_choice = hp.Choice("learning_rate", lr_range) self.model.optimizer.learning_rate = lr_choice else: lr_choice = lr print("learning_rate: {}".format(lr)) optimizer = self.model.optimizer q_model.summary() metrics = self.metrics # extend metrics by including score and trial_size metrics if self.extend_model_metrics: ext_metrics = copy.deepcopy(metrics) if isinstance(ext_metrics, dict): # for dict, add trial_size_metric and score metric to target output if not self.head_name: # if head_name not provided, find the first metric from the dict score_key = list(ext_metrics.keys())[0] else: # find the metric assoicated with the head_name score_key = self.head_name score_metric = ext_metrics[score_key] if isinstance(score_metric, list): score_metric += [self.trial_size_metric(self.trial_size), self.score] else: score_metric = [score_metric] score_metric += [self.trial_size_metric(self.trial_size), self.score] ext_metrics[score_key] = score_metric else: ext_metrics += [ self.trial_size_metric(self.trial_size), self.score] metrics = ext_metrics q_model.compile( optimizer=optimizer, loss=self.model.loss, metrics=metrics ) self.q_model = q_model # this just prints a summary of the quantization for debugging # purposes self.target.print_stats() print_qmodel_summary(q_model) return q_model @staticmethod def adjusted_score(hyper_model, delta, metric_function=None): def score(y_true, y_pred): y_t_rank = len(y_true.shape.as_list()) y_p_rank = len(y_pred.shape.as_list()) y_t_last_dim = y_true.shape.as_list()[-1] y_p_last_dim = y_pred.shape.as_list()[-1] is_binary = y_p_last_dim == 1 is_sparse_categorical = ( y_t_rank < y_p_rank or y_t_last_dim == 1 and y_p_last_dim > 
1) if isinstance(metric_function, six.string_types): if metric_function in ["accuracy", "acc"]: if is_binary: metric = binary_accuracy(y_true, y_pred) elif is_sparse_categorical: metric = sparse_categorical_accuracy(y_true, y_pred) else: metric = categorical_accuracy(y_true, y_pred) else: metric = categorical_accuracy(y_true, y_pred) else: metric = metric_function(y_true, y_pred) return K.cast(metric * (1.0 + delta), K.floatx()) if not metric_function: metric_function = "accuracy" return score @staticmethod def trial_size_metric(trial_size): def trial(y_true, y_pred): # pylint: disable=unused-argument return K.cast(trial_size, K.floatx()) return trial class AutoQKeras: """Performs autoquantization in Keras model. Arguments: model: Model to be quantized. metrics: List of metrics to be used. custom_objects: Custom objects used by Keras during quantization. goal: Metric to compute secondary goal of search (bits or energy) output_dir: name of output directory to store results. mode: random, hyperband or bayesian used by keras_tuner. custom_tuner: The Keras Tuner class to use to search hyperparams transfer_weights: if true, transfer weights from unquantized model. frozen_layers: if true, these layers will not be quantized but weights transferred from original model. activation_bits: parameter to be used by 'model_quantize'. limit: limit the number of bits in quantizers specified as a dictionary. tune_filters: one of "block", "layer", "none" for tuning entire network, each layer separately, or no tuning. tune_filters_exceptions: name of layers that will not be tuned. layer_indexes: indexes of layers we will quantize. learning_rate_optimizer: if true, user will provide lr scheduler callback. quantization_config: file name of dictionary containing configuration of quantizers for kernel, bias and activation. head_name: specify which head to calcuate score/trial-size from in autoqkeras score_metric: Str. Optional metric name to use to evaluate the trials. 
Defaults to val_score tuner_kwargs: parameters for keras_tuner depending on whether mode is random, hyperband or baeysian. Please refer to the documentation of kerstuner Tuners. """ def __init__( self, model, metrics=None, custom_objects=None, goal=None, output_dir="result", mode="random", custom_tuner=None, transfer_weights=False, frozen_layers=None, activation_bits=4, limit=None, tune_filters="none", tune_filters_exceptions=None, learning_rate_optimizer=False, layer_indexes=None, quantization_config=None, overwrite=True, head_name=None, score_metric=None, **tuner_kwargs): # Collect input arguments to AutoQKeras for usage by custom tuner autoqkeras_input_args = locals() if not metrics: metrics = [] if not custom_objects: custom_objects = {} # goal: { "type": ["bits", "energy"], "params": {...} } or ForgivingFactor # type # For type == "bits": # delta_p: increment (in %) of the accuracy if trial is smaller. # delta_n: decrement (in %) of the accuracy if trial is bigger. # rate: rate of decrease/increase in model size in terms of bits. # input_bits; size of input tensors. # output_bits; size of output tensors. # stress: parameter to reduce reference size to force tuner to # choose smaller models. # config: configuration on what to compute for each layer # minimum configuration is { "default": ["parameters", "activations"] } # use simplest one - number of bits if not goal: goal = { "type": "bits", "params": { "delta_p": 8.0, "delta_n": 8.0, "rate": 2.0, "stress": 1.0, "input_bits": 8, "output_bits": 8, "ref_bits": 8, "config": { "default": ["parameters", "activations"] } } } self.overwrite = overwrite # for multi-head model, we need to specify which head(/output) that # score and trial metric needs to calculate from self.head_name = head_name # if we have not created it already, create new one. 
if not isinstance(goal, ForgivingFactor): target = forgiving_factor[goal["type"]](**goal["params"]) else: target = goal # if no metrics were specified, we want to make sure we monitor at least # accuracy. if not metrics: metrics = ["acc"] self.hypermodel = AutoQKHyperModel( model, metrics, custom_objects, target, transfer_weights=transfer_weights, frozen_layers=frozen_layers, activation_bits=activation_bits, limit=limit, tune_filters=tune_filters, tune_filters_exceptions=tune_filters_exceptions, layer_indexes=layer_indexes, learning_rate_optimizer=learning_rate_optimizer, head_name=head_name, quantization_config=quantization_config ) # right now we create unique results directory idx = 0 name = output_dir if self.overwrite: while os.path.exists(name): idx += 1 name = output_dir + "_" + str(idx) output_dir = name self.output_dir = output_dir if score_metric is None: if self.head_name: score_metric = "val_" + self.head_name + "_score" else: score_metric = "val_score" assert mode in ["random", "bayesian", "hyperband"] if custom_tuner is not None: self.tuner = custom_tuner( self.hypermodel, autoqkeras_config=autoqkeras_input_args, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) elif mode == "random": self.tuner = RandomSearch( self.hypermodel, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) elif mode == "bayesian": self.tuner = BayesianOptimization( self.hypermodel, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) elif mode == "hyperband": self.tuner = Hyperband( self.hypermodel, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) else: pass self.tuner.search_space_summary() def _has_earlystopping(self, callbacks): """Check if EarlyStopping has been defined or not.""" if callbacks is None: return False for callback in callbacks: if isinstance(callback, tf.keras.callbacks.EarlyStopping): return True return False def 
history(self, number_of_trials=-1): """Returns the history of the model search.""" trials = self.tuner.oracle.get_best_trials(number_of_trials) state = [trial.get_state() for trial in trials] result = {} result["score"] = [ state[i]["score"] for i in range(len(state)) if trials[i].score is not None ] for i in range(len(state)): if trials[i].score is not None: keys = state[i]["metrics"]["metrics"].keys() for key in keys: if key != "score" and not key.startswith( "val_") and key != "loss" and key != "trial": cur_accuracy = state[i]["metrics"]["metrics"][key][ "observations"][0]["value"][0] if "val_" + key in state[i]["metrics"]["metrics"].keys(): cur_val_accuracy = state[i]["metrics"]["metrics"]["val_" + key][ "observations"][0]["value"][0] else: cur_val_accuracy = None # only update result if both key and val_key exist if cur_val_accuracy: if key not in result.keys(): result[key] = [cur_accuracy] result["val_" + key] = [cur_val_accuracy] else: result[key].append(cur_accuracy) result["val_" + key].append(cur_val_accuracy) if self.head_name: trial_from_output = self.head_name + "_trial" else: trial_from_output = "trial" result["trial_size"] = [ state[i]["metrics"]["metrics"][trial_from_output]["observations"][0] ["value"][0] for i in range(len(state)) if trials[i].score is not None ] return result def fit(self, *fit_args, **fit_kwargs): """Invokes tuner fit algorithm.""" callbacks = fit_kwargs.get("callbacks", None) if callbacks is None: callbacks = [] epochs = fit_kwargs.get("epochs", None) if epochs is None: epochs = 10 if not self._has_earlystopping(callbacks): callbacks = callbacks + [ tf.keras.callbacks.EarlyStopping( "val_loss", patience=min(20, epochs//5)) ] fit_kwargs["callbacks"] = callbacks self.tuner.search(*fit_args, **fit_kwargs) @staticmethod def get_best_lr(qmodel): """Extracts best lr of model.""" return qmodel.optimizer.lr.numpy() def get_best_model(self): params = self.tuner.get_best_hyperparameters()[0] q_model = self.tuner.hypermodel.build(params) 
self.learning_rate = q_model.optimizer.lr.numpy() return q_model def get_learning_rate(self): return self.learning_rate class AutoQKerasScheduler: """Performs autoquantization one layer/group at a time. Arguments: model: Model to be quantized. metrics: List of metrics to be monitored. custom_objects: Custom objects used by Keras during quantization. goal: Metric to compute secondary goal of search (bits or energy) output_dir: name of output directory to store results. mode: random, hyperband or bayesian used by keras_tuner. transfer_weights: if true, transfer weights from unquantized model. activation_bits: parameter to be used by 'model_quantize'. limit: limit the number of bits in quantizers specified as a dictionary. tune_filters: one of "block", "layer", "none" for tuning entire network, each layer separately, or no tuning. tune_filters_exceptions: name of layers that will not be tuned. layer_indexes: indexes of layer to be quantized. learning_rate_optimizer: if true, user will provide lr scheduler callback. blocks: list of re patterns specifygin group configuration for layers. schedule_block: "sequential" or "cost". Schedule blocks using the order of the groups or decreasing cost (energy or bits). quantization_config: file name of dictionary containing configuration of quantizers for kernel, bias and activation. debug: if True, fit will just print the groups for debugging purposes. head_name: specify which head to calcuate score/trial-size from in autoqkeras tuner_kwargs: parameters for keras_tuner depending on whether mode is random, hyperband or baeysian. Please refer to the documentation of kerstuner Tuners. 
""" def __init__( self, model, metrics=None, custom_objects=None, goal=None, output_dir="result", mode="random", transfer_weights=False, activation_bits=4, limit=None, tune_filters="none", tune_filters_exceptions=None, layer_indexes=None, learning_rate_optimizer=False, blocks=None, schedule_block="sequential", quantization_config=None, overwrite=True, debug=False, head_name=None, **tuner_kwargs): if not metrics: metrics = [] if not custom_objects: custom_objects = {} # goal: { "type": ["bits", "energy"], "params": {...} } # For type == "bits": # delta_p: increment (in %) of the accuracy if trial is smaller. # delta_n: decrement (in %) of the accuracy if trial is bigger. # rate: rate of decrease/increase in model size in terms of bits. # input_bits; size of input tensors. # output_bits; size of output tensors. # stress: parameter to reduce reference size to force tuner to # choose smaller models. # config: configuration on what to compute for each layer # minimum configuration is { "default": ["parameters", "activations"] } # use simplest one - number of bits if not goal: goal = { "type": "bits", "params": { "delta_p": 8.0, "delta_n": 8.0, "rate": 2.0, "stress": 1.0, "input_bits": 8, "output_bits": 8, "ref_bits": 8, "config": { "default": ["parameters", "activations"] } } } self.target = forgiving_factor[goal["type"]](**goal["params"]) self.model = model self.metrics = metrics self.custom_objects = custom_objects self.mode = mode self.transfer_weights = transfer_weights self.activation_bits = activation_bits self.limit = limit self.tune_filters = tune_filters self.tune_filters_exceptions = tune_filters_exceptions self.layer_indexes = layer_indexes self.learning_rate_optimizer = learning_rate_optimizer self.blocks = blocks self.schedule_block = schedule_block self.quantization_config = quantization_config self.tuner_kwargs = tuner_kwargs self.debug = debug self.head_name = head_name self.autoqk = None self.learning_rate = model.optimizer.lr.numpy() self.overwrite = 
overwrite assert self.schedule_block in ["sequential", "cost"] # right now we create unique results directory idx = 0 name = output_dir if self.overwrite: while os.path.exists(name): idx += 1 name = output_dir + "_" + str(idx) output_dir = name self.output_dir = output_dir self.next_block = self.get_next_block(overwrite) if self.next_block > 0: strategy = self.tuner_kwargs.get("distribution_strategy", None) if strategy: with strategy.scope(): self.model = tf.keras.models.load_model( os.path.join( self.output_dir, "model_block_" + str(self.next_block - 1)), custom_objects=self.custom_objects) else: self.model = tf.keras.models.load_model( os.path.join( self.output_dir, "model_block_" + str(self.next_block - 1)), custom_objects=self.custom_objects) print("Load model completed") def get_next_block(self, overwrite): """Get the next block id to be worked on.""" if overwrite: return 0 else: try: with tf.io.gfile.GFile(os.path.join(self.output_dir, "scheduler.json"), "r") as f: scheduler_json = f.read() scheduler = json.loads(scheduler_json) return scheduler["next_block"] except: # pylint: disable=bare-except return 0 def get_limit(self, model, pattern): """Apply patterned group to limit to obtain new limit set.""" limit = self.limit new_limit = {} new_pattern = collections.defaultdict(list) for layer_name in self.grouped_patterns[pattern]: layer = model.get_layer(layer_name) layer_class_name = layer.__class__.__name__ target_quantizers = limit.get(layer_class_name, -1) for limit_pattern in limit: if re.match(limit_pattern, layer_name): target_quantizers = limit[limit_pattern] new_pattern[limit_pattern].append(layer_name) layer_name = limit_pattern break if target_quantizers != -1: new_limit[layer_name] = target_quantizers for key in new_pattern: # grouped pattern in regex need to be ^(word1|word2|...)$ instead of # ^word1|word2|...$; otherwise it cause non-exact match, # e.g., fc.*_0 and fc.*_0_relu were miss-matched new_key = "^" + "(" + "|".join(new_pattern[key]) + ")" 
+ "$" new_limit[new_key] = new_limit[key] if new_key != key: del new_limit[key] return new_limit def fit(self, *fit_args, **fit_kwargs): """Invokes tuner fit algorithm.""" self.history = [] self.compute_block_costs(self.blocks, self.model) if self.tuner_kwargs.get("max_trials", None): max_trials = float(self.tuner_kwargs["max_trials"]) lr = self.model.optimizer.lr.numpy() model = self.model frozen_layers = [] for i, (pattern, cost) in enumerate(self.retrieve_max_block()): # now create new limit pattern if not self.overwrite: if i < self.next_block: print("Resume tuning. Skipping block ", i) continue print("... block cost: {:.0f} / {:.0f}".format(cost, self.reference_size)) if self.tuner_kwargs.get("max_trials", None): self.tuner_kwargs["max_trials"] = int( max(10, max_trials * cost / self.reference_size)) print("... adjusting max_trials for this block to {}".format( self.tuner_kwargs["max_trials"])) limit = self.get_limit(model, pattern) new_frozen_layers = self.grouped_patterns[pattern] # if dictionary is empty we did not match anything. # we have a bug in the patterns specified by the # user. 
assert limit print("Pattern {} is : {}".format(i, limit)) if self.debug: frozen_layers = frozen_layers + new_frozen_layers continue self.autoqk = AutoQKeras( model, self.metrics, custom_objects=self.custom_objects, goal=self.target, output_dir=self.output_dir + "/" + str(i), mode=self.mode, transfer_weights=self.transfer_weights, frozen_layers=frozen_layers, activation_bits=self.activation_bits, limit=limit, tune_filters=self.tune_filters, tune_filters_exceptions=self.tune_filters_exceptions, layer_indexes=self.layer_indexes, learning_rate_optimizer=self.learning_rate_optimizer, quantization_config=self.quantization_config, overwrite=self.overwrite, head_name=self.head_name, **self.tuner_kwargs) self.autoqk.fit(*fit_args, **fit_kwargs) self.autoqk.tuner.results_summary() self.history.append(self.autoqk.history()) model = self.autoqk.get_best_model() self.learning_rate = model.optimizer.lr.numpy() # restore learning rate # this is just a placeholder for the optimizer. model.compile( model.optimizer, loss=self.model.loss, metrics=self.model.metrics) frozen_layers = frozen_layers + new_frozen_layers filename = self.output_dir + "/model_block_" + str(i) model.save(filename) self.next_block = i + 1 # update scheduler json with tf.io.gfile.GFile(os.path.join(self.output_dir, "scheduler.json"), "w") as f: f.write(json.dumps({"next_block": self.next_block})) if self.debug: return self.best_model = model # make all layers trainable again for layer_name in frozen_layers: layer = model.get_layer(layer_name) layer.trainable = True def compute_block_costs(self, patterns, model): """Computes costs for each block.""" # get block cost for original model self.reference_size = self.target.get_reference(model) self.model_size = self.target.get_reference_stats() # first group layers into the patterns groups = {pattern: [] for pattern in patterns} for layer_id, layer in enumerate(model.layers): if ( self.layer_indexes is not None and layer_id not in self.layer_indexes ): continue for 
pattern in groups: if re.match(pattern, layer.name): groups[pattern].append(layer.name) self.grouped_patterns = groups # now compute cost for each group self.costs = [] for pattern in patterns: # self.grouped_patterns: total = 0 for layer in self.grouped_patterns[pattern]: if layer in self.model_size: total += self.model_size[layer]["total"] self.costs.append((pattern, total)) # the costs will be sorted by the total cost of the group if self.schedule_block == "cost": self.costs = sorted(self.costs, key=lambda cost_tuple: -cost_tuple[1]) def retrieve_max_block(self): for cost in self.costs: yield cost def get_history(self): """Returns the history of the model search.""" return self.history def get_best_model(self): """Returns the best model.""" # check if we have run fit first. if not self.autoqk: return None self.autoqk.hypermodel.target.print_stats() print_qmodel_summary(self.best_model) return self.best_model def get_learning_rate(self): return self.learning_rate ================================================ FILE: qkeras/autoqkeras/examples/run/get_data.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Extracts sample dataset from tfds.""" import numpy as np from tensorflow.keras.utils import to_categorical import tensorflow_datasets as tfds def get_data(dataset_name, fast=False): """Returns dataset from tfds.""" ds_train = tfds.load(name=dataset_name, split="train", batch_size=-1) ds_test = tfds.load(name=dataset_name, split="test", batch_size=-1) dataset = tfds.as_numpy(ds_train) x_train, y_train = dataset["image"].astype(np.float32), dataset["label"] dataset = tfds.as_numpy(ds_test) x_test, y_test = dataset["image"].astype(np.float32), dataset["label"] if len(x_train.shape) == 3: x_train = x_train.reshape(x_train.shape + (1,)) x_test = x_test.reshape(x_test.shape + (1,)) x_train /= 256.0 x_test /= 256.0 x_mean = np.mean(x_train, axis=0) x_train -= x_mean x_test -= x_mean nb_classes = np.max(y_train) + 1 y_train = to_categorical(y_train, nb_classes) y_test = to_categorical(y_test, nb_classes) print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") if fast: i_train = np.arange(x_train.shape[0]) np.random.shuffle(i_train) i_test = np.arange(x_test.shape[0]) np.random.shuffle(i_test) s_x_train = x_train[i_train[0:fast]] s_y_train = y_train[i_train[0:fast]] s_x_test = x_test[i_test[0:fast]] s_y_test = y_test[i_test[0:fast]] return ((s_x_train, s_y_train), (x_train, y_train), (s_x_test, s_y_test), (x_test, y_test)) else: return (x_train, y_train), (x_test, y_test) if __name__ == "__main__": get_data("mnist") get_data("fashion_mnist") get_data("cifar10", fast=1000) get_data("cifar100") ================================================ FILE: qkeras/autoqkeras/examples/run/get_model.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with 
the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from qkeras.autoqkeras.examples.run.networks import ConvBlockNetwork # pylint: disable=line-too-long def get_model(dataset): """Returns a model for the demo of AutoQKeras.""" if dataset == "mnist": model = ConvBlockNetwork( shape=(28, 28, 1), nb_classes=10, kernel_size=3, filters=[16, 32, 48, 64, 128], dropout_rate=0.2, with_maxpooling=False, with_batchnorm=True, kernel_initializer="he_uniform", bias_initializer="zeros", ).build() elif dataset == "fashion_mnist": model = ConvBlockNetwork( shape=(28, 28, 1), nb_classes=10, kernel_size=3, filters=[16, [32]*3, [64]*3], dropout_rate=0.2, with_maxpooling=True, with_batchnorm=True, use_separable="mobilenet", kernel_initializer="he_uniform", bias_initializer="zeros", use_xnornet_trick=True ).build() elif dataset == "cifar10": model = ConvBlockNetwork( shape=(32, 32, 3), nb_classes=10, kernel_size=3, filters=[16, [32]*3, [64]*3, [128]*3], dropout_rate=0.2, with_maxpooling=True, with_batchnorm=True, use_separable="mobilenet", kernel_initializer="he_uniform", bias_initializer="zeros", use_xnornet_trick=True ).build() elif dataset == "cifar100": model = ConvBlockNetwork( shape=(32, 32, 3), nb_classes=100, kernel_size=3, filters=[16, [32]*3, [64]*3, [128]*3, [256]*3], dropout_rate=0.2, with_maxpooling=True, with_batchnorm=True, use_separable="mobilenet", kernel_initializer="he_uniform", bias_initializer="zeros", use_xnornet_trick=True ).build() model.summary() return model ================================================ FILE: 
qkeras/autoqkeras/examples/run/networks/__init__.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from .conv_block import ConvBlockNetwork ================================================ FILE: qkeras/autoqkeras/examples/run/networks/conv_block.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import os

from tensorflow.initializers import *  # pylint: disable=wildcard-import
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import DepthwiseConv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *  # pylint: disable=wildcard-import
from qkeras import *  # pylint: disable=wildcard-import


class ConvBlockNetwork(object):
  """Creates Convolutional block type of network."""

  def __init__(
      self,
      shape,
      nb_classes,
      kernel_size,
      filters,
      dropout_rate=0.0,
      with_maxpooling=True,
      with_batchnorm=True,
      kernel_initializer="he_normal",
      bias_initializer="zeros",
      use_separable=False,
      use_xnornet_trick=False
  ):
    """Creates class.

    Args:
      shape: shape of inputs.
      nb_classes: number of output classes.
      kernel_size: kernel_size of network.
      filters: sizes of filters (if entry is a list, we create a block).
      dropout_rate: dropout rate if > 0.
      with_maxpooling: if true, use maxpooling.
      with_batchnorm: with BatchNormalization.
      kernel_initializer: kernel_initializer.
      bias_initializer: bias and beta initializer.
      use_separable: if "dsp", do conv's 1x3 + 3x1. If "mobilenet", use
        MobileNet separable convolution. If False or "none", perform single
        conv layer.
      use_xnornet_trick: use bn+act after max pool to enable binary to
        avoid saturation to largest value.
    """
    # All parameters are stored verbatim; they are only consumed by build().
    self.shape = shape
    self.nb_classes = nb_classes
    self.kernel_size = kernel_size
    self.filters = filters
    self.dropout_rate = dropout_rate
    self.with_maxpooling = with_maxpooling
    self.with_batchnorm = with_batchnorm
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.use_separable = use_separable
    self.use_xnornet_trick = use_xnornet_trick

  def build(self):
    """Builds model."""
    x = x_in = Input(self.shape, name="input")

    # Outer loop: one iteration per entry of self.filters; a list entry
    # expands into a block of several conv layers sharing one maxpool.
    for i in range(len(self.filters)):
      # Layer-name suffixes encode (block index, layer-in-block index) so
      # names stay unique; suffix parts are omitted when there is only one.
      if len(self.filters) > 1:
        name_suffix_list = [str(i)]
      else:
        name_suffix_list = []

      # Normalize a scalar entry to a single-layer block.
      if not isinstance(self.filters[i], list):
        filters = [self.filters[i]]
      else:
        filters = self.filters[i]

      for j in range(len(filters)):
        if len(filters) > 1:
          name_suffix = "_".join(name_suffix_list + [str(j)])
        else:
          name_suffix = "_".join(name_suffix_list)

        # "dsp" mode splits the KxK kernel into a 1xK followed by Kx1 pair.
        if self.use_separable == "dsp":
          kernels = [(1, self.kernel_size), (self.kernel_size, 1)]
        else:
          kernels = [(self.kernel_size, self.kernel_size)]

        for k, kernel in enumerate(kernels):
          strides = 1
          # Without maxpooling, the last conv of the block downsamples by
          # using stride 2 instead.
          if (
              not self.with_maxpooling and j == len(filters)-1 and
              k == len(kernels)-1
          ):
            strides = 2

          if self.use_separable == "dsp":
            kernel_suffix = (
                "".join([str(k) for k in kernel]) + "_" + name_suffix)
          elif self.use_separable == "mobilenet":
            depth_suffix = (
                "".join([str(k) for k in kernel]) + "_" + name_suffix)
            kernel_suffix = "11_" + name_suffix
          else:
            kernel_suffix = name_suffix

          if self.use_separable == "mobilenet":
            # MobileNet-style separable conv: depthwise KxK (optionally
            # followed by BN+ReLU), then the pointwise 1x1 conv below.
            x = DepthwiseConv2D(
                kernel,
                padding="same", strides=strides,
                use_bias=False,
                name="conv2d_dw_" + depth_suffix)(x)
            if self.with_batchnorm:
              x = BatchNormalization(name="conv2d_dw_bn_" + depth_suffix)(x)
            x = Activation("relu", name="conv2d_dw_act_" + depth_suffix)(x)
            # The remaining Conv2D becomes the pointwise 1x1 step; the
            # depthwise layer already applied the stride.
            kernel = (1, 1)
            strides = 1

          # When BN follows, the conv bias is redundant (absorbed by beta).
          x = Conv2D(
              filters[j], kernel, strides=strides,
              use_bias=not self.with_batchnorm,
              padding="same",
              kernel_initializer=self.kernel_initializer,
              bias_initializer=self.bias_initializer,
              name="conv2d_" + kernel_suffix)(x)

          # Skip BN+act here for the last conv when the xnornet trick is on
          # and maxpooling follows: BN+act are emitted after the maxpool
          # instead (see below).
          if not (
              self.with_maxpooling and self.use_xnornet_trick and
              j == len(filters)-1 and k == len(kernels)-1
          ):
            if self.with_batchnorm:
              x = BatchNormalization(
                  beta_initializer=self.bias_initializer,
                  name="bn_" + kernel_suffix)(x)
            x = Activation("relu", name="act_" + kernel_suffix)(x)

      if self.with_maxpooling:
        x = MaxPooling2D(2, 2, name="mp_" + name_suffix)(x)
        # this is a trick from xnornet to enable full binary or ternary
        # networks to be after maxpooling.
        if self.use_xnornet_trick:
          x = BatchNormalization(
              beta_initializer=self.bias_initializer,
              name="mp_bn_" + name_suffix)(x)
          x = Activation("relu", name="mp_act_" + name_suffix)(x)

      if self.dropout_rate > 0:
        x = Dropout(self.dropout_rate, name="drop_" + name_suffix)(x)

    # If spatial size is already 1x1, a 1x1 conv acts as the classifier;
    # otherwise flatten and use a Dense head.
    if x.shape.as_list()[1] > 1:
      x = Flatten(name="flatten")(x)
      x = Dense(
          self.nb_classes,
          kernel_initializer=self.kernel_initializer,
          bias_initializer=self.bias_initializer,
          name="dense")(x)
      x = Activation("softmax", name="softmax")(x)
    else:
      x = Conv2D(
          self.nb_classes, 1, strides=1, padding="same",
          kernel_initializer=self.kernel_initializer,
          bias_initializer=self.bias_initializer,
          name="dense")(x)
      x = Activation("softmax", name="softmax")(x)
      x = Flatten(name="flatten")(x)

    model = Model(inputs=[x_in], outputs=[x])

    return model


if __name__ == "__main__":
  import os
  # Force CPU execution for this smoke test.
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  os.environ["CUDA_VISIBLE_DEVICES"] = ""

  model = ConvBlockNetwork(
      shape=(64, 64, 1),
      nb_classes=10,
      kernel_size=3,
      filters=[16, [32]*3, 48, 64, 128],
      dropout_rate=0.0,
      with_maxpooling=False,
      with_batchnorm=True,
      use_separable="mobilenet",
      use_xnornet_trick=True
  ).build()
  model.summary()
"""Plots history of runs when running in scheduler mode."""

import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One log_<block>.csv file is produced per quantized block; sort so that
# subplot i corresponds to block i.
filenames = sorted(glob.glob("log_*.csv"))

# Arrange subplots in a near-square grid large enough for all files.
grid_size = int(np.ceil(np.sqrt(len(filenames))))

for i, filename in enumerate(filenames):
  history = pd.read_csv(filename)

  title = "block_" + str(i)
  # plt.subplot returns an Axes; score/accuracy share the left y-axis.
  ax1 = plt.subplot(grid_size, grid_size, i + 1, title=title)

  ax1.set_xlabel("trial")
  ax1.set_ylabel("score / accuracy")
  plt1 = ax1.plot(history["score"], "ro-", label="score")
  plt2 = ax1.plot(history["accuracy"], "go-", label="accuracy")
  plt3 = ax1.plot(history["val_accuracy"], "bo-", label="val_accuracy")

  # Energy (trial_size) uses a twin y-axis on the right.
  ax2 = ax1.twinx()
  ax2.set_ylabel("energy", color="m")
  plt4 = ax2.plot(history["trial_size"], "mo-", label="trial_size")

  # Merge the line handles from both axes into a single legend.
  plots = plt1 + plt2 + plt3 + plt4
  labels = [plot.get_label() for plot in plots]
  ax1.legend(plots, labels, loc=0)

plt.show()
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from .forgiving_bits import ForgivingFactorBits from .forgiving_energy import ForgivingFactorPower from .forgiving_factor import ForgivingFactor forgiving_factor = { "bits": ForgivingFactorBits, "energy": ForgivingFactorPower } ================================================ FILE: qkeras/autoqkeras/forgiving_metrics/forgiving_bits.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Implements forgiving factor metrics bit model size in bits."""

import numpy as np
import six

from qkeras.autoqkeras.forgiving_metrics.forgiving_factor import ForgivingFactor  # pylint: disable=line-too-long
from qkeras import get_quantizer


class ForgivingFactorBits(ForgivingFactor):
  """Implements forgiving factor with target as number of bits."""

  def __init__(
      self, delta_p, delta_n, rate, stress=1.0, input_bits=8, output_bits=8,
      ref_bits=8, config=None):
    """Creates the bit-size forgiving factor.

    Args:
      delta_p: forgiveness percentage (passed to ForgivingFactor).
      delta_n: penalty percentage (passed to ForgivingFactor).
      rate: log base for the size ratio (passed to ForgivingFactor).
      stress: multiplier applied to the reference size.
      input_bits: bits assumed for model inputs.
      output_bits: bits assumed for model outputs (softmax/sigmoid).
      ref_bits: bits assumed for unquantized weights/activations.
      config: dict mapping layer class name to the list of cost components
        ("parameters", "activations") that should be counted for it.
    """
    self.stress = stress
    self.input_bits = input_bits
    self.output_bits = output_bits
    self.ref_bits = ref_bits
    # NOTE(review): ref_size is initialized but never read in this class —
    # caching is done via the reference_size attribute instead.
    self.ref_size = {}
    self.config = config if config else {}
    super().__init__(delta_p, delta_n, rate)

  def _param_size(self, layer):
    """Computes size of parameters of a layer in bits."""
    t_size = self.ref_bits
    parameter_size = 0

    # we only compute parameter sizes for these layers, and BatchNormalization
    # is a special case because it exports mean and beta that is absorbed by
    # previous or next layer. As mean and beta will be compressed into a single
    # value, we actually only need to take care of the shape.
    if layer.__class__.__name__ in [
        "Dense", "Conv2D", "Conv1D", "DepthwiseConv2D"]:
      # Unquantized layers: every weight tensor counted at ref_bits.
      for w in layer.get_weights():
        parameter_size += t_size * np.prod(w.shape)
    elif layer.__class__.__name__ in [
        "QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D"]:
      # Quantized layers: use each weight's quantizer bit width when present,
      # falling back to ref_bits for unquantized slots.
      for i, w in enumerate(layer.get_weights()):
        if layer.get_quantizers()[i]:
          bits = layer.get_quantizers()[i].bits
        else:
          bits = t_size
        parameter_size += bits * np.prod(w.shape)
    elif layer.__class__.__name__ in ["BatchNormalization"]:
      # scale
      # (index -1 is used only for its shape, per the note above)
      index = -1
      parameter_size += t_size * np.prod(layer.get_weights()[index].shape)
      # center (bias)
      if layer.center:
        index = int(bool(layer.scale))
        parameter_size += t_size * np.prod(layer.get_weights()[index].shape)
    elif layer.__class__.__name__ in ["QBatchNormalization"]:
      # scale
      # NOTE(review): 6/5 bit widths below are hard-coded assumptions for
      # QBatchNormalization scale/center quantizers — confirm against the
      # quantizers actually configured on the layer.
      index = -1
      bits = 6
      parameter_size += bits * np.prod(layer.get_weights()[index].shape)
      # center (bias)
      if layer.center:
        bits = 5
        index = int(bool(layer.scale))
        parameter_size += bits * np.prod(layer.get_weights()[index].shape)
    return parameter_size

  def _act_size(self, layer):
    """Computes size of activations of a layer in bits."""
    i_size = self.input_bits
    o_size = self.output_bits
    t_size = self.ref_bits

    # Number of activation values per example (batch dim excluded).
    output_size = np.prod(layer.output.shape[1:])

    # we compute activation sizes for inputs and outputs
    if layer.__class__.__name__ in ["InputLayer"]:
      return i_size * output_size
    elif layer.__class__.__name__ in [
        "Dense", "Conv2D", "Conv1D", "DepthwiseConv2D"]:
      # Unquantized layers with a non-linear fused activation cost ref_bits;
      # a linear (or absent) activation is free.
      if layer.activation is not None and layer.activation.__name__ != "linear":
        return t_size * output_size
      else:
        return 0
    elif layer.__class__.__name__ in [
        "QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D"]:
      # The fused activation may be None, a string, or a quantizer object.
      if layer.activation is None:
        is_softmax = False
        is_linear = False
      else:
        if isinstance(layer.activation, six.string_types):
          is_softmax = layer.activation == "softmax"
          is_linear = layer.activation == "linear"
        elif hasattr(layer.activation, "__name__"):
          is_softmax = layer.activation.__name__ == "softmax"
          is_linear = layer.activation.__name__ == "linear"
        else:
          is_softmax = False
          is_linear = False

      if is_softmax:
        # Softmax outputs are counted at the model output bit width.
        bits = o_size
      elif is_linear:
        bits = 0
      else:
        assert not isinstance(layer.activation, six.string_types)
        # Quantized activations report their own bit width.
        if hasattr(layer.activation, "bits"):
          bits = layer.activation.bits
        else:
          bits = t_size
      return bits * np.prod(layer.output.shape.as_list()[1:])
    elif layer.__class__.__name__ in ["QActivation", "Activation"]:
      if isinstance(layer.activation, six.string_types):
        is_linear = layer.activation == "linear"
        is_softmax = layer.activation == "softmax"
        is_sigmoid = layer.activation == "sigmoid"
      else:
        is_linear = layer.activation.__name__ == "linear"
        is_softmax = layer.activation.__name__ == "softmax"
        is_sigmoid = layer.activation.__name__ == "sigmoid"

      if is_linear:
        bits = 0
      elif is_softmax or is_sigmoid:
        bits = o_size
      else:
        # Resolve a quantizer string (e.g. "quantized_relu(4,2)") to an
        # object so its bit width can be read.
        if isinstance(layer.activation, six.string_types):
          activation = get_quantizer(layer.activation)
        else:
          activation = layer.activation
        if hasattr(activation, "bits"):
          bits = activation.bits
        else:
          bits = t_size
      return bits * output_size
    # Any other layer type contributes no activation cost.
    return 0

  def compute_model_size(self, model):
    """Computes size of model."""
    a_size = 0
    p_size = 0
    total_size = 0
    model_size_dict = {}

    for layer in model.layers:
      layer_name = layer.__class__.__name__
      # Per-layer-class config selects which components count; falls back to
      # the "default" entry when the class is not listed.
      layer_config = self.config.get(
          layer_name, self.config.get("default", None))
      if layer_config:
        parameters = self._param_size(layer)
        activations = self._act_size(layer)
        # Booleans act as 0/1 weights on each component.
        p_weight = ("parameters" in layer_config)
        a_weight = ("activations" in layer_config)
        total = p_weight * parameters + a_weight * activations
        model_size_dict[layer.name] = {
            "parameters": parameters,
            "activations": activations,
            "total": total
        }
        a_size += a_weight * activations
        p_size += p_weight * parameters
        total_size += total

    return (total_size, p_size, a_size, model_size_dict)

  def get_reference(self, model):
    # The reference is computed once and cached on the instance; stress
    # scales the cached total.
    if not hasattr(self, "reference_size"):
      cached_result = self.compute_model_size(model)
      self.reference_size = cached_result[0] * self.stress
      self.ref_p = cached_result[1]
      self.ref_a = cached_result[2]
      self.reference_size_dict = cached_result[3]
    return self.reference_size

  def get_reference_stats(self):
    # Per-layer breakdown computed by get_reference.
    return self.reference_size_dict

  def get_trial(self, model):
    """Computes size of quantization trial."""
    result = self.compute_model_size(model)
    self.trial_size = result[0]
    self.total_p_bits = result[1]
    self.total_a_bits = result[2]
    self.trial_size_dict = result[3]
    return self.trial_size

  def get_total_factor(self):
    """we adjust the learning rate by size reduction."""
    # Relative change of total bits: negative means the trial shrank.
    ref_total = self.ref_a + self.ref_p
    trial_total = self.total_a_bits + self.total_p_bits
    return (trial_total - ref_total) / ref_total

  def print_stats(self):
    """Prints statistics of current model."""
    str_format = (
        "stats: delta_p={} delta_n={} rate={} trial_size={} reference_size={}\n"
        "       delta={:.2f}%"
    )
    print(
        str_format.format(
            self.delta_p, self.delta_n, self.rate, self.trial_size,
            int(self.reference_size), 100*self.delta())
    )
    # Percentage change of activation, parameter, and total bits vs reference.
    a_percentage = np.round(
        100.0 * (self.total_a_bits - self.ref_a) / self.ref_a, 2)
    p_percentage = np.round(
        100.0 * (self.total_p_bits - self.ref_p) / self.ref_p, 2)
    ref_total = self.ref_a + self.ref_p
    trial_total = self.total_a_bits + self.total_p_bits
    total_percentage = np.round(
        100.0 * (trial_total - ref_total) / ref_total, 2)
    print(
        (
            "       a_bits={}/{} ({:.2f}%) p_bits={}/{} ({:.2f}%)\n"
            "       total={}/{} ({:.2f}%)"
        ).format(
            int(self.total_a_bits), int(self.ref_a), a_percentage,
            int(self.total_p_bits), int(self.ref_p), p_percentage,
            int(trial_total), int(ref_total), total_percentage
        ))
"""Implements forgiving factor metrics for energy consumption."""

import json
import numpy as np

from qkeras.autoqkeras.forgiving_metrics.forgiving_factor import ForgivingFactor  # pylint: disable=line-too-long
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings


class ForgivingFactorPower(ForgivingFactor):
  """Get Power cost of a given model."""

  def __init__(self, delta_p, delta_n, rate, stress=1.0, **kwargs):
    # input parameters:
    #   delta_p, delta_n, rate: same as parent class
    #   stress: stress level to shift reference curve
    #   process: technology process to use in configuration (horowitz, ...)
    #   parameters_on_memory: whether to store parameters in dram, sram, or
    #     fixed
    #   activations_on_memory: store activations in dram, sram
    #   min_sram_size: minimum sram size in number of bits
    #   rd_wr_on_io: whether load data from dram to sram (consider sram as a
    #     cache for dram. If false, we will assume data will be already in SRAM
    #   config_json: if None, use qtools/config_json by default
    #     define default source quantizers;
    #     default quantizers for intermediate variables if no quantizer
    #     provided parameters for energy calculation
    #   source_quantizers: quantizer for model input
    #   trained_model: whether model has been trained already, which is
    #     needed to compute tighter bounds for qBatchNorm Power estimation.
    #   reference_internal: size to use for weight/bias/activation in
    #     get_reference energy calculation (int8, fp16, fp32)
    #   reference_accumulator: accumulator and multiplier type in get_reference
    #     energy calculation
    #   keras_layer_quantizer: quantizer for keras layers in hybrid models
    super().__init__(delta_p, delta_n, rate)
    self.stress = stress

    # Each two-element list below holds [reference setting, trial setting]:
    # index 0 is used by get_reference, index 1 by get_trial.

    # process: horowitz... - must be present in config_json
    self.process = kwargs.get("process", "horowitz")

    # parameters_on_memory: fixed, sram, dram
    self.parameters_on_memory = kwargs.get(
        "parameters_on_memory", ["fixed"] * 2)

    # activations_on_memory: sram, dram
    self.activations_on_memory = kwargs.get(
        "activations_on_memory", ["dram"] * 2
    )

    self.min_sram_size = kwargs.get("min_sram_size", [0] * 2)

    # rd_wr_on_io: true/false
    self.rd_wr_on_io = kwargs.get("rd_wr_on_io", [True] * 2)

    self.config_json = kwargs.get("config_json", None)

    self.source_quantizers = kwargs.get("source_quantizers", None)

    # trained_model: true/false
    self.trained_model = kwargs.get("trained_model", False)

    # reference_internal: int8, fp16, fp32
    self.reference_internal = kwargs.get("reference_internal", "fp32")

    # reference_internal: int8, int16, int32, fp16, fp32
    self.reference_accumulator = kwargs.get("reference_accumulator", "fp32")

    # Cached by get_reference; None means "not computed yet".
    self.reference_size = None

    # energy_dict is a dictionary that lists energy consumption for each layer
    # format:
    # {
    #   "layer0_name":
    #   {
    #     "mem_cost": 148171,
    #     "op_cost": 0
    #   },
    #   "layer1_name":
    #   {
    #     "mem_cost": 179923,
    #     "op_cost": 34
    #   },
    #   ...
    #
    #   "total_cost": 328129
    # }
    self.ref_energy_dict = None
    self.trial_energy_dict = None

    # Validate memory placement and reference datatype options early.
    assert self.parameters_on_memory[0] in ["dram", "sram", "fixed"]
    assert self.parameters_on_memory[1] in ["dram", "sram", "fixed"]
    assert self.activations_on_memory[0] in ["dram", "sram", "fixed"]
    assert self.activations_on_memory[1] in ["dram", "sram", "fixed"]
    assert self.reference_internal in ["fp16", "fp32", "int8"]
    assert self.reference_accumulator in ["int16", "int32", "fp16", "fp32"]

  def get_reference(self, model):
    """Computes (and caches) the reference energy of the unquantized model."""
    # we only want to compute reference once
    if self.reference_size is not None:
      return self.reference_size * self.stress

    # for_reference=True makes qtools treat the model as unquantized,
    # using reference_internal/reference_accumulator datatypes throughout.
    q = run_qtools.QTools(
        model, process=self.process,
        source_quantizers=self.reference_internal,
        is_inference=self.trained_model, weights_path=None,
        keras_quantizer=self.reference_internal,
        keras_accumulator=self.reference_accumulator,
        for_reference=True)
    energy_dict = q.pe(
        weights_on_memory=self.parameters_on_memory[0],
        activations_on_memory=self.activations_on_memory[0],
        min_sram_size=self.min_sram_size[0],
        rd_wr_on_io=self.rd_wr_on_io[0])
    self.ref_energy_dict = energy_dict
    # Only the energy categories enabled in qtools settings are summed.
    self.reference_size = q.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    self.reference_energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)

    return self.reference_size * self.stress

  def get_trial(self, model):
    """Computes size of quantization trial."""
    # for_reference=False: use the model's actual quantizers.
    q = run_qtools.QTools(
        model, process=self.process,
        source_quantizers=self.source_quantizers,
        is_inference=self.trained_model, weights_path=None,
        keras_quantizer=self.reference_internal,
        keras_accumulator=self.reference_accumulator,
        for_reference=False)
    energy_dict = q.pe(
        weights_on_memory=self.parameters_on_memory[1],
        activations_on_memory=self.activations_on_memory[1],
        min_sram_size=self.min_sram_size[1],
        rd_wr_on_io=self.rd_wr_on_io[1])
    self.trial_energy_dict = energy_dict
    # self.trial_size = energy_dict["total_cost"]
    self.trial_size = q.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    self.trial_energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)

    return self.trial_size

  def get_total_factor(self):
    """we adjust the learning rate by size reduction."""
    # Relative energy change; negative means the trial consumes less.
    return (self.trial_size - self.reference_size) / self.reference_size

  def get_reference_stats(self):
    # Per-layer energy profile computed by get_reference.
    return self.reference_energy_profile

  def get_trial_stats(self):
    # Per-layer energy profile computed by get_trial.
    return self.trial_energy_profile

  def print_stats(self, verbosity=0):
    """Prints statistics of current model."""
    delta = self.delta()

    if (self.ref_energy_dict and self.trial_energy_dict):
      str_format = (
          "stats: delta_p={} delta_n={} rate={} trial_size={} "
          "reference_size={}\n"
          "       delta={:.2f}%"
      )
      print(
          str_format.format(
              self.delta_p, self.delta_n, self.rate, self.trial_size,
              int(self.reference_size), 100 * delta)
      )

    # verbosity > 0 dumps the full per-layer energy dictionaries as JSON.
    if verbosity > 0 and self.ref_energy_dict:
      print("Reference Cost Distribution:")
      dict_to_json = json.dumps(self.ref_energy_dict, indent=4)
      print(dict_to_json)

    if verbosity > 0 and self.trial_energy_dict:
      print("Trial Cost Distribution:")
      dict_to_json = json.dumps(self.trial_energy_dict, indent=4)
      print(dict_to_json)

    if (self.ref_energy_dict and self.trial_energy_dict):
      print("Total Cost Reduction:")
      reduction_percentage = np.round(
          100.0 * (self.trial_size - self.reference_size) /
          self.reference_size, 2)
      print(
          ("   {} vs {} ({:.2f}%)").format(
              int(self.trial_size), int(self.reference_size),
              reduction_percentage
          ))
"""Implements forgiving factor metrics."""

import numpy as np


class ForgivingFactor:
  """Base class for forgiving-factor metrics. Should never be invoked."""

  def __init__(self, delta_p, delta_n, rate):
    """Creates the forgiving factor.

    Args:
      delta_p: forgiveness percentage applied when the trial is smaller
        than the reference (stored internally as a fraction).
      delta_n: penalty percentage applied when the trial is bigger than
        the reference (stored internally as a fraction).
      rate: log base used to compress the reference/trial size ratio.
    """
    self.delta_p = np.float32(delta_p) / 100.0
    self.delta_n = np.float32(delta_n) / 100.0
    self.rate = np.float32(rate)

  def get_reference(self, model):
    """Computes reference size of model."""
    # Subclasses must implement and set self.reference_size.
    raise NotImplementedError("class not implemented.")

  def get_trial(self, model, schema):
    """Computes size of quantization trial."""
    # Subclasses must implement and set self.trial_size.
    raise NotImplementedError("class not implemented.")

  def delta(self):
    """Returns the forgiving factor for the last reference/trial pair.

    Positive (reward weighted by delta_p) when the trial is smaller than
    the reference, negative (penalty weighted by delta_n) otherwise.
    Requires get_reference and get_trial to have been called first.
    """
    # log(ref/trial)/log(rate) = number of factors of `rate` by which the
    # trial shrank (positive) or grew (negative); hoisted out of the two
    # np.where branches since both use the same expression.
    log_ratio = (np.log(self.reference_size / self.trial_size) /
                 np.log(self.rate))
    return np.where(self.trial_size < self.reference_size,
                    self.delta_p * log_ratio,
                    self.delta_n * log_ratio)
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Definition of default quantization configuration.""" default_quantization_config = { "kernel": { "binary": 1, "stochastic_binary": 1, "ternary": 2, "stochastic_ternary": 2, "quantized_bits(2,1,1,alpha=1.0)": 2, "quantized_bits(4,0,1)": 4, "quantized_bits(8,0,1)": 8, "quantized_po2(4,1)": 4 }, "bias": { "quantized_bits(4,0,1)": 4, "quantized_bits(8,3,1)": 8, "quantized_po2(4,8)": 4 }, "activation": { "binary": 1, "binary(alpha='auto_po2')": 1, "ternary": 2, "quantized_relu(3,1)": 3, "quantized_relu(4,2)": 4, "quantized_relu(8,2)": 8, "quantized_relu(8,4)": 8, "quantized_relu(16,8)": 16, "quantized_relu_po2(4,4)": 4 }, "linear": { "binary": 1, "ternary": 2, "quantized_bits(4,1)": 4, "quantized_bits(8,2)": 8, "quantized_bits(16,10)": 16, "quantized_po2(6,4)": 6 } } ================================================ FILE: qkeras/autoqkeras/tests/test_forgiving_factor.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import pytest
from tensorflow.keras.layers import *  # pylint: disable=wildcard-import
from tensorflow.keras.models import Model

from qkeras import *  # pylint: disable=wildcard-import
from qkeras.autoqkeras.forgiving_metrics import ForgivingFactorBits  # pylint: disable=line-too-long
from qkeras.utils import model_quantize


def _get_layer_config():
  """Returns a fresh copy of the layer->cost-component config for tests.

  Previously this dict was duplicated verbatim in both tests; a fresh copy
  is returned each time so a test cannot mutate another test's config.
  """
  return {
      "QDense": ["parameters", "activations"],
      "Dense": ["parameters", "activations"],
      "QConv2D": ["parameters", "activations"],
      "Conv2D": ["parameters", "activations"],
      "DepthwiseConv2D": ["parameters", "activations"],
      "QDepthwiseConv2D": ["parameters", "activations"],
      "Activation": ["activations"],
      "QActivation": ["activations"],
      "QBatchNormalization": ["parameters"],
      "BatchNormalization": ["parameters"],
      "default": ["activations"],
  }


def get_model():
  """Returns sample model."""
  xi = Input((28, 28, 1), name="input")  # pylint: disable=undefined-variable
  x = Conv2D(32, 3, strides=1, padding="same", name="c1")(xi)  # pylint: disable=undefined-variable
  x = BatchNormalization(name="b1")(x)  # pylint: disable=undefined-variable
  x = Activation("relu", name="a1")(x)  # pylint: disable=undefined-variable
  x = MaxPooling2D(2, 2, name="mp1")(x)  # pylint: disable=undefined-variable
  x = QConv2D(32, 3, kernel_quantizer="binary", bias_quantizer="binary",  # pylint: disable=undefined-variable
              strides=1, padding="same", name="c2")(x)
  x = QBatchNormalization(name="b2")(x)  # pylint: disable=undefined-variable
  x = QActivation("binary", name="a2")(x)  # pylint: disable=undefined-variable
  x = MaxPooling2D(2, 2, name="mp2")(x)  # pylint: disable=undefined-variable
  x = QConv2D(32, 3, kernel_quantizer="ternary", bias_quantizer="ternary",  # pylint: disable=undefined-variable
              strides=1, padding="same", activation="binary", name="c3")(x)
  x = Flatten(name="flatten")(x)  # pylint: disable=undefined-variable
  x = Dense(1, name="dense", activation="softmax")(x)  # pylint: disable=undefined-variable

  model = Model(inputs=xi, outputs=x)

  return model


def test_forgiving_factor_bits():
  """Tests forgiving factor bits."""
  delta_p = 8.0
  delta_n = 8.0
  rate = 2.0
  stress = 1.0
  input_bits = 8
  output_bits = 8
  ref_bits = 8

  config = _get_layer_config()

  model = get_model()

  ffb = ForgivingFactorBits(
      delta_p, delta_n, rate, stress,
      input_bits, output_bits, ref_bits, config
  )

  cached_result = ffb.compute_model_size(model)
  ref_size = cached_result[0]
  ref_p = cached_result[1]
  ref_a = cached_result[2]

  # Expected totals for the fixed sample model built by get_model().
  assert ref_size == 258544
  assert ref_p == 43720
  assert ref_a == 214824


def test_new_forgiving_factor():
  """Tests forgiving factor."""
  delta_p = 8.0
  delta_n = 8.0
  rate = 2.0
  stress = 1.0
  input_bits = 8
  output_bits = 8
  ref_bits = 8

  config = _get_layer_config()

  model = get_model()
  model.use_legacy_config = True

  ffb = ForgivingFactorBits(
      delta_p, delta_n, rate, stress,
      input_bits, output_bits, ref_bits, config
  )

  cached_result = ffb.compute_model_size(model)
  ref_size = cached_result[0]
  ref_p = cached_result[1]
  ref_a = cached_result[2]
  ref_size_dict = cached_result[3]

  assert ref_size == 258544
  assert ref_p == 43720
  assert ref_a == 214824

  # Quantize only layer "c1"; every other layer's cost must be unchanged.
  q_dict = {
      "c1": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "quantized_bits(4)"
      }
  }

  q_model = model_quantize(model, q_dict, 4)

  cached_result = ffb.compute_model_size(q_model)
  trial_size_dict = cached_result[3]

  for name in trial_size_dict:
    if name != "c1":
      assert trial_size_dict[name] == ref_size_dict[name]
  # c1: 288 kernel weights * 1 bit + 32 biases * 4 bits = 416 bits.
  assert trial_size_dict["c1"]["parameters"] == 416


if __name__ == "__main__":
  pytest.main([__file__])
"""Implements utility functions for support of auto-quantization."""

import json


# Quantized sequence layer class names handled specially by AutoQKeras.
Q_SEQUENCE_LAYERS = ["QSimpleRNN", "QLSTM", "QGRU", "QBidirectional"]


def print_qmodel_summary(q_model):
  """Prints quantized model summary.

  One line per layer: layer name, relevant size (units/filters), the
  configured quantizers, and the activation when it is not linear.

  Args:
    q_model: quantized (or partially quantized) Keras model.
  """
  for layer in q_model.layers:
    if (layer.__class__.__name__ == "QActivation" or
        layer.__class__.__name__ == "QAdaptiveActivation"):
      print("{:20} {}".format(layer.name, str(layer.activation)))
    elif (
        hasattr(layer, "get_quantizers") and
        layer.__class__.__name__ != "QBatchNormalization"
    ):
      print("{:20} ".format(layer.name), end="")
      if "Dense" in layer.__class__.__name__:
        print("u={} ".format(layer.units), end="")
      elif layer.__class__.__name__ in [
          "Conv2D", "QConv2D", "Conv1D", "QConv1D", "QConv2DBatchnorm",
          "QDepthwiseConv2DBatchnorm"]:
        print("f={} ".format(layer.filters), end="")

      # Print every configured quantizer (iterate directly instead of
      # indexing by range(len(...))).
      for quantizer in layer.get_quantizers():
        if quantizer is not None:
          print("{} ".format(str(quantizer)), end="")

      if hasattr(layer, "recurrent_activation"):
        print("recurrent act={}".format(layer.recurrent_activation), end="")
      # Linear activations are the default and carry no information.
      if (
          layer.activation is not None and not (
              hasattr(layer.activation, "__name__") and
              layer.activation.__name__ == "linear"
          )
      ):
        print("act={}".format(layer.activation), end="")
      print()
    elif layer.__class__.__name__ == "QBatchNormalization":
      # tensorflow is only needed to evaluate the moving mean here, so it is
      # imported lazily to keep this module importable without tensorflow.
      import tensorflow as tf  # pylint: disable=g-import-not-at-top
      print("{:20} QBN, mean={}".format(
          layer.name, str(tf.keras.backend.eval(layer.moving_mean))), end="")
      print()
    elif layer.__class__.__name__ == "BatchNormalization":
      print("{:20} is normal keras bn layer".format(layer.name), end="")
      print()
  print()


def get_quantization_dictionary(q_model):
  """Returns quantization dictionary.

  Args:
    q_model: quantized Keras model.

  Returns:
    Dict mapping layer name to that layer's quantization config, for every
    layer that exposes get_quantization_config.
  """
  q_dict = {}
  for layer in q_model.layers:
    if hasattr(layer, "get_quantization_config"):
      q_dict[layer.name] = layer.get_quantization_config()
  return q_dict


def save_quantization_dict(fn, q_model):
  """Saves quantization dictionary as json object in disk.

  Args:
    fn: output file name.
    q_model: quantized Keras model.
  """
  q_dict = get_quantization_dictionary(q_model)
  json_dict = json.dumps(q_dict)
  # with-statement guarantees the file is closed even if the write fails.
  with open(fn, "w") as f:
    f.write(json_dict + "\n")
Given input matrix x with values (for example) 0, 1, 2, 3, 4, 5, 6, 7, create a number of classes as follows: classes=2, value_range=8, with_residue=0 A true one-hot representation, and the remaining bits are truncated, using one bit representation. 0 - [1,0] 1 - [1,0] 2 - [1,0] 3 - [1,0] 4 - [0,1] 5 - [0,1] 6 - [0,1] 7 - [0,1] classes=2, value_range=8, with_residue=1 In this case, the residue is added to the one-hot class, and the class will use 2 bits (for the remainder) + 1 bit (for the one hot) 0 - [1,0] 1 - [1.25,0] 2 - [1.5,0] 3 - [1.75,0] 4 - [0,1] 5 - [0,1.25] 6 - [0,1.5] 7 - [0,1.75] Arguments: x: the input vector we want to convert. typically its dimension will be (B,H,W,C) for an image, or (B,T,C) or (B,C) for for a 1D signal, where B=batch, H=height, W=width, C=channels or features, T=time for time series. classes: the number of classes to (or log2(classes) bits) to use of the values. value_range: max(x) - min(x) over all possible x values (e.g. for 8 bits, we would use 256 here). with_residue: if true, we split the value range into two sets and add the decimal fraction of the set to the one-hot representation for partial thermometer representation. merge_with_channels: if True, we will not create a separate dimension for the resulting matrix, but we will merge this dimension with the last dimension. use_two_hot_encoding: if true, we will distribute the weight between the current value and the next one to make sure the numbers will always be < 1. Returns: Converted x with classes with the last shape being C*classes. """ # just make sure we are processing floats so that we can compute fractional # values x = x.astype(np.float32) # the number of ranges are equal to the span of the original values # divided by the number of target classes. # # for example, if value_range is 256 and number of classes is 16, we have # 16 values (remaining 4 bits to redistribute). 
ranges = value_range/classes x_floor = np.floor(x / ranges) if use_two_hot_encoding: x_ceil = np.ceil(x / ranges) if with_residue: x_mod_f = (x - x_floor * ranges) / ranges # convert values to categorical. if use_two_hot_encoding, we may # end up with one more class because we need to distribute the # remaining bits to the saturation class. For example, if we have # value_range = 4 (0,1,2,3) and classes = 2, if we use_two_hot_encoding # we will have the classes 0, 1, 2, where for the number 3, we will # allocate 0.5 to bin 1 and 0.5 to bin 2 (namelly 3 = 0.5 * (2**2 + 2**1)). xc_f = to_categorical(x_floor, classes + use_two_hot_encoding) if with_residue: xc_f_m = xc_f == 1 if use_two_hot_encoding: xc_c = to_categorical(x_ceil, classes + use_two_hot_encoding) xc_c_m = xc_c == 1 if np.any(xc_c_m): xc_c[xc_c_m] = x_mod_f.reshape(xc_c[xc_c_m].shape) if np.any(xc_f_m): xc_f[xc_f_m] = (1.0 - x_mod_f.reshape(xc_f[xc_f_m].shape)) xc_f += xc_c else: if np.any(xc_f_m): xc_f[xc_f_m] += x_mod_f.reshape(xc_f[xc_f_m].shape) if merge_with_channels and len(xc_f.shape) != len(x.shape): sz = xc_f.shape sz = sz[:-2] + (sz[-2] * sz[-1],) xc_f = xc_f.reshape(sz) return xc_f ================================================ FILE: qkeras/base_quantizer.py ================================================ # Copyright 2025 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== import tensorflow.compat.v2 as tf import tensorflow.keras.backend as K def _create_variable_name(attr_name, var_name=None): """Creates variable name. Arguments: attr_name: string. attribute name var_name: string. variable name Returns: string. variable name """ if var_name: return var_name + "/" + attr_name # This naming scheme is to solve a problem of a layer having more than # one quantizer can have multiple qnoise_factor variables with the same # name of "qnoise_factor". return attr_name + "_" + str(K.get_uid(attr_name)) class BaseQuantizer(tf.Module): """Base quantizer. Defines behavior all quantizers should follow. """ def __init__(self): self.built = False def build(self, var_name=None, use_variables=False): if use_variables: if hasattr(self, "qnoise_factor"): self.qnoise_factor = tf.Variable( lambda: tf.constant(self.qnoise_factor, dtype=tf.float32), name=_create_variable_name("qnoise_factor", var_name=var_name), dtype=tf.float32, trainable=False, ) self.built = True def _set_trainable_parameter(self): pass def update_qnoise_factor(self, qnoise_factor): """Update qnoise_factor.""" if isinstance(self.qnoise_factor, tf.Variable): # self.qnoise_factor is a tf.Variable. # This is to update self.qnoise_factor during training. self.qnoise_factor.assign(qnoise_factor) else: if isinstance(qnoise_factor, tf.Variable): # self.qnoise_factor is a numpy variable, and qnoise_factor is a # tf.Variable. self.qnoise_factor = qnoise_factor.eval() else: # self.qnoise_factor and qnoise_factor are numpy variables. # This is to set self.qnoise_factor before building # (creating tf.Variable) it. self.qnoise_factor = qnoise_factor # Override not to expose the quantizer variables. @property def variables(self): return () # Override not to expose the quantizer variables. @property def trainable_variables(self): return () # Override not to expose the quantizer variables. 
@property def non_trainable_variables(self): return () ================================================ FILE: qkeras/bn_folding_utils.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Utility functions for folding batchnorm with qconv/qdense layers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import networkx as nx import tensorflow as tf from six.moves import range from tensorflow.keras.models import clone_model from tensorflow.keras.models import Model from tensorflow.keras import Input from .qconvolutional import QConv2D from .qconvolutional import QDepthwiseConv2D from .qtools import generate_layer_data_type_map as gen_map from .qtools import qgraph def convert_folded_layer_to_unfolded(layer): """Replace a source batchnorm folded layer with a non-folded layer. Args: layer: keras/qkeras layer type. 
Source layer to be replaced with Returns: new layer instance """ # get layer config from the composite layer config = layer.get_config() # set layer config for QConv2D layer by first creating a tmp # QConv2D object and generate template for its config if layer.__class__.__name__ == "QConv2DBatchnorm": new_layer = QConv2D(filters=1, kernel_size=(2, 2), use_bias=True) elif layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm": new_layer = QDepthwiseConv2D(kernel_size=(2, 2), use_bias=True) else: # TODO(lishanok): will extend to QDense in the future assert ValueError, "%s is not supported!" % layer.__class__.__name__ new_layer_cfg = new_layer.get_config() # set qconv2d config according to the values in the composite layer for (key, _) in new_layer_cfg.items(): if key in config.keys(): new_layer_cfg[key] = config[key] # in case use_bias is False in the composite layer, # we need to set it True because we have folded bias new_layer_cfg["use_bias"] = True # create a non-folded, e.g., qconv2d layer from config and replace # old layer with it if layer.__class__.__name__ == "QConv2DBatchnorm": new_layer = QConv2D.from_config(new_layer_cfg) elif layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm": new_layer = QDepthwiseConv2D.from_config(new_layer_cfg) else: raise ValueError("Unsupported layer conversion {}".format(layer.name)) return new_layer def unfold_model(model): """Convert a model with batchnorm folded layer to a normal model. "Normal" here refers to a model without composite folded layer such as QConv2DBatchnorm layer. This function replace the folded layers with a normal QConv/QDense layer. It aslo sets the weights in the normal layer with the folded weights in the folded layer. Model architecture could be either sequential or non-sequential. Arguments: model: keras object, model with folded layers. Returns: A model that replaces folded layers (e.g., QConv2DBatchnorm) with normal qkeras layers (e.g., QConv2D). 
This model can be passed on to hardware generator so that hardware doesn't see batch normalization parameters. """ def _convert_folded_layer(layer): if layer.__class__.__name__ in [ "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]: new_layer = convert_folded_layer_to_unfolded(layer) else: new_layer = layer.__class__.from_config(layer.get_config()) new_layer.build(layer.input_shape) return new_layer def _clone_weights(src_layer, new_layer): if (src_layer.__class__.__name__ == "QConv2DBatchnorm") and ( new_layer.__class__.__name__ == "QConv2D"): src_weights = src_layer.get_folded_weights() # transfer weights from folded layer to the target layer folded_kernel_quantized = ( src_weights[0].numpy()) folded_bias_quantized = ( src_weights[1].numpy()) new_layer.set_weights([folded_kernel_quantized, folded_bias_quantized]) elif (src_layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm") and ( new_layer.__class__.__name__ == "QDepthwiseConv2D"): # transfer weights from folded layer to the target layer src_weights = src_layer.get_folded_weights() folded_depthwise_kernel_quantized = src_weights[0].numpy() folded_bias_quantized = src_weights[1].numpy() new_layer.set_weights( [folded_depthwise_kernel_quantized, folded_bias_quantized]) else: new_layer.set_weights(src_layer.get_weights()) inp = Input(shape=model.input_shape[1:]) cloned_model = clone_model( model, input_tensors=inp, clone_function=_convert_folded_layer) # replace weights for (src_layer, new_layer) in zip(model.layers, cloned_model.layers): _clone_weights(src_layer, new_layer) return cloned_model def populate_bias_quantizer_from_accumulator(model, source_quantizers): """Populate the bias quantizer from accumulator type. When user set bias_quantizer=None for layers(e.g., QConv2DBatchnorm), this function generates the accumulator type of the layer MAC op and set it as the bias quantizer. Such step is skipped if user provided a specific bias quantizer type. Args: model: keras/qkeras model object. 
If the model doesn't contain any batchnorm folded layer or if the bias quanizer type in the folded layer is already given, no operation needed. Else we generate the bias quantizer type and set it in model. source_quantizers: list of qkeras quantizers. A list of quantizer types for model inputs. Returns: keras model object """ default_quantizer = "quantized_bits(8, 0, 1)" # if source_quantizers is None, CreateGraph will use default_quantizer (graph, source_quantizer_list) = qgraph.CreateGraph( model, source_quantizers, default_quantizer) qgraph.GraphPropagateActivationsToEdges(graph) # generate the quantizer types of each layer. For folded layers, if bias # quantizer is not given by user, this function will generate the accumulator # type and set it as the bias quantizer type. is_inference = False keras_quantizer = "quantized_bits(8, 0, 1)" keras_accumulator = "quantized_bits(8, 0, 1)" for_reference = False layer_map = gen_map.generate_layer_data_type_map( graph, source_quantizer_list, is_inference, keras_quantizer, keras_accumulator, for_reference) for layer in model.layers: # TODO(lishanok): extend to other layer types if necessary if layer.__class__.__name__ in [ "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]: if not layer.bias_quantizer: # if user didn't specify the bias quantizer, we set it as the # MAC accumulator type of the current layer's MAC operation qtools_bias_quantizer = layer_map["layer_data_type_map"][ layer].bias_quantizer if tf.is_tensor(qtools_bias_quantizer.int_bits): qtools_bias_quantizer.int_bits = ( qtools_bias_quantizer.int_bits.numpy()) layer.bias_quantizer = ( qtools_bias_quantizer.convert_to_qkeras_quantizer()) layer.bias_quantizer_internal = layer.bias_quantizer if layer.__class__.__name__ == "QConv2DBatchnorm": layer.quantizers = [layer.kernel_quantizer_internal, layer.bias_quantizer_internal] elif layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm": layer.quantizers = [layer.depthwise_quantizer_internal, 
layer.bias_quantizer_internal] return model ================================================ FILE: qkeras/callbacks.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import os import tensorflow as tf class QNoiseScheduler(tf.keras.callbacks.Callback): """Schedules the gradual quantization noise training for each step (or epoch). It updates the qnoise_factor in the quantizers to gradually introduce the quantization noise during training. The idea was adopted from "https://arxiv.org/pdf/1903.01061.pdf" """ def __init__(self, start, finish, freq_type="epoch", update_freq=1, initial_step_or_epoch=0, exponent=3.0, use_ste=True, log_dir=None): """Initializes this QNoiseScheduler. Args: start: Int. The step (epoch) to start the gradual training. finish: Int. The step (epoch) to finish the gradual training. When the start and the finish are equal, the qnoise_factor will be 1.0 in the beginning of the training. freq_type: Str. "step" or "epoch". It sets the qnoise_factor update frequency type. update_freq: Int. Updating frequency of the qnoise_factor. initial_step_or_epoch: Int. Step or epoch at which to start training. exponent: Float. It is the exponent in the qnoise_factor calculation. 
It controls the rate of the gradual qnoise_factor change. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. log_dir: Str. log directory to save qnoise_factor every epoch end. """ super().__init__() self.start = start self.finish = finish if start > finish: raise ValueError( ("start {} must be greater than finish {}").format(start, finish)) supported_freq_type = ["step", "epoch"] if freq_type not in supported_freq_type: raise ValueError(("Invalid frequency type {}. only {} are " "supported.").format(freq_type, supported_freq_type)) self.freq_type = freq_type self.update_freq = update_freq self.initial_step_or_epoch = initial_step_or_epoch self.exponent = exponent self.qnoise_factor = None self.use_ste = use_ste self.quantizers = None self.summary_writer = None if log_dir: self.summary_writer = tf.summary.create_file_writer(log_dir) self.num_iters = np.array(0, dtype="int64") def calculate_qnoise_factor(self, freq): """Returns calculated qnoise_factor based on the current step (epoch) and the schedule parameters. Args: freq: The current step (or epoch) to calculate the qnoise_factor. Returns: qnoise_factor : calculated qnoise_factor. """ if freq < self.start: qnoise_factor = 0.0 elif freq <= self.finish and self.start != self.finish: val = float(self.finish - freq) / float(self.finish - self.start) qnoise_factor = 1.0 - np.power(val, self.exponent) else: qnoise_factor = 1.0 return qnoise_factor def set_qnoise_factor(self, quantizer, qnoise_factor): """Set self.qnoise_factor and update the qnoise_factor of the quantizer.""" # Updating the qnoise_factor of the quantizer. quantizer.update_qnoise_factor(qnoise_factor) # Updating the qnoise_factor of the callback. self.qnoise_factor = qnoise_factor def set_quantizers(self): """Set quantizers to update the qnoise_factor. This must be called before building the quantizers. 
""" for quantizer in self.quantizers: if hasattr(quantizer, "use_ste"): quantizer.use_ste = self.use_ste if hasattr(quantizer, "use_variables"): quantizer.use_variables = True if hasattr(quantizer, "built"): # If the quantizer has been built but not using tf.Variable then it # builds again to create tf.Variables. if quantizer.built and not isinstance(quantizer.qnoise_factor, tf.Variable): quantizer.build(use_variables=True) # Set the qnoise_factor to 0.0 to pretrain without quantization. self.set_qnoise_factor(quantizer, qnoise_factor=0.0) def get_quantizers(self, model): """Returns a list of quantizers with qnoise_factor in the model. Args: model: model to get a list of quantizers with qnoise_factor. Returns: A list of quantizers with the qnoise_factor variable. """ all_quantizers = [] for layer in model.layers: # A list of attributes holding the quantizer(s). for attr in ["quantizers", "quantizer"]: if hasattr(layer, attr): quantizers = getattr(layer, attr) quantizers = quantizers if attr == "quantizers" else [quantizers] for quantizer in quantizers: if hasattr(quantizer, "qnoise_factor"): all_quantizers.append(quantizer) return all_quantizers def update_qnoise_factor(self, freq): """Update the qnoise_factor of the model. Args: freq: The current step (epoch) to calculate the qnoise_factor. """ # Update the qnoise_factor at the frequency of self.update_freq. if freq % self.update_freq != 0: self.num_iters += 1 return new_qnoise_factor = self.calculate_qnoise_factor(freq) for quantizer in self.quantizers: # Updates the qnoise factors of the quantizers in the model. self.set_qnoise_factor(quantizer, new_qnoise_factor) self.num_iters += 1 def on_train_begin(self, logs=None): if not self.quantizers: # Build a list of quantizers which is used for updating qnoise_factor. 
self.quantizers = self.get_quantizers(self.model) self.set_quantizers() def on_epoch_begin(self, epoch, logs=None): if self.freq_type == "epoch": self.update_qnoise_factor(self.initial_step_or_epoch + self.num_iters) def on_epoch_end(self, epoch, logs=None): if self.summary_writer: with self.summary_writer.as_default(): tf.summary.scalar("qnoise_factor", data=self.qnoise_factor, step=epoch) def on_train_batch_begin(self, batch, logs=None): if self.freq_type == "step": self.update_qnoise_factor(self.initial_step_or_epoch + self.num_iters) ================================================ FILE: qkeras/codebook.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ Clustering based quantizers """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from sklearn.cluster import KMeans from tqdm import tqdm def create_in_out_table(km, quantizer): """Create [in, out] table needed to map compressed activations to codebook values. 
Given v: in_table[out_table[v]] => codebook value of v Arguments: km: KMeans model quantizer: quantizer function to apply to out_table Returns in_table: conversion of compressed table indexes to n-bit numbers out_table: conversion of n-bit output activations to compressed table indexes """ in_table = km.cluster_centers_.flatten() qrange = quantizer.range().reshape(-1, 1).astype(np.float32) out_table = km.predict(qrange).ravel() return in_table, out_table def activation_compression(model, compile_config, activation_indexes, bits, X_train, y_train, X_test, y_test, sample_size=1.0): """This function applies clustering based non-uniform quantization inspired by https://arxiv.org/pdf/1911.02079.pdf model: Keras model compile_config: Dictionary of arguments to be passed to model.compile() for all submodels activation_indexes: Index list of layers to be quantized. This will used to split the model and create submodels bits: Number of bits to compress activations to. This will results in 2**bits codebook values X_train, y_train: training data used to fit clustering algorithm X_test, y_test: validation data sample_size: fraction of training data activations to be used when computing codebook values Returns: cb_tables: [in, out] tables. See create_in_out_table docs models: list of keras submodels km_models: list of KMeans fitted models """ assert len(activation_indexes) > 0 assert 0.0 < sample_size <= 1.0 # n_init=10 maintains the same behavior as legacy versions of sklearn. This # was changed to "auto" in sklearn 1.4. 
km_models = [KMeans(2**bits, n_init=10)] * len(activation_indexes) cb_tables = [[]] * len(activation_indexes) models = [] x = x_in = model.layers[0].output for i in range(1, len(model.layers)): layer = model.layers[i] x = layer(x) if i in activation_indexes or i == len(model.layers) - 1: print("\nCreating submodel...") models.append(Model([x_in], [x])) x = x_in = Input(layer.output[0].shape, batch_size=layer.output.shape[0], dtype=layer.output.dtype) models[-1].compile(**compile_config) print(models[-1].summary()) print('\nsample_size: ', sample_size) x = X_train for i, model in enumerate(models[:-1]): print(f'fitting km[{i}]...') x = model.predict(x) km = km_models[i] temp = x.flatten().reshape(-1, 1) if sample_size < 1.0: idxs = np.random.choice(x.shape[0], size=int(sample_size * x.shape[0])) temp = temp[idxs] km.fit(temp) quantizer = getattr(model.layers[-1], 'quantizer', getattr(model.layers[-1], 'activation')) km.cluster_centers_ = quantizer(km.cluster_centers_).numpy() km.cluster_centers_.sort(axis=0) cb_tables[i] = create_in_out_table(km, quantizer) x = X_test for i, model in enumerate(models[:-1]): x = model.predict(x) km = km_models[i] preds = km.predict(x.flatten().reshape(-1, 1)) x = km.cluster_centers_[preds].reshape(x.shape) n_unique = np.unique(x.flatten()).shape[0] print(f"Number of unique activations: {n_unique}") assert n_unique <= 2**bits print('\nEvaluating...') models[-1].evaluate(x, y_test, verbose=2) return cb_tables, models, km_models def weight_compression(weights, bits, axis=0, quantizer=None): """Creates an in, out table that maps weight values to their codebook values. Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf Arguments: weights: Numpy array bits: Number of bits to compress weights to. 
This will results in 2**bits codebook values axis: axis to apply quantization by quantizer: quantizer function that will be applied to codebook values Returns: index_table: array of indices that maps to codebook values for all weights codebook_table: array of codebook values """ assert bits <= 8 n = 2**bits index_table = [] codebook_table = np.zeros((weights.shape[axis], n)) km_models = [None] * weights.shape[axis] for i, w in tqdm(enumerate(np.split(weights, weights.shape[axis], axis))): original_shape = w.shape w = w.ravel() km = KMeans(n, n_init=10) km.fit(w.reshape(-1, 1)) if quantizer: km.cluster_centers_ = quantizer(km.cluster_centers_).numpy() km.cluster_centers_.sort(axis=0) km_models[i] = km codebook_table[i, :] = km.cluster_centers_.flatten() preds = km.predict(w.reshape(-1, 1)) index_table.append(preds.reshape(original_shape)) index_table = np.concatenate(index_table, axis) return index_table, codebook_table def two_tier_embedding_compression(embeddings, bits, quantizer=None): """ Creates tables that maps embedding values to their codebook values. Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf Arguments: weights: Numpy array bits: Number of bits to compress weights to. 
This will results in 2**bits codebook values quantizer: quantizer function that will be applied to codebook values Returns: index_table: array of indices that maps to codebook values cluster_index_table: array that maps each row to the codebook table index codebook_table: array of codebook values quantized_embeddings: Numpy array MxN of quantized weights """ assert bits <= 8 n = 2**bits quantized_embeddings = embeddings.copy() index_table = np.zeros(embeddings.shape, dtype=np.uint8) cluster_index_table = np.zeros(index_table.shape[0], dtype=np.uint8) codebook_table = np.zeros((n, n)) km1 = KMeans(n, n_init=10) km1.fit(embeddings) tier1 = km1.predict(embeddings) km_models = [0] * n block_sizes = [0] * n for block_label in tqdm(range(n)): mask = block_label == tier1 indices = np.arange(embeddings.shape[0])[mask] block = embeddings[mask] km2 = KMeans(n, n_init=10) km2.fit(block.flatten().reshape(-1, 1)) if quantizer: km2.cluster_centers_ = quantizer(km2.cluster_centers_).numpy() km2.cluster_centers_.sort(axis=0) km_models[block_label] = km2 codebook_table[block_label, :] = km2.cluster_centers_.flatten() cluster_index_table[indices] = block_label block_sizes[block_label] = block.shape[0] for i in indices: preds = km2.predict(embeddings[i, :].reshape(-1, 1)) index_table[indices, :] = preds quantized_embeddings[i, :] = km2.cluster_centers_[preds].flatten() print('block_sizes:', block_sizes) return index_table, cluster_index_table, codebook_table, quantized_embeddings ================================================ FILE: qkeras/estimate.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Definition of quantization package.""" # Some parts of the code were taken from # # https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow # # and follows several papers. # # https://arxiv.org/pdf/1609.07061.pdf # from __future__ import absolute_import from __future__ import division from __future__ import print_function from collections import defaultdict import numpy as np import tensorflow.compat.v1 as tf from absl import logging from tensorflow.keras.layers import Activation from tensorflow.keras.layers import InputLayer from tensorflow.keras.models import Model from .qlayers import QActivation from .qlayers import QAdaptiveActivation from .qlayers import QDense from .qconvolutional import QConv1D from .qconvolutional import QConv2D from .qconvolutional import QDepthwiseConv2D from .qconvolutional import QSeparableConv2D from .qpooling import QAveragePooling2D from .quantizers import quantized_bits from .quantizers import quantized_relu from .quantizers import quantized_tanh from .quantizers import quantized_ulaw from .bn_folding_utils import unfold_model from .utils import get_model_sparsity def analyze_accumulator(in_model, x, verbose=False): """Analyzes the distribution of weights to specify size of accumulators. Computes the maximum number of bits for the accumulator assuming the inputs have a distribution given by the dictionary x. 
for each output channel i: max_positive_value[i] = sum(w[i]) + bias[i] for the positive weights max_negative_value[i] = sum(w[i]) + bias[i] for the negative weights max_value = max( max_positive_value[i] * positive(x) + max_negative_value[i] * negative(x), - (max_negative_value[i] * positive(x) + max_positive_value[i] * negative(x)) ) accumulator_size = ceil( log2( max_value ) ) x right now is a dictionary of the form: { layer_name: (min_value, max_value) } in the future, we want to provide a sample and compute this automatically Arguments: in_model: keras model object, model to be evaluated x: dictionary of the form: { layer_name: (min_value, max_value) } input distribution verbose: boolean, if true, print statistics messages Returns: dictionary containing { layer_name: accumulator_size } """ # this function converts a folded model to a "normal" model. It replace folded # layers (e.g., QConv2dBatchnorm) layer with qconv2d layer whenever possible. model = unfold_model(in_model) acc_sizes = {} for layer in model.layers: if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)): weights = layer.get_weights() k = weights[0] if layer.use_bias: b = weights[1] else: b = np.zeros((k.shape[-1],), dtype=np.float32) all_bits = [] nbits = [] for i in range(k.shape[1]): # compute sum of positive weights npp = np.sum(k[..., i] * (k[..., i] > 0)) + (b[i] > 0) * b[i] # compute sum of negative weights nnn = np.sum(k[..., i] * (k[..., i] < 0)) + (b[i] < 0) * b[i] # largest value is # npp * largest positive - nnn * largest_negative or # nnn * largest_positive - npp * largest_negative x_min = x[layer.name][0] x_max = x[layer.name][1] n1 = npp * (x_max > 0) * x_max + nnn * (x_min < 0) * x_min n0 = - (nnn * (x_max > 0) * x_max + npp * (x_min < 0) * x_min) if n1 > n0: nbits.append(n1) else: nbits.append(n0) all_bits.append((n1, n0)) max_bits = int(np.ceil(np.log2(max(nbits)))) acc_sizes[layer.name] = max_bits if 
verbose: print() print(layer.name, "- input range:", x[layer.name]) print(" max value:", np.amax(k)) print(" min value:", np.amin(k)) print(" most positive sum:", np.amax(np.array(all_bits)[:, 0])) print(" most negative sum:", -np.amax(np.array(all_bits)[:, 1])) print(" number of bits:", max_bits) if verbose: print() return acc_sizes def analyze_accumulator_from_sample( in_model, x_sample, mode="conservative", verbose=False): """Extracts range of inputs of quantized layers from samples.""" # mode is one of "conservative", "sampled" if mode not in ["conservative", "sampled"]: ValueError("'mode' has to be 'conservative' or 'sampled'") # this function converts a folded model to a "normal" model. It replace folded # layers (e.g., QConv2DBatchnorm) layer with qconv2d layer whenever possible. model = unfold_model(in_model) # get layer names of quantized layers (QDense and QConv2D) layer_names = [ layer.name for layer in model.layers if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)) ] # sampled mode: just apply x_sample and check the outputs if mode == "sampled": outputs = [ layer.output for layer in model.layers if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)) ] eval_outputs = Model(inputs=model.inputs, outputs=outputs) # predict values for all inputs to quantized layers values = eval_outputs.predict(x_sample) acc_sizes = {} for name, value in zip(layer_names, values): max_value = np.amax(np.abs(value)) if max_value != 0: acc_sizes[name] = int(np.ceil(np.log2(max_value))) else: acc_sizes[name] = 0 return acc_sizes # get inputs of quantized layers (QDense and QConv2D # we use Activation("linear") to trick keras and tensorflow # to avoid direct connections of inputs and any other # artifacts. 
outputs = [ Activation("linear")(layer.input) for layer in model.layers if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)) ] eval_inputs = Model(inputs=model.inputs, outputs=outputs) # predict values for all inputs to quantized layers values = eval_inputs.predict(x_sample) x_dict = {} for name, value in zip(layer_names, values): x_dict[name] = (np.amin(value), np.amax(value)) return analyze_accumulator(model, x_dict, verbose) def get_quant_mode(quant): """Returns the quantizer mode, number of bits and if it is a signed number.""" # qb(n)[0] +/-,exp[1] t(-1,0,+1)[2] b(-1,+1)[3] b(0,1)[4] # entry is tuple: # (instance name, mode #(above), number of bits (-1 means check class), # sign bit) modes = [ # depending on the number of bits, quantized_bits may be 2, 2 ("quantized_bits", 0, -1, 1), ("bernoulli", 4, 1, 0), ("stochastic_ternary", 2, 2, 1), ("ternary", 2, 2, 1), ("stochastic_binary", 3, 1, 1), ("binary", 3, 1, 1), # depending on the number of bits, quantized_relu may be 4, 1 ("quantized_relu", 0, -1, 0), # depending on the number of bits, quantized_tanh may be 2, 2 ("quantized_ulaw", 0, -1, 1), ("quantized_tanh", 0, -1, 1), ("quantized_po2", 1, -1, 1), ("quantized_relu_po2", 1, -1, 0), ("float", 5, 32, 1) ] for (inst, mode, bits, sign) in modes: if not quant or getattr(quant, "__name__", None) == "linear": # if quantizer not specified or linear, we use float type if inst == "float": return (mode, bits, sign) elif quant.__class__.__name__ == inst: if bits == -1: bits = int(quant.bits) if ( isinstance(quant, quantized_bits) or isinstance(quant, quantized_tanh) or isinstance(quant, quantized_ulaw)): if bits == 2 and int(quant.integer) == 1: mode = 2 elif isinstance(quant, quantized_relu): if bits == 1 and int(quant.integer) == 1: mode = 4 return (mode, bits, sign) raise ValueError("Quantizer {} Not Found".format(quant)) def get_operation_type(layer, output_cache): """Checks quantizers 
around layer and weights to get operation type. Determines operator strenght according to the following table. x qb(n) +/-,exp t(-1,0,+1) b(-1,+1) b(0,1) float qb(n) * << >>,- ?,- ?,- ? * +/-,exp << >>,- + ?,- ^ ?,- * w t(-1,0,+1) ?,- ?,- ?,^ ?,^ ^ * b(-1,+1) ?,- ^ ?,^ ^ ^ * b(0,1) ? ?,- ^ ^ ^ * float * * * * * * Arguments: layer: layer in Keras to determine the operation strength. output_cache: cache of input tensor bit sizes. Returns: One of "mult", "fmult", "adder", "barrel", "mux", "xor". Note: "mult" represents quantized bit multiplier, "fmult" represents floating point multiplier. """ wx_table = [ ["mult", "barrel", "mux", "mux", "mux", "fmult"], ["barrel", "adder", "mux", "xor", "mux", "fmult"], ["mux", "mux", "mux", "mux", "xor", "fmult"], ["mux", "xor", "mux", "xor", "xor", "fmult"], ["mux", "mux", "xor", "xor", "xor", "fmult"], ["fmult", "fmult", "fmult", "fmult", "fmult", "fmult"], ] # check if this is a quantized layers (QDense, QConv, QDepthwise) if hasattr(layer, "get_quantizers"): w_quant = layer.get_quantizers()[0] w_mode, w_bits, w_sign = get_quant_mode(w_quant) if w_mode == "float": logging.warning("%s kernel is unquantized!", layer.name) # for the input, get tensor input and search the cache that associates # the quantizer with a tensor if output_cache.get(layer.input.experimental_ref(), None) is not None: x_mode, x_bits, x_sign = get_quant_mode( output_cache.get(layer.input.experimental_ref())) if x_mode == "float": logging.warning("%s input is unquantized!", layer.name) else: print("cannot determine presently model for {}".format(layer.name)) return "null", (w_mode, -1), (w_bits, -1), (w_sign, -1) mode = wx_table[w_mode][x_mode] return mode, (w_mode, x_mode), (w_bits, x_bits), (w_sign, x_sign) raise ValueError("Cannot find suitable quantization candidates for {}".format( layer.name)) def create_activation_cache(model): """Creates an activation cache for the tensors of a model.""" input_quantizer = quantized_relu(8, 0) output_cache = {} # If 
using a Sequential model, the input layer is hidden. Therefore, add the # input quantization to the cache if the first layer is not an input layer if not isinstance(model.layers[0], InputLayer): output_cache[model.layers[0].input.experimental_ref()] = input_quantizer # cache graph tensors' activations for l in model.layers: output_cache[l.output.experimental_ref()] = l if isinstance(l, QActivation) or isinstance(l, QAdaptiveActivation) : output_cache[l.output.experimental_ref()] = l.quantizer elif isinstance(l, InputLayer): # assume the input is 8-bit positive value output_cache[l.output.experimental_ref()] = input_quantizer elif l.__class__.__name__ in [ "QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D" ]: output_cache[l.output.experimental_ref()] = l.activation else: if isinstance(l.input, list): # right now, we just get the first one - we assume this is the leading # one. all_q = [ output_cache.get(l.input[i].experimental_ref()) for i in range(len(l.input)) ] q = all_q[0] else: q = output_cache.get(l.input.experimental_ref(), None) output_cache[l.output.experimental_ref()] = q if q is None: raise ValueError("Unknown operation in {}".format(l.name)) return output_cache def extract_model_operations(in_model): """Determines types of operations for convolutions.""" model = unfold_model(in_model) cache_q = create_activation_cache(model) cache_o = {} operations = {} for layer in model.layers: if layer.__class__.__name__ == "InputLayer": continue if isinstance(layer.input, list): input_shape = [ cache_o.get(layer.input[i].experimental_ref(), layer.input[i].get_shape()) for i in range(len(layer.input)) ] else: input_shape = cache_o.get(layer.input.experimental_ref(), layer.input.get_shape()) # Check if the inputs are a list of Dimensions if isinstance(input_shape, list): # Iterate though all of the input shapes and extract the dimension values for i, dim in enumerate(input_shape): if isinstance(dim[0], tf.Dimension): shape = [None] for j in range(1, len(dim)): 
shape.append(dim[j] if isinstance(dim[j], int) else dim[j].value) input_shape[i] = tuple(shape) output_shape = layer.compute_output_shape(input_shape) cache_o[layer.output.experimental_ref()] = output_shape if layer.__class__.__name__ not in ["QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D", "QSeparableConv1D", "QSeparableConv2D"]: continue if layer.__class__.__name__ in ["QConv2D"]: _, _, _, channels_i = input_shape _, height_o, width_o, channels_o = output_shape weight = layer.get_weights()[0] kernel_h, kernel_w, _, _ = weight.shape number_of_operations = ( height_o * width_o * channels_o * kernel_h * kernel_w * channels_i) number_of_weights = (kernel_h * kernel_w * channels_o * channels_i) number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warning("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QConv1D"]: _, _, channels_i = input_shape _, time_o, channels_o = output_shape weight = layer.get_weights()[0] kernel_length, _, _ = weight.shape number_of_operations = ( time_o * channels_o * kernel_length * channels_i) number_of_weights = (kernel_length * channels_o * channels_i) number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warning("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QDepthwiseConv2D"]: _, _, _, channels_i = input_shape _, height_o, width_o, channels_o = output_shape weight_1 = layer.get_weights()[0] 
kernel_h, kernel_w, _, _ = weight_1.shape number_of_operations = ( kernel_h * kernel_w * height_o * width_o * channels_i) number_of_weights = (kernel_h * kernel_w * channels_o * channels_i) number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warning("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QSeparableConv1D"]: _, _, channels_i = input_shape _, time_o, channels_o = output_shape weight_1 = layer.get_weights()[0] kernel_length, _, _ = weight_1.shape number_of_operations = ( kernel_length * time_o * channels_i + time_o * channels_o) number_of_weights = [ kernel_length * channels_i, channels_o * channels_i] number_of_bias = 0 if len(layer.get_weights()) > 2: number_of_bias = layer.get_weights()[2].shape[0] depthwise_quant, pointwise_quant, bias_quant = layer.get_quantizers() depthwise_type = get_quant_mode(depthwise_quant) pointwise_type = get_quant_mode(pointwise_quant) weight_type = [depthwise_type, pointwise_type] bias_type = get_quant_mode(bias_quant) if depthwise_type[0] == "float": logging.warning("%s depthwise kernel is unquantized!", layer.name) if pointwise_type[0] == "float": logging.warning("%s pointwise kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QSeparableConv2D"]: _, _, _, channels_i = input_shape _, height_o, width_o, channels_o = output_shape weight_1 = layer.get_weights()[0] kernel_h, kernel_w, _, _ = weight_1.shape number_of_operations = ( kernel_h * kernel_w * height_o * width_o * channels_i + height_o * width_o * channels_o) number_of_weights = [ kernel_h * kernel_w * channels_i, channels_o * channels_i] 
number_of_bias = 0 if len(layer.get_weights()) > 2: number_of_bias = layer.get_weights()[2].shape[0] depthwise_quant, pointwise_quant, bias_quant = layer.get_quantizers() depthwise_type = get_quant_mode(depthwise_quant) pointwise_type = get_quant_mode(pointwise_quant) weight_type = [depthwise_type, pointwise_type] bias_type = get_quant_mode(bias_quant) if depthwise_type[0] == "float": logging.warning("%s depthwise kernel is unquantized!", layer.name) if pointwise_type[0] == "float": logging.warning("%s pointwise kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QDense"]: # Find the input and output shapes out of all possible dimensions. # Usually the first shape dimension will be the batch size, and the second # shape dimension will be the number of channels. However, if the # Dense layer is in Squeeze-and-Excite, the first shape dimension # will be the batch size, the second and third shape dimension will be the # spatial sizes (should both be 1), and the fourth shape dimensions will # be the number of channels ishape = np.array([i for i in input_shape if i is not None]) assert sum(ishape > 1) == 1, "Tensor shape has multiple >1 size dims" size_i = np.max(ishape) oshape = np.array([i for i in output_shape if i is not None]) assert sum(oshape > 1) == 1, "Tensor shape has multiple >1 size dims" size_o = np.max(oshape) number_of_operations = int(size_i * size_o) number_of_weights = size_i * size_o number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warnings("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warnings("%s bias is unquantized!", layer.name) # "number_of_operations" is tensor_shape.Dimension type 
operations[layer.name] = { "type": get_operation_type(layer, cache_q), "number_of_operations": number_of_operations if isinstance(number_of_operations, int) else number_of_operations.value, "number_of_weights": number_of_weights, # if isinstance(number_of_weights, int) else number_of_weights.value, "number_of_bias": number_of_bias, # if isinstance(number_of_bias, int) else number_of_bias.value, "type_of_weights": weight_type, "type_of_bias": bias_type, } return operations def print_qstats(model): """Prints quantization statistics for the model.""" model_ops = extract_model_operations(model) ops_table = defaultdict(lambda: 0) print("") print("Number of operations in model:") for name in sorted(model_ops): mode, _, sizes, signs = model_ops[name]["type"] number = model_ops[name]["number_of_operations"] sign = "s" if sum(signs) > 0 else "u" op_name = sign + mode + "_" + str(sizes[0]) + "_" + str(sizes[1]) ops_table[op_name] += number print(" {:30}: {:5} ({})".format(str(name), str(number), str(op_name))) print("") print("Number of operation types in model:") for key in sorted(ops_table.keys()): if ops_table[key] > 0: print(" {:30}: {}".format(key, ops_table[key])) print("") print("Weight profiling:") total_bits = 0 for name in sorted(model_ops): weight_type = model_ops[name]["type_of_weights"] n_weights = model_ops[name]["number_of_weights"] if isinstance(weight_type, list): for i, (w_type, w_number) in enumerate(zip(weight_type, n_weights)): _, w_sizes, _ = w_type total_bits += w_number * w_sizes print(" {:30} : {:5} ({}-bit unit)".format( str(name) + "_weights_" + str(i), str(w_number), str(w_sizes))) else: _, w_sizes, _ = weight_type total_bits += n_weights * w_sizes print(" {:30} : {:5} ({}-bit unit)".format( str(name) + "_weights", str(n_weights), str(w_sizes))) _, b_sizes, _ = model_ops[name]["type_of_bias"] b_number = model_ops[name]["number_of_bias"] total_bits += b_number * b_sizes print(" {:30} : {:5} ({}-bit unit)".format( str(name) + "_bias", str(b_number), 
str(b_sizes))) print(" " + ("-"*40)) print(" {:30} : {:5}".format("Total Bits", total_bits)) print("") print("Weight sparsity:") total_sparsity, per_layer = get_model_sparsity(model, per_layer=True) for layer in per_layer: print(" {:30} : {:.4f}".format(str(layer[0]), layer[1])) print(" " + ("-"*40)) print(" {:30} : {:.4f}".format("Total Sparsity", total_sparsity)) ================================================ FILE: qkeras/experimental/quantizers/__init__.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Exports experimental quantizers.""" import tensorflow as tf from qkeras.experimental.quantizers.quantizers_po2 import quantized_bits_learnable_po2 from qkeras.experimental.quantizers.quantizers_po2 import quantized_bits_msqe_po2 __version__ = "0.9.0" ================================================ FILE: qkeras/experimental/quantizers/quantizers_po2.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Power-of-2 quantizers based on https://arxiv.org/pdf/2210.03671.pdf. Example usages: < MSQE-based quantizer > Default (using the second moments MSQE optimization and the outlier mask): quantized_bits_msqe_po2(bits=4) Per-channel quantization: quantized_bits_msqe_po2(bits=4, scale_axis=3, per_channel_scale=True) < Gradient-based (learnable) quantizer > Default (using the MSQE round (Round-to-Lower-MSQE)): quantized_bits_learnable_po2(bits=4) Per-channel quantization: quantized_bits_learnable_po2(bits=4, scale_axis=3, per_channel_scale=True) Relu activation (the MSQE round is not supported for non-variable tensors): quantized_bits_learnable_po2(bits=4, keep_negative=False, use_second_moments_msqe_opt=False, use_po2_scale_msqe_round=False) """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc import re import numpy as np from six.moves import range import tensorflow as tf import tensorflow.keras.backend as K from tensorflow.keras.layers import Layer def _update_ema_variable(variable, new_val, ema_decay, is_initialized, should_update): """Updates exponentially moving average (EMA) of a tf.Variable. This function directly updates the variable. Args: variable: A tf.Variable to be updated. new_val: A tensor with a new value to update 'variable'. Its shape is same as 'variable'. ema_decay: A scalar python float or tensor. EMA decay factor. 
is_initialized: A scalar tensor indicating whether 'variable' has been initialized or not. should_update: A scalar python bool or tensor indicating whether to update 'variable' or not. """ if not tf.is_tensor(should_update): should_update = tf.convert_to_tensor(should_update) val_to_update = ema_decay * variable + (1.0 - ema_decay) * new_val val_to_update = tf.cond(is_initialized, lambda: val_to_update, lambda: new_val) val_to_update = tf.cond(should_update, lambda: val_to_update, lambda: variable) variable.assign(val_to_update) def _get_scaling_axis(scale_axis, len_axis): """Gets the axis to perform scaling with. Args: scale_axis: an integer scalar tensor or None to get which axis to calculate scale from. If None, the scaling axis is set based on the image data format. len_axis: an integer scalar tensor of the dimension of the tensor to be quantized. Returns: A list of axes to be quantized together. """ if scale_axis is not None: axis = list(range(scale_axis)) axis += list(range(scale_axis + 1, len_axis)) else: if K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) return axis def _get_msqe_scale(x, q, scale_axis=None, per_channel_scale=True, msqe_weight=None): """Gets scaling factor for scaling the tensor per channel. It uses a linear least squares method to find the scaling factor. (https://en.wikipedia.org/wiki/Linear_least_squares) Args: x: A tensor object. Its elements are in float. q: A tensor object. Its elements are in quantized format of x. scale_axis: which axis to calculate scale from per_channel_scale: A bool. Whether to perform per-channel scaling or not. msqe_weight: A tensor object or None. Its elements are in float, which are used to perform weighted least squares optimization. If None, it performs non-weighted least squares optimization. Returns: A scaling factor tensor or scalar for scaling tensor per channel or per layer. 
""" # in different tensorflow version (e.g., 2.4) # x.shape is a tuple which doesn't have as_list() method try: x_shape = x.shape.as_list() except AttributeError: x_shape = list(x.shape) len_axis = len(x_shape) if msqe_weight is not None: sqrt_msqe_weight = tf.math.sqrt(msqe_weight) x = tf.math.multiply(x, sqrt_msqe_weight) q = tf.math.multiply(q, sqrt_msqe_weight) if not per_channel_scale: qx = K.mean(q * x, keepdims=True) qq = K.mean(q * q, keepdims=True) else: if len_axis > 1: axis = _get_scaling_axis(scale_axis, len_axis) qx = K.mean(tf.math.multiply(q, x), axis=axis, keepdims=True) qq = K.mean(tf.math.multiply(q, q), axis=axis, keepdims=True) else: # No summing (averaging) along the channel axis to get per-channel # scales. qx = tf.math.multiply(q, x) qq = tf.math.multiply(q, q) scale = qx / (qq + K.epsilon()) # Rounds the exponent to the nearest integer for power-of-2 scale. return K.pow(2.0, tf.math.rint(K.log(scale + K.epsilon()) / np.log(2.0))) class BaseQuantizerPO2(Layer): # pylint: disable=invalid-name """This is the base class from which all power-of-2 quantizers inherit, which is based on the reference paper (https://arxiv.org/pdf/2210.03671.pdf). Attributes: bits: Integer, number of bits to perform quantization. keep_negative: Boolean, if true, it keeps negative values and sets the quantization levels symmetrically around 0. If false, negative numbers is clipped to 0. scale_axis: Integer, which axis to calculate scale from. per_channel_scale: Boolean, whether to perform per-channel (true) or per-layer (false) quantization. init_scale: Float or None, initial scale factor to initialize the scale with (if None, it will be initialized based on the first inputs.). use_second_moments_msqe_opt: Bool, whether to use the second moments based MSQE optimization or not. The second moments is used as a weighting factor to calculate the quantization error. second_moments_ema_decay: Float, EMA decay factor for the second moments update. 
use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight. use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask. use_stable_scale_exponent: Bool, whether to use exponentially moving averaged ("stable") scale exponent or not. Note: there is a tf.Variable (self.switch_to_stable_scale) that controls when to apply the stable scale exponent (i.e., if use_stable_scale_exponent is true and self.switch_to_stable_scale is false, the stable scale exponent is updated but not used.). stable_scale_ema_decay: Float, EMA decay factor for the stable scale update. is_gradient_based: Bool, whether to optimize the scale_exponent from the gradients or not (i.e, if true, self.scale_exponent is set to be "trainable".) """ def __init__(self, bits=4, keep_negative=True, scale_axis=None, per_channel_scale=False, init_scale=None, use_second_moments_msqe_opt=False, second_moments_ema_decay=0.999, use_sqrt_of_msqe_weight=True, use_outlier_mask_msqe_weight=True, use_stable_scale_exponent=False, stable_scale_ema_decay=0.99, is_gradient_based=True, **kwargs): self.bits = bits self.keep_negative = keep_negative self.scale_axis = scale_axis self.per_channel_scale = per_channel_scale self.init_scale = init_scale self.use_second_moments_msqe_opt = use_second_moments_msqe_opt self.second_moments_ema_decay = second_moments_ema_decay self.use_sqrt_of_msqe_weight = use_sqrt_of_msqe_weight self.use_outlier_mask_msqe_weight = use_outlier_mask_msqe_weight self.use_stable_scale_exponent = use_stable_scale_exponent self.stable_scale_ema_decay = stable_scale_ema_decay self.is_gradient_based = is_gradient_based self.alpha = "auto_po2" # scale exponent to be learned. self.scale_exponent = None # Stores the power-of-2 scale factor used for quantization. self.scale = None # Axes to perform reduce sum (mean) operation. self.reduce_axes = None # Running averaged gradient variances of the input self.msqe_weight = None # A knob to switch to "stable_scale_exponent". 
self.switch_to_stable_scale = None # variable holding the running averaged scale exponent self.stable_scale_exponent = None # Indicator variable whether to update stable_scale_exponent or not. This # can be used as an indicator whether it is in training or not. self.should_update_stable_scale_exponent = None # The assignments from "kwargs" are to restore from the config. # The maximum quantization level of negative numbers. self.qn = kwargs.pop("qn") if "qn" in kwargs else None # The maximum quantization level of positive numbers. self.qp = kwargs.pop("qp") if "qp" in kwargs else None # Axes scaled together. self.scaled_axes = kwargs.pop( "scaled_axes") if "scaled_axes" in kwargs else None super().__init__(**kwargs) def build(self, input_shape): """Creates and initializes variables.""" # Number of quantization levels. levels = tf.math.pow(2.0, tf.cast(self.bits, dtype=tf.float32)) - 1 # Sets the number of quantization levels for the negative and positive # ranges. if self.keep_negative: # Sets them symmetric about 0 to reduce the quantization induced bias. self.qn = float((levels - 1.0) / 2.0) self.qp = float((levels - 1.0) / 2.0) else: self.qn = 0.0 self.qp = float(levels) if self.init_scale is None: init_scale_exponent = 0.0 init_scale = 1.0 else: init_scale = self.init_scale + K.epsilon() init_scale_exponent = tf.math.log(init_scale) / tf.math.log(2.0) if self.scale_axis is None: self.scale_axis = self._get_scale_axis(input_shape) self.scaled_axes = self._get_scaled_axes(self.scale_axis, input_shape) if self.per_channel_scale: scale_exponent_shape = tf.TensorShape([ input_shape[i] if i == self.scale_axis else 1 for i in range(len(input_shape)) ]) else: scale_exponent_shape = [1 for i in range(len(input_shape))] # Creates the scale exponent variable to be learned. 
self.scale_exponent = tf.Variable( lambda: tf.constant( init_scale_exponent, shape=scale_exponent_shape, dtype=tf.float32), trainable=self.is_gradient_based, synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.compat.v1.VariableAggregation.MEAN, name="scale_exponent") # "self.scale" is not a trainable variable which gets assigned not learned. self.scale = tf.Variable( lambda: tf.constant( init_scale, shape=scale_exponent_shape, dtype=tf.float32), trainable=False, synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.compat.v1.VariableAggregation.MEAN, name="scale") self.reduce_axes = [ i for i in range(len(self.scale_exponent.shape)) if self.scale_exponent.shape[i] == 1 ] if self.use_second_moments_msqe_opt: msqe_weight_shape = tf.TensorShape( [1 if s is None else s for s in input_shape]) self.msqe_weight = tf.Variable( lambda: tf.ones(shape=msqe_weight_shape), trainable=False, dtype=tf.float32, name="msqe_weight") if self.use_stable_scale_exponent: self.stable_scale_exponent = tf.Variable( lambda: tf.zeros_like(self.scale_exponent), dtype=tf.float32, trainable=False, synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.compat.v1.VariableAggregation.MEAN, name="stable_scale_exponent") self.switch_to_stable_scale = tf.Variable( False, trainable=False, name="switch_to_stable_scale") self.should_update_stable_scale_exponent = tf.Variable( False, trainable=False, name="should_update_stable_scale_exponent") # Inidicator variable for initializing variables (e.g, the scale exponent # etc.). self.is_initialized = tf.Variable( False, trainable=False, name="is_initialized") def call(self, inputs, msqe_weight=None): """Returns a fake quantized tensor of 'inputs'. Args: inputs: A tensor to be fake quantized. msqe_weight: A tensor which is used in the scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. 
Its shape is same as 'inputs' and its dtype is `float32` If None, it will be set by "self._get_msqe_weight" (this should be left as None unless you explicitly assign its value in a different way.). Returns: A tensor of fake quantized input. Its shape is same as 'inputs' and its dtype is `float32`. """ if not self.keep_negative: # Quantize only positive values (e.g. relu activation). inputs = tf.keras.activations.relu(inputs) if self.use_second_moments_msqe_opt: return self._update_second_moments_msqe_weight( self._quantize(inputs, msqe_weight=msqe_weight), inputs) return self._quantize(inputs, msqe_weight=msqe_weight) def _quantize(self, inputs, msqe_weight=None): """Returns (fake) quantized inputs and optimizes the scaling factor. Args: inputs: A tensor to be fake quantized and used in optimizing the scaling factor. msqe_weight: A tensor or None, which is used in the MSQE optimizations. Returns: A tensor of fake quantized inputs. """ # Initialize self.scale_exponent (it is initialized only once). self._initialize_scale_exponent(inputs) scale = self._get_scale(inputs, msqe_weight=msqe_weight) if self.use_stable_scale_exponent: # Only outputs the stable scale when 'self.switch_to_stable_scale' is set # to true, which is false by default. scale = self._get_stable_scale(scale) # Stores the scaling factors used for quantization. self.scale.assign(scale) # Perform rounding. inputs_rounded = self._round_quant(inputs / scale) # Perform clipping. inputs_clipped = self._clip_quant(inputs_rounded) inputs_quant = scale * inputs_clipped # Update initialization indicator. self.is_initialized.assign(True) return inputs_quant @tf.custom_gradient def _update_second_moments_msqe_weight(self, input_quantized, inputs): """Updates the second moments of the gradients respect to the inputs. Args: input_quantized: A tensor which is the output from 'self._quantize' method (fake quantized input). inputs: A tensor which is the input to 'self._quantize' method. 
Returns: 'input_quantized', the upstream gradient of 'input_quantized', and the gradients (zeros) of 'inputs' """ def grad(upstream_grad): """Calculates and updates the second moments of the gradients.""" # Get a mask for clipped inputs (i.e., 1.0 for rounded inputs and # 0.0 for clipped inputs). self.scale is the previously used scaling # factors. clip_error_mask = self._get_clipped_inputs_mask(inputs, self.scale) # Calculate the second moments of the gradients respect to 'inputs' that # is clip_error_mask * upstream_grad. second_moments = clip_error_mask * upstream_grad * upstream_grad # Update the second moments _update_ema_variable( self.msqe_weight, second_moments, self.second_moments_ema_decay, self.is_initialized, should_update=True) return upstream_grad, tf.zeros_like(inputs) return input_quantized, grad @abc.abstractmethod def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None): """Returns power-of-2 scaling factors for quantization. Args: inputs: A tensor to be used to optimize the scale value. reduce_axes: A list of axes to be summed (averaged) over. msqe_weight: A tensor which is used in scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. Its shape is same as 'inputs' and its dtype is `float32`. Returns: A tensor of power-of-2 scaling factors. Its shape is same as 'self.scale_exponent' and its dtype is `float32`. """ raise NotImplementedError @abc.abstractmethod def _get_init_scale_exponent(self, inputs): """Returns a scale exponent tensor to initialize "self.scale_exponent". Args: inputs: A tensor to be used to calculate initial scale exponent values. Returns: A tensor of scale exponent. Its shape is same as 'self.scale_exponent' and its dtype is `float32`. """ raise NotImplementedError @abc.abstractmethod def _get_outlier_mask(self, inputs): """Returns a tensor to suppress outliers in the input for MSQE optimizations. Args: inputs: A tensor to be used to generate the outlier mask. 
Returns: A tensor to mask out the outliers of the inputs. Its shape is same as 'inputs' and its dtype is `float32`. """ raise NotImplementedError def _get_msqe_weight(self, inputs=None): """Returns weighting factors for MSQE optimizations. Args: inputs: A tensor to be used to generate the outlier mask. Returns: A tensor to be used as weighting factors for MSQE optimizations or None. Note: it is assumed that when 'None' is returned, no weighting factors will be applied for MSQE optimizations. Raises: ValueError: if 'inputs' is None when self.use_outlier_mask_msqe_weight is True. """ if self.use_outlier_mask_msqe_weight and inputs is None: raise ValueError( f"inputs must not be None if self.use_outlier_mask_msqe_weight is" f" True.") if self.msqe_weight is None: # Only returns the outlier mask return self._get_outlier_mask( inputs) if self.use_outlier_mask_msqe_weight else None msqe_weight = self.msqe_weight if self.use_sqrt_of_msqe_weight: # To use square rooted msqe_weight msqe_weight = tf.math.sqrt(msqe_weight) if self.use_outlier_mask_msqe_weight: # Returns the outlier mask modulated msqe_weight msqe_weight = msqe_weight * self._get_outlier_mask(inputs) return msqe_weight def _get_stable_scale(self, scale): """Updates and returns power-of-2 'stable' scaling factors. It updates the exponential moving average (EMA) of the scale exponent when self.should_update_stable_scale_exponent is true and self.switch_to_stable_scale is false, and returns scaling factor based on the stable (EMAed) scale exponent when self.switch_to_stable_scale is set true else returns passed-in 'scale'. Args: scale: A tensor of power-of-2 scaling factors. Returns: A tensor of power-of-2 scaling factors. """ # Freezes updating exponential moving average of self.stable_scale_exponent # when self.should_update_stable_scale_exponent is false or # self.switch_to_stable_scale is set True. 
should_update = tf.logical_and(self.should_update_stable_scale_exponent, not self.switch_to_stable_scale) # Update the stable (EMAed) scale exponent. # Note: when 'self.is_initialized' is false, 'self.stable_scale_exponent' is # assigned with the scale exponent of the 'scale' input otherwise it is # updated with exponential moving average. stable_scale = self._update_stable_scale_exponent(scale, should_update, self.is_initialized) # Use the stable scale only when self.switch_to_stable_scale is set True. scale = tf.cond(self.switch_to_stable_scale, lambda: stable_scale, lambda: scale) return scale def _update_stable_scale_exponent(self, scale, should_update, is_initialized): """Updates and returns stable (EMAed) power-of-2 scaling factors. It performs exponential moving average on the scale exponent, not on the scale itself. Args: scale: a tensor to be used to update exponential moving average of scale exponents. should_update: A bool. Whether to update exponential moving average of scale exponents. is_initialized: A bool. Whether to initialize the stable scale exponent. Returns: A tensor of (stable) power-of-2 scaling factors """ scale_exponent = self._get_po2_scale_exponent(scale) _update_ema_variable( self.stable_scale_exponent, scale_exponent, ema_decay=self.stable_scale_ema_decay, is_initialized=is_initialized, should_update=should_update) return tf.math.pow(2.0, tf.math.rint(self.stable_scale_exponent)) def _initialize_scale_exponent(self, inputs): """Initializes the scale exponent only once. It only initializes 'self.scale_exponent' once when there is no preset initial scaling factor (i.e., self.init_scale is None). Args: inputs: A tensor, where the initial scale exponent is based on. 
""" update_cond = tf.math.logical_and(not self.is_initialized, self.init_scale is None) scale_exponent_to_init = tf.cond( update_cond, lambda: tf.stop_gradient(self._get_init_scale_exponent(inputs)), lambda: self.scale_exponent) self.scale_exponent.assign(scale_exponent_to_init) def _get_clipped_inputs_mask(self, inputs, scale): """Returns a tensor to mask out the clipped inputs. The mask has 1.0 for the rounded inputs and 0.0 for the clipped inputs. Args: inputs: A tensor to get the clipping mask from. scale: A tensor of the scaling factor. Returns: A tensor to mask out the clipped inputs. """ inputs_rounded = tf.math.rint(inputs / scale) clip_error_mask = tf.math.logical_and( tf.less_equal(inputs_rounded, self.qp), tf.greater_equal(inputs_rounded, -self.qn)) return tf.cast(clip_error_mask, tf.float32) def _get_scale_axis(self, input_shape): """Returns the scaling axis based on the input shape. Args: input_shape: a tuple of integers which is the size of the input channels. Returns: A scalar value. """ if K.image_data_format() == "channels_last": scale_axis = (len(input_shape) - 1) if len(input_shape) else 0 else: scale_axis = 1 if input_shape[0] is None else 0 return scale_axis def _get_scaled_axes(self, scale_axis, input_shape): """Returns the axes scaled together. Args: scale_axis: an integer of the scaling axis. input_shape: a tuple of integers which is the size of the input channels. Returns: A list of integers. """ if self.per_channel_scale: scaled_axes = list(range(scale_axis)) else: scaled_axes = list(range(len(input_shape))) return scaled_axes def _clip_quant(self, inputs): """Returns clipped inputs (scale-normalized) by the quantization levels. Args: inputs: A tensor (scale-normalized input value). Returns: A tensor clipped by the quantization levels. """ return tf.minimum(tf.maximum(inputs, -self.qn), self.qp) def _round_quant(self, inputs): """Returns rounded inputs using a straight-through estimator (STE). Args: inputs: A tensor to be rounded. 
    Returns:
      A tensor through a straight-through estimator.
    """
    # Forward pass: rint(inputs); backward pass: identity (STE).
    return inputs + tf.stop_gradient(-inputs + tf.math.rint(inputs))

  def _simple_quantize(self, inputs, scale, should_return_q=False):
    """Returns quantized inputs without a straight-through estimator (STE).

    Args:
      inputs: A tensor to be quantized.
      scale: A tensor of the scaling factor.
      should_return_q: if true, quantized inputs in integer will be also
        returned.

    Returns:
      A tensor of fake quantized inputs (, a tensor of quantized inputs)
    """
    inputs_rounded = tf.math.rint(inputs / scale)
    inputs_clipped = self._clip_quant(inputs_rounded)
    if should_return_q:
      # Also return the integer codes alongside the de-quantized values.
      return scale * inputs_clipped, inputs_clipped
    else:
      return scale * inputs_clipped

  def _get_po2_scale(self, scale):
    """Returns power-of-2 constrained scaling factors.

    Args:
      scale: A tensor to be power-of-2 constrained.

    Returns:
      A tensor (power-of-2 constrained scaling factor).
    """
    return tf.math.pow(2.0, self._get_po2_scale_exponent(scale))

  def _get_po2_scale_exponent(self, scale):
    """Returns power-of-2 constrained scale exponent.

    Args:
      scale: A tensor to get power-of-2 scale exponent from.

    Returns:
      A tensor constrained to be in integer values.
    """
    # K.epsilon() guards log(0) for zero scales.
    scale_exponent = tf.math.log(scale + K.epsilon()) / tf.math.log(2.0)
    return tf.round(scale_exponent)

  def _calculate_msqe(self, x, xq, reduce_axes=None, msqe_weight=None):
    """Returns the mean squared quantization error (MSQE).

    Args:
      x: a tensor of the original inputs.
      xq: a tensor of the fake quantized inputs.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        MSQE calculation.

    Returns:
      A tensor of the MSQE
    """
    if reduce_axes is None:
      reduce_axes = self.reduce_axes
    msqe = tf.math.pow(x - xq, 2.0)
    if msqe_weight is not None:
      # Element-wise weighting of the squared errors before reduction.
      msqe *= msqe_weight
    return tf.reduce_sum(msqe, axis=reduce_axes, keepdims=True)

  def _calculate_msqe_inputs(self,
                             inputs,
                             scale,
                             reduce_axes=None,
                             msqe_weight=None):
    """Returns the mean squared quantization error (MSQE) of the inputs.

    Args:
      inputs: a tensor to calculate the MSQE from.
      scale: a tensor to scale (quantize) the input with.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        MSQE calculation.

    Returns:
      A tensor of the MSQE
    """
    inputs_quant = self._simple_quantize(inputs, scale)
    return self._calculate_msqe(
        inputs, inputs_quant, reduce_axes=reduce_axes, msqe_weight=msqe_weight)

  def _least_squares_msqe_scale(self,
                                inputs,
                                scale,
                                reduce_axes=None,
                                msqe_weight=None,
                                num_lls_iters=3,
                                should_return_msqe=False):
    """Returns power-of-2 scaling factors from linear least squares regression.

    Args:
      inputs: a tensor to optimize the scaling factor from.
      scale: a tensor to be used as initial quantize the input with.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        linear least squares regression.
      num_lls_iters: An integer. Number of linear least squares regression
        iterations.
      should_return_msqe: A bool. Whether to return the MSQE of the inputs.

    Returns:
      A tensor of power-of-2 scaling factors (, a tensor of the MSQE)
    """
    if reduce_axes is None:
      reduce_axes = self.reduce_axes
    best_scale = tf.identity(scale)
    xq, q = self._simple_quantize(inputs, best_scale, should_return_q=True)
    best_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
    for _ in range(num_lls_iters):
      # performs linear least squares regression: refit the scale to the
      # current integer codes 'q', then re-quantize with the new scale.
      new_scale = _get_msqe_scale(
          x=inputs,
          q=q,
          scale_axis=self.scale_axis,
          per_channel_scale=self.per_channel_scale,
          msqe_weight=msqe_weight)
      xq, q = self._simple_quantize(inputs, new_scale, should_return_q=True)
      new_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
      # Update the best scale and the best msqe (keep the previous best when
      # the new iterate does not improve).
      best_scale = tf.where(new_msqe < best_msqe, new_scale, best_scale)
      best_msqe = tf.where(new_msqe < best_msqe, new_msqe, best_msqe)
    if should_return_msqe:
      return best_scale, best_msqe
    else:
      return best_scale

  def _line_search_msqe_scale(self,
                              inputs,
                              scale,
                              reduce_axes=None,
                              msqe_weight=None,
                              line_search_range=6,
                              should_return_msqe=False):
    """Returns power-of-2 scaling factors from line search.

    Args:
      inputs: a tensor to optimize the scaling factor from.
      scale: a tensor to be used as initial quantize the input with.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        line search.
      line_search_range: An integer. Search range of the line search.
      should_return_msqe: A bool. Whether to return the MSQE of the inputs.

    Returns:
      A tensor of power-of-2 scaling factors (, a tensor of the MSQE)
    """
    if reduce_axes is None:
      reduce_axes = self.reduce_axes
    best_scale = tf.identity(scale)
    xq = self._simple_quantize(inputs, best_scale)
    best_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
    best_scale_exponent = self._get_po2_scale_exponent(best_scale)
    # PO2 exponent search offsets: symmetric, nonzero offsets around the
    # current exponent (0 is the starting point, already evaluated).
    end_range = line_search_range // 2 + 1
    po2_exponent_offsets = [i for i in range(-end_range+1,end_range) if i != 0]
    for exp_offset in po2_exponent_offsets:
      # Optimize scale by probing the neighboring power-of-2 exponent.
      new_scale = tf.math.pow(2.0, best_scale_exponent + exp_offset)
      xq = self._simple_quantize(inputs, new_scale)
      new_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
      # Update the best scale and msqe
      best_scale = tf.where(new_msqe < best_msqe, new_scale, best_scale)
      best_msqe = tf.where(new_msqe < best_msqe, new_msqe, best_msqe)
    if should_return_msqe:
      return best_scale, best_msqe
    else:
      return best_scale

  def _optimize_msqe_scale(self,
                           inputs,
                           scale,
                           reduce_axes=None,
                           msqe_weight=None,
                           num_lls_iters=None,
                           should_line_search=True,
                           line_search_range=None):
    """Returns optimized power-of-2 scaling factors.

    It performs an iterative linear least squares regression and an optional
    line search to find optimal power-of-2 scaling factors for the given
    inputs from the initial scaling factors ('scale').

    Args:
      inputs: a tensor to find optimal power-of-2 scaling factors for.
      scale: a tensor to be used as initial scaling factors.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        optimizations.
      num_lls_iters: An integer. Number of linear least squares regression
        iterations.
      should_line_search: A bool. Whether to perform a line search.
      line_search_range: An integer. Search range of the line search.
Returns: A tensor of power-of-2 scaling factors, A tensor of the MSQE """ if reduce_axes is None: reduce_axes = self.reduce_axes if num_lls_iters is None: num_lls_iters = self.num_lls_iters if line_search_range is None: line_search_range = self.line_search_range scale, msqe = self._least_squares_msqe_scale( inputs, scale, reduce_axes=self.reduce_axes, msqe_weight=msqe_weight, num_lls_iters=num_lls_iters, should_return_msqe=True) if should_line_search: scale, msqe = self._line_search_msqe_scale( inputs, scale, reduce_axes=self.reduce_axes, msqe_weight=msqe_weight, line_search_range=line_search_range, should_return_msqe=True) # Having an additional '_get_po2_scale' is just to make sure returning # scaling factors are in power-of-2. return self._get_po2_scale(scale), msqe def max(self): """Returns the maximum value that the quantizer can represent.""" if hasattr(self, "is_initialized") and self.is_initialized.numpy(): return self._get_scale() * self.qp else: return 1.0 def min(self): """Returns the minimum value that the quantizer can represent.""" if self.keep_negative: if hasattr(self, "is_initialized") and self.is_initialized.numpy(): return self._get_scale() * (-self.qn) else: return -1.0 else: return 0.0 class quantized_bits_learnable_po2(BaseQuantizerPO2): # pylint: disable=invalid-name """Quantizes the number to a number of bits by learnable scale factors. For more details, see https://arxiv.org/abs/2210.03671. The implementation was inspired by "TRAINED QUANTIZATION THRESHOLDS FOR ACCURATE AND EFFICIENT FIXED-POINT INFERENCE OF DEEP NEURAL NETWORKS" (https://arxiv.org/pdf/1903.08066.pdf). Attributes: bits: Integer, number of bits to perform quantization. keep_negative: Boolean, if true, it keeps negative values and sets the quantization levels symmetrically around 0. If false, negative numbers is clipped to 0. scale_axis: Integer, which axis to calculate scale from. 
per_channel_scale: Boolean, whether to perform per-channel (true) or per-layer (false) quantization. init_scale: Float or None, initial scale factor to initialize the scale with (if None, it will be initialized based on the first inputs.). use_second_moments_msqe_opt: Bool, whether to use the second moments based MSQE optimization or not. second_moments_ema_decay: Float, EMA decay factor for the second moments update. use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight. use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask. use_stable_scale_exponent: Bool, whether to use exponentially moving averaged ("stable") scale exponent or not. Note: there is a tf.Variable (self.switch_to_stable_scale) that controls when to apply the stable scale exponent (i.e., if use_stable_scale_exponent is true and self.switch_to_stable_scale is false, the stable scale exponent is updated but not used.). stable_scale_ema_decay: Float, EMA decay factor for the stable scale update. min_init_scale: float or None. minimum initial scale value. If None, the initial scale value is not bounded by a minimum value. It is useful to prevent zero initial scale value for inputs with all zeros (e.g., bias). use_po2_scale_ceil: Bool, whether to use ceil function for constraining power-of-2 scale exponents. If false, round function is used instead. use_po2_scale_msqe_round: Bool, whether to use MSQE rounding function for constraining power-of-2 scale exponents. Note: MSQE rounding has precedence over ceil and round function. 
""" def __init__(self, bits=4, keep_negative=True, scale_axis=None, per_channel_scale=False, init_scale=None, use_second_moments_msqe_opt=True, second_moments_ema_decay=0.999, use_sqrt_of_msqe_weight=True, use_outlier_mask_msqe_weight=True, use_stable_scale_exponent=False, stable_scale_ema_decay=0.99, min_init_scale=0.00001, use_po2_scale_ceil=True, use_po2_scale_msqe_round=True, **kwargs): self.min_init_scale = min_init_scale self.use_po2_scale_ceil = use_po2_scale_ceil self.use_po2_scale_msqe_round = use_po2_scale_msqe_round # An indicator variable to control usage of MSQE rounding function, which is # set to true by default (i.e, if use_po2_scale_msqe_round is true, MSQE # rounding is used by default based on self.switch_to_msqe_round.). It can # be used to delay using MSQE rounding. self.switch_to_msqe_round = None super().__init__( bits=bits, keep_negative=keep_negative, scale_axis=scale_axis, per_channel_scale=per_channel_scale, init_scale=init_scale, use_second_moments_msqe_opt=use_second_moments_msqe_opt, second_moments_ema_decay=second_moments_ema_decay, use_sqrt_of_msqe_weight=use_sqrt_of_msqe_weight, use_outlier_mask_msqe_weight=use_outlier_mask_msqe_weight, use_stable_scale_exponent=use_stable_scale_exponent, stable_scale_ema_decay=stable_scale_ema_decay, is_gradient_based=True, **kwargs) def __str__(self): # Convert Tensors to printable strings by converting to a numpy array and # then using regex to remove brackets when there is only one integer bit. 
ptn, repl = r"\[(\d)\]", r"\g<1>" bits = re.sub( ptn, repl, str(self.bits.numpy() if isinstance(self.bits, tf.Variable) else self .bits)) flags = [] flags.append("bits=" + str(int(bits))) flags.append("keep_negative=" + str(self.keep_negative)) flags.append("scale_axis=" + str(self.scale_axis)) flags.append("per_channel_scale=" + str(self.per_channel_scale)) flags.append("init_scale=" + str(self.init_scale)) flags.append("use_second_moments_msqe_opt=" + str(self.use_second_moments_msqe_opt)) flags.append("second_moments_ema_decay=" + str(self.second_moments_ema_decay)) flags.append("use_outlier_mask_msqe_weight=" + str(self.use_outlier_mask_msqe_weight)) flags.append("use_sqrt_of_msqe_weight=" + str(self.use_sqrt_of_msqe_weight)) flags.append("use_stable_scale_exponent=" + str(self.use_stable_scale_exponent)) flags.append("stable_scale_ema_decay=" + str(self.stable_scale_ema_decay)) flags.append("min_init_scale=" + str(self.min_init_scale)) flags.append("use_po2_scale_ceil=" + str(self.use_po2_scale_ceil)) flags.append("use_po2_scale_msqe_round=" + str(self.use_po2_scale_msqe_round)) return "quantized_bits_learnable_po2(" + ",".join(flags) + ")" def build(self, input_shape): """Creates and initializes variables.""" super().build(input_shape) if self.use_po2_scale_msqe_round: self.switch_to_msqe_round = tf.Variable( True, trainable=False, name="switch_to_msqe_round") def _get_init_scale_exponent(self, inputs): """Returns inputs distribution based initial scale exponent values. Args: inputs: A tensor to be used to calculate initial scale exponent values. Returns: A tensor of initial scale exponent values. """ std = tf.math.reduce_std(inputs, axis=self.reduce_axes, keepdims=True) # Uses 3 sigma percentile to get scale scale = 3.0 * std / tf.cast(self.qp, dtype=tf.float32) # Prevents zero scale values for inputs with all zeros (e.g., bias). 
if self.min_init_scale is not None: scale = tf.math.maximum(scale, self.min_init_scale) # Returns scale exponent return tf.math.log(scale) / tf.math.log(2.0) def _get_outlier_mask(self, inputs): """Returns a tensor to mask outliers in the input for MSQE optimizations. The outlier threshold is based on the (unconstrained) output dynamic range of the quantizer. Args: inputs: A tensor to be used to generate the outlier mask. Returns: A tensor to mask out the outliers of the inputs. Its shape is same as 'inputs' and its dtype is `float32`. """ # Calculates the output (unconstrained) dynamic range of the quantizer (i.e. # , self.scale_exponent is not power-of-2 constrained.). outlier_threshold = tf.math.pow(2.0, self.scale_exponent) * (self.qp + 0.5) return tf.where( abs(inputs) <= outlier_threshold, tf.ones_like(inputs, dtype=tf.float32), tf.zeros_like(inputs, dtype=tf.float32)) def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None): """Returns power-of-2 scaling factors for quantization. Args: inputs: A tensor to be used for MSQE rounding. Note: ceil and round functions do not use the inputs. reduce_axes: A list of axes to be summed (averaged) over. msqe_weight: A tensor which is used in scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. Its shape is same as 'inputs' and its dtype is `float32`. Returns: A tensor of power-of-2 scaling factors. """ if self.use_po2_scale_ceil: scale_exponent = tf.math.ceil(self.scale_exponent) else: scale_exponent = tf.math.rint(self.scale_exponent) # MSQE rounding requires the inputs to optimize the scale exponent. if self.use_po2_scale_msqe_round and inputs is not None: scale_exponent_msqe = self.msqe_round( inputs=inputs, scale_exponent=self.scale_exponent, reduce_axes=reduce_axes, msqe_weight=msqe_weight) # Control when to use MSQE rounding. Note: self.switch_to_msqe_round is # set to true by default. 
scale_exponent = tf.cond(self.switch_to_msqe_round, lambda: scale_exponent_msqe, lambda: scale_exponent) # Apply STE scale_exponent = self.scale_exponent + tf.stop_gradient(scale_exponent - self.scale_exponent) return tf.math.pow(2.0, scale_exponent) def msqe_round(self, inputs, scale_exponent, reduce_axes=None, msqe_weight=None): """Returns MSQE-wise optimum power-of-2 scale exponents. Args: inputs: A tensor, MSQE rounding is based on. scale_exponent: A tensor, learnable scale exponents which are not constrained in power-of-2. reduce_axes: A list of axes to be summed (averaged) over or None. If None, self.reduce_axes is used. msqe_weight: A tensor which is used to weight MSQE rounding or None. If None, a tensor (or None) from self._get_msqe_weight is used. Returns: A tensor of power-of-2 scale exponents. """ if reduce_axes is None: reduce_axes = self.reduce_axes if msqe_weight is None: # Returned msqe_weight can be None. msqe_weight = self._get_msqe_weight(inputs) # floor scale_exponent_floor = tf.math.floor(scale_exponent) msqe_floor = self._calculate_msqe_inputs( inputs=inputs, scale=tf.math.pow(2.0, scale_exponent_floor), reduce_axes=reduce_axes, msqe_weight=msqe_weight) # ceil scale_exponent_ceil = tf.math.ceil(scale_exponent) msqe_ceil = self._calculate_msqe_inputs( inputs=inputs, scale=tf.math.pow(2.0, scale_exponent_ceil), reduce_axes=reduce_axes, msqe_weight=msqe_weight) return tf.where(msqe_floor < msqe_ceil, scale_exponent_floor, scale_exponent_ceil) def get_config(self): config = { "bits": self.bits, "keep_negative": self.keep_negative, "scale_axis": self.scale_axis, "per_channel_scale": self.per_channel_scale, "init_scale": self.init_scale, "use_second_moments_msqe_opt": self.use_second_moments_msqe_opt, "second_moments_ema_decay": self.second_moments_ema_decay, "use_outlier_mask_msqe_weight": self.use_outlier_mask_msqe_weight, "use_sqrt_of_msqe_weight": self.use_sqrt_of_msqe_weight, "use_stable_scale_exponent": self.use_stable_scale_exponent, 
"stable_scale_ema_decay": self.stable_scale_ema_decay, "min_init_scale": self.min_init_scale, "use_po2_scale_ceil": self.use_po2_scale_ceil, "use_po2_scale_msqe_round": self.use_po2_scale_msqe_round, "qn": self.qn, "qp": self.qp, "scaled_axes": self.scaled_axes, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) class quantized_bits_msqe_po2(BaseQuantizerPO2): # pylint: disable=invalid-name """Quantizes the number to a number of bits by MSQE based scaling factors. For more details, see https://arxiv.org/abs/2210.03671. Attributes: bits: Integer, number of bits to perform quantization. keep_negative: Boolean, if true, it keeps negative values and sets the quantization levels symmetrically around 0. If false, negative numbers is clipped to 0. scale_axis: Integer, which axis to calculate scale from. per_channel_scale: Boolean, whether to perform per-channel (true) or per-layer (false) quantization. init_scale: Float or None, initial scale factor to initialize the scale with (if None, it will be initialized based on the first inputs.). use_second_moments_msqe_opt: Bool, whether to use the second moments based MSQE optimization or not. second_moments_ema_decay: Float, EMA decay factor for the second moments update. use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight. use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask. use_stable_scale_exponent: Bool, whether to use exponentially moving averaged ("stable") scale exponent or not. Note: there is a tf.Variable (self.switch_to_stable_scale) that controls when to apply the stable scale exponent (i.e., if use_stable_scale_exponent is true and self.switch_to_stable_scale is false, the stable scale exponent is updated but not used.). stable_scale_ema_decay: Float, EMA decay factor for the stable scale update. outlier_mask_sigma: Float, sigma to apply for the outlier masking threshold. num_lls_iters: An integer. 
Number of linear least squares regression iterations. should_line_search: A bool. Whether to perform a line search. line_search_range: An integer. Search range of the line search. """ def __init__(self, bits=4, keep_negative=True, scale_axis=None, per_channel_scale=False, init_scale=None, use_second_moments_msqe_opt=True, second_moments_ema_decay=0.999, use_sqrt_of_msqe_weight=True, use_outlier_mask_msqe_weight=True, use_stable_scale_exponent=False, stable_scale_ema_decay=0.99, outlier_mask_sigma=2.0, num_lls_iters=3, should_line_search=True, line_search_range=6, **kwargs): self.outlier_mask_sigma = outlier_mask_sigma self.num_lls_iters = num_lls_iters self.should_line_search = should_line_search self.line_search_range = line_search_range super().__init__( bits=bits, keep_negative=keep_negative, scale_axis=scale_axis, per_channel_scale=per_channel_scale, init_scale=init_scale, use_second_moments_msqe_opt=use_second_moments_msqe_opt, second_moments_ema_decay=second_moments_ema_decay, use_sqrt_of_msqe_weight=use_sqrt_of_msqe_weight, use_outlier_mask_msqe_weight=use_outlier_mask_msqe_weight, use_stable_scale_exponent=use_stable_scale_exponent, stable_scale_ema_decay=stable_scale_ema_decay, is_gradient_based=False, **kwargs) def __str__(self): # Convert Tensors to printable strings by converting to a numpy array and # then using regex to remove brackets when there is only one integer bit. 
ptn, repl = r"\[(\d)\]", r"\g<1>" bits = re.sub( ptn, repl, str(self.bits.numpy() if isinstance(self.bits, tf.Variable) else self .bits)) flags = [] flags.append("bits=" + str(int(bits))) flags.append("keep_negative=" + str(self.keep_negative)) flags.append("scale_axis=" + str(self.scale_axis)) flags.append("per_channel_scale=" + str(self.per_channel_scale)) flags.append("init_scale=" + str(self.init_scale)) flags.append("use_second_moments_msqe_opt=" + str(self.use_second_moments_msqe_opt)) flags.append("second_moments_ema_decay=" + str(self.second_moments_ema_decay)) flags.append("use_sqrt_of_msqe_weight=" + str(self.use_sqrt_of_msqe_weight)) flags.append("use_outlier_mask_msqe_weight=" + str(self.use_outlier_mask_msqe_weight)) flags.append("use_stable_scale_exponent=" + str(self.use_stable_scale_exponent)) flags.append("stable_scale_ema_decay=" + str(self.stable_scale_ema_decay)) flags.append("outlier_mask_sigma=" + str(self.outlier_mask_sigma)) flags.append("num_lls_iters=" + str(self.num_lls_iters)) flags.append("should_line_search=" + str(self.should_line_search)) flags.append("line_search_range=" + str(self.line_search_range)) return "quantized_bits_msqe_po2(" + ",".join(flags) + ")" def _get_init_scale_exponent(self, inputs): """Returns min and max of the inputs based initial scale exponent values. Args: inputs: A tensor to be used to calculate initial scale exponent values. Returns: A tensor of initial scale exponent values. """ scale = K.max( abs(inputs), axis=self.scaled_axes, keepdims=True) / tf.cast( self.qp, dtype=tf.float32) return self._get_po2_scale_exponent(scale) def _get_outlier_mask(self, inputs): """Returns a tensor to mask outliers in the input for MSQE optimizations. The outlier threshold is based on the inputs distribution. Args: inputs: A tensor to be used to generate the outlier mask. Returns: A tensor to mask out the outliers of the inputs. Its shape is same as 'inputs' and its dtype is `float32`. 
""" std = tf.math.reduce_std(inputs, axis=self.reduce_axes, keepdims=True) outlier_threshold = self.outlier_mask_sigma * std return tf.where( abs(inputs) <= outlier_threshold, tf.ones_like(inputs), tf.zeros_like(inputs)) def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None): """Returns power-of-2 scaling factors for quantization. Args: inputs: A tensor to be used to optimize the scale value. reduce_axes: A list of axes to be summed (averaged) over. msqe_weight: A tensor which is used in scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. Its shape is same as 'inputs' and its dtype is `float32`. Returns: A tensor of power-of-2 scaling factors. Its shape is same as 'self.scale_exponent' and its dtype is `float32`. """ if inputs is None: return self._get_po2_scale(self.scale) if reduce_axes is None: reduce_axes = self.reduce_axes if msqe_weight is None: msqe_weight = self._get_msqe_weight(inputs) scale, _ = self._optimize_msqe_scale( inputs, tf.math.pow(2.0, tf.round(self.scale_exponent)), reduce_axes=reduce_axes, msqe_weight=msqe_weight, num_lls_iters=self.num_lls_iters, should_line_search=self.should_line_search, line_search_range=self.line_search_range, ) self.scale_exponent.assign(self._get_po2_scale_exponent(scale)) return scale def get_config(self): config = { "bits": self.bits, "keep_negative": self.keep_negative, "scale_axis": self.scale_axis, "per_channel_scale": self.per_channel_scale, "init_scale": self.init_scale, "use_second_moments_msqe_opt": self.use_second_moments_msqe_opt, "second_moments_ema_decay": self.second_moments_ema_decay, "use_sqrt_of_msqe_weight": self.use_sqrt_of_msqe_weight, "use_outlier_mask_msqe_weight": self.use_outlier_mask_msqe_weight, "use_stable_scale_exponent": self.use_stable_scale_exponent, "stable_scale_ema_decay": self.stable_scale_ema_decay, "outlier_mask_sigma": self.outlier_mask_sigma, "num_lls_iters": self.num_lls_iters, "should_line_search": 
self.should_line_search, "line_search_range": self.line_search_range, "qn": self.qn, "qp": self.qp, "scaled_axes": self.scaled_axes, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) ================================================ FILE: qkeras/qconv2d_batchnorm.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Fold batchnormalization with previous QConv2D layers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from six.moves import range import tensorflow as tf from tensorflow.keras import layers from tensorflow.keras.models import Model from .qconvolutional import QConv2D from .quantizers import * from tensorflow.python.framework import smart_cond as tf_utils from tensorflow.python.ops import math_ops tf.compat.v2.enable_v2_behavior() # TODO(lishanok): Create an abstract folding parent class class QConv2DBatchnorm(QConv2D): """Fold batchnormalization with a previous qconv2d layer.""" def __init__( self, # qconv2d params filters, kernel_size, strides=(1, 1), padding="valid", data_format="channels_last", dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer="he_normal", bias_initializer="zeros", kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, 
kernel_constraint=None, bias_constraint=None, kernel_quantizer=None, bias_quantizer=None, # batchnorm params axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, renorm=False, renorm_clipping=None, renorm_momentum=0.99, fused=None, trainable=True, virtual_batch_size=None, adjustment=None, # other params ema_freeze_delay=None, folding_mode="ema_stats_folding", **kwargs): """Initialize a composite layer that folds conv2d and batch normalization. The first group of parameters correponds to the initialization parameters of a qconv2d layer. check qkeras.qconvolutional.qconv2d for details. The 2nd group of parameters corresponds to the initialization parameters of a BatchNormalization layer. Check keras.layers.normalization.BatchNorma lizationBase for details. The 3rd group of parameters corresponds to the initialization parameters specific to this class. ema_freeze_delay: int. number of steps before batch normalization mv_mean and mv_variance will be frozen and used in the folded layer. folding_mode: string "ema_stats_folding": mimic tflite which uses the ema statistics to fold the kernel to suppress quantization induced jitter then performs the correction to have a similar effect of using the current batch statistics. "batch_stats_folding": use batch mean and variance to fold kernel first; after enough training steps switch to moving_mean and moving_variance for kernel folding. 
""" # intialization the qconv2d part of the composite layer super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, **kwargs ) # initialization of batchnorm part of the composite layer self.batchnorm = layers.BatchNormalization( axis=axis, momentum=momentum, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, moving_mean_initializer=moving_mean_initializer, moving_variance_initializer=moving_variance_initializer, beta_regularizer=beta_regularizer, gamma_regularizer=gamma_regularizer, beta_constraint=beta_constraint, gamma_constraint=gamma_constraint, renorm=renorm, renorm_clipping=renorm_clipping, renorm_momentum=renorm_momentum, fused=fused, trainable=trainable, virtual_batch_size=virtual_batch_size, adjustment=adjustment) self.ema_freeze_delay = ema_freeze_delay assert folding_mode in ["ema_stats_folding", "batch_stats_folding"] self.folding_mode = folding_mode def build(self, input_shape): super(QConv2DBatchnorm, self).build(input_shape) # self._iteration (i.e., training_steps) is initialized with -1. When # loading ckpt, it can load the number of training steps that have been # previously trainied. If start training from scratch. 
# TODO(lishanok): develop a way to count iterations outside layer self._iteration = tf.Variable(-1, trainable=False, name="iteration", dtype=tf.int64) def call(self, inputs, training=None): # numpy value, mark the layer is in training training = self.batchnorm._get_training_value(training) # pylint: disable=protected-access # checking if to update batchnorm params if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0): # if ema_freeze_delay is None or a negative value, do not freeze bn stats bn_training = tf.cast(training, dtype=bool) else: bn_training = tf.math.logical_and(training, tf.math.less_equal( self._iteration, self.ema_freeze_delay)) kernel = self.kernel # run conv to produce output for the following batchnorm conv_outputs = tf.keras.backend.conv2d( inputs, kernel, strides=self.strides, padding=self.padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if self.use_bias: bias = self.bias conv_outputs = tf.keras.backend.bias_add( conv_outputs, bias, data_format=self.data_format) else: bias = 0 _ = self.batchnorm(conv_outputs, training=bn_training) self._iteration.assign_add(tf_utils.smart_cond( training, lambda: tf.constant(1, tf.int64), lambda: tf.constant(0, tf.int64))) # calcuate mean and variance from current batch bn_shape = conv_outputs.shape ndims = len(bn_shape) reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis] keep_dims = len(self.batchnorm.axis) > 1 mean, variance = self.batchnorm._moments( # pylint: disable=protected-access math_ops.cast(conv_outputs, self.batchnorm._param_dtype), # pylint: disable=protected-access reduction_axes, keep_dims=keep_dims) # get batchnorm weights gamma = self.batchnorm.gamma beta = self.batchnorm.beta moving_mean = self.batchnorm.moving_mean moving_variance = self.batchnorm.moving_variance if self.folding_mode == "batch_stats_folding": # using batch mean and variance in the initial training stage # after sufficient training, switch to moving mean and variance 
new_mean = tf_utils.smart_cond( bn_training, lambda: mean, lambda: moving_mean) new_variance = tf_utils.smart_cond( bn_training, lambda: variance, lambda: moving_variance) # get the inversion factor so that we replace division by multiplication inv = math_ops.rsqrt(new_variance + self.batchnorm.epsilon) if gamma is not None: inv *= gamma # fold bias with bn stats folded_bias = inv * (bias - new_mean) + beta elif self.folding_mode == "ema_stats_folding": # We always scale the weights with a correction factor to the long term # statistics prior to quantization. This ensures that there is no jitter # in the quantized weights due to batch to batch variation. During the # initial phase of training, we undo the scaling of the weights so that # outputs are identical to regular batch normalization. We also modify # the bias terms correspondingly. After sufficient training, switch from # using batch statistics to long term moving averages for batch # normalization. # use batch stats for calcuating bias before bn freeze, and use moving # stats after bn freeze mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon) batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon) if gamma is not None: mv_inv *= gamma batch_inv *= gamma folded_bias = tf_utils.smart_cond( bn_training, lambda: batch_inv * (bias - mean) + beta, lambda: mv_inv * (bias - moving_mean) + beta) # moving stats is always used to fold kernel in tflite; before bn freeze # an additional correction factor will be applied to the conv2d output inv = mv_inv else: assert ValueError # wrap conv kernel with bn parameters folded_kernel = inv * kernel # quantize the folded kernel if self.kernel_quantizer is not None: q_folded_kernel = self.kernel_quantizer_internal(folded_kernel) else: q_folded_kernel = folded_kernel # If loaded from a ckpt, bias_quantizer is the ckpt value # Else if bias_quantizer not specified, bias # quantizer is None and we need to calculate bias quantizer # type according to accumulator 
type. User can call # bn_folding_utils.populate_bias_quantizer_from_accumulator( # model, input_quantizer_list]) to populate such bias quantizer. if self.bias_quantizer_internal is not None: q_folded_bias = self.bias_quantizer_internal(folded_bias) else: q_folded_bias = folded_bias applied_kernel = q_folded_kernel applied_bias = q_folded_bias # calculate conv2d output using the quantized folded kernel folded_outputs = tf.keras.backend.conv2d( inputs, applied_kernel, strides=self.strides, padding=self.padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if training is True and self.folding_mode == "ema_stats_folding": batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon) y_corr = tf_utils.smart_cond( bn_training, lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) * math_ops.rsqrt(variance + self.batchnorm.epsilon)), lambda: tf.constant(1.0, shape=moving_variance.shape)) folded_outputs = math_ops.mul(folded_outputs, y_corr) folded_outputs = tf.keras.backend.bias_add( folded_outputs, applied_bias, data_format=self.data_format) if self.activation is not None: return self.activation(folded_outputs) return folded_outputs def get_config(self): base_config = super().get_config() bn_config = self.batchnorm.get_config() config = {"ema_freeze_delay": self.ema_freeze_delay, "folding_mode": self.folding_mode} name = base_config["name"] out_config = dict( list(base_config.items()) + list(bn_config.items()) + list(config.items())) # names from different config override each other; use the base layer name # as the this layer's config name out_config["name"] = name return out_config def get_quantization_config(self): return { "kernel_quantizer": str(self.kernel_quantizer_internal), "bias_quantizer": str(self.bias_quantizer_internal), "activation": str(self.activation), "filters": str(self.filters) } def get_quantizers(self): return self.quantizers def get_folded_weights(self): """Function to get the batchnorm folded weights. 
This function converts the weights by folding batchnorm parameters into the weight of QConv2D. The high-level equation: W_fold = gamma * W / sqrt(variance + epsilon) bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta """ kernel = self.kernel if self.use_bias: bias = self.bias else: bias = 0 # get batchnorm weights and moving stats gamma = self.batchnorm.gamma beta = self.batchnorm.beta moving_mean = self.batchnorm.moving_mean moving_variance = self.batchnorm.moving_variance # get the inversion factor so that we replace division by multiplication inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon) if gamma is not None: inv *= gamma # wrap conv kernel and bias with bn parameters folded_kernel = inv * kernel folded_bias = inv * (bias - moving_mean) + beta return [folded_kernel, folded_bias] ================================================ FILE: qkeras/qconvolutional.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

import numpy as np
import tensorflow as tf
from tensorflow.keras import constraints
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import DepthwiseConv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import InputSpec
from tensorflow.keras.layers import SeparableConv1D
from tensorflow.keras.layers import SeparableConv2D
from .qlayers import get_auto_range_constraint_initializer
from .qlayers import QActivation
from .quantizers import get_quantized_initializer
from .quantizers import get_quantizer
from tensorflow.python.eager import context
from tensorflow.python.ops import array_ops
# from tensorflow.python.ops import array_ops
from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer


def deconv_output_length(
    input_length,
    filter_size,
    padding,
    output_padding=None,
    stride=0,
    dilation=1,
):
  """Determines output length of a transposed convolution given input length.

  Mirrors the output-shape arithmetic used by Keras' Conv2DTranspose.

  Args:
    input_length: Integer.
    filter_size: Integer.
    padding: one of `"same"`, `"valid"`, `"full"`.
    output_padding: Integer, amount of padding along the output dimension. Can
      be set to `None` in which case the output length is inferred.
    stride: Integer.
    dilation: Integer.

  Returns:
    The output length (integer), or None if input_length is None.
  """
  assert padding in {"same", "valid", "full"}
  if input_length is None:
    return None

  # Get the dilated kernel size
  filter_size = filter_size + (filter_size - 1) * (dilation - 1)

  pad = 0
  length = 0
  # Infer length if output padding is None, else compute the exact length
  if output_padding is None:
    if padding == "valid":
      length = input_length * stride + max(filter_size - stride, 0)
    elif padding == "full":
      length = input_length * stride - (stride + filter_size - 2)
    elif padding == "same":
      length = input_length * stride
  else:
    if padding == "same":
      pad = filter_size // 2
    elif padding == "valid":
      pad = 0
    elif padding == "full":
      pad = filter_size - 1

    length = (
        (input_length - 1) * stride + filter_size - 2 * pad + output_padding
    )
  return length


class QConv1D(Conv1D, PrunableLayer):
  """1D quantized convolution layer (e.g. temporal convolution)."""

  # most of these parameters follow the implementation of Conv1D in Keras,
  # with the exception of kernel_range, bias_range, kernel_quantizer
  # and bias_quantizer, and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # kernel_range/bias_range: for quantizer functions whose values
  # can go over [-1,+1], these values are used to set the clipping
  # value of kernels and biases, respectively, instead of using the
  # constraints specified by the user.
  #
  # we refer the reader to the documentation of Conv1D in Keras for the
  # other parameters.
  #
  def __init__(self,
               filters,
               kernel_size,
               strides=1,
               padding="valid",
               dilation_rate=1,
               activation=None,
               use_bias=True,
               kernel_initializer="he_normal",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               bias_quantizer=None,
               kernel_range=None,
               bias_range=None,
               **kwargs):

    # kernel_range/bias_range are kept only for backward compatibility.
    if kernel_range is not None:
      warnings.warn("kernel_range is deprecated in QConv1D layer.")
    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QConv1D layer.")

    self.kernel_range = kernel_range
    self.bias_range = bias_range

    self.kernel_quantizer = kernel_quantizer
    self.bias_quantizer = bias_quantizer

    # resolve string/dict quantizer specs into quantizer objects
    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.kernel_quantizer_internal, self.bias_quantizer_internal
    ]

    # derive constraint/initializer from the quantizer range when available
    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs
    )

  def call(self, inputs):
    """Applies conv1d with quantized kernel (and bias when enabled)."""
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    outputs = tf.keras.backend.conv1d(
        inputs,
        quantized_kernel,
        strides=self.strides[0],
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate[0])

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "kernel_range": self.kernel_range,
        "bias_range": self.bias_range,
    }
    base_config = super(QConv1D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation),
        "filters" : str(self.filters)
    }

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.kernel]


class QConv2D(Conv2D, PrunableLayer):
  """2D convolution layer (e.g. spatial convolution over images)."""

  # most of these parameters follow the implementation of Conv2D in Keras,
  # with the exception of kernel_range, bias_range, kernel_quantizer
  # and bias_quantizer, and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # kernel_range/bias_range: for quantizer functions whose values
  # can go over [-1,+1], these values are used to set the clipping
  # value of kernels and biases, respectively, instead of using the
  # constraints specified by the user.
  # mask: Optional mask for kernel weights.
  #
  # we refer the reader to the documentation of Conv2D in Keras for the
  # other parameters.
  #
  def __init__(
      self,
      filters,
      kernel_size,
      strides=(1, 1),
      padding="valid",
      data_format="channels_last",
      dilation_rate=(1, 1),
      activation=None,
      use_bias=True,
      kernel_initializer="he_normal",
      bias_initializer="zeros",
      kernel_regularizer=None,
      bias_regularizer=None,
      activity_regularizer=None,
      kernel_constraint=None,
      bias_constraint=None,
      kernel_range=None,
      bias_range=None,
      kernel_quantizer=None,
      bias_quantizer=None,
      mask=None,
      **kwargs,
  ):
    # kernel_range/bias_range are kept only for backward compatibility.
    if kernel_range is not None:
      warnings.warn("kernel_range is deprecated in QConv2D layer.")
    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QConv2D layer.")

    self.kernel_range = kernel_range
    self.bias_range = bias_range

    self.kernel_quantizer = kernel_quantizer
    self.bias_quantizer = bias_quantizer

    # resolve string/dict quantizer specs into quantizer objects
    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.kernel_quantizer_internal, self.bias_quantizer_internal
    ]

    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    if mask is not None:
      shape = mask.shape
      if len(shape) < 2:
        raise ValueError(
            "Expected shape to have rank at least 2 but provided shape has"
            f" rank {len(shape)}"
        )
      h, w = shape[0], shape[1]
      self._mask = np.reshape(
          mask, (h, w, 1, 1)
      )  # Extend the dimension to be 4D.
    else:
      self._mask = None

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs
    )

  def convolution_op(self, inputs, kernel):
    """Performs the plain (unquantized) conv2d on already-prepared kernel."""
    return tf.keras.backend.conv2d(
        inputs,
        kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate,
    )

  @tf.function(jit_compile=True)
  def _jit_compiled_convolution_op(self, inputs, kernel):
    # XLA-compiled variant, needed for grouped convolutions (see call()).
    return self.convolution_op(inputs, kernel)

  def call(self, inputs):
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    if self._mask is not None:
      # Apply mask to kernel weights if one is provided.
      quantized_kernel = quantized_kernel * self._mask

    # Grouped convolutions are not fully supported on the CPU for compiled
    # functions.
    #
    # This is a workaround taken from TF's core library. Remove when proper
    # support is added.
    # See definition of function "_jit_compiled_convolution_op" at
    # cs/third_party/py/tf_keras/layers/convolutional/base_conv.py for more
    # details.
    if self.groups > 1:
      outputs = self._jit_compiled_convolution_op(
          inputs, tf.convert_to_tensor(quantized_kernel)
      )
    else:
      outputs = self.convolution_op(inputs, quantized_kernel)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format
      )

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "kernel_range": self.kernel_range,
        "bias_range": self.bias_range,
        # the mask is serialized as a nested list so the config is JSON-safe
        "mask": self._mask.tolist() if self._mask is not None else None,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config):
    # restore the mask (stored as a nested list) back into an ndarray
    mask = config.get("mask")
    if mask is not None:
      mask = np.array(mask)
      config["mask"] = mask
    return cls(**config)

  def get_quantization_config(self):
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation),
        "filters" : str(self.filters)
    }

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.kernel]


class QConv2DTranspose(Conv2DTranspose, PrunableLayer):
  """2D transposed convolution layer."""

  # most of these parameters follow the implementation of Conv2DTranspose
  # in Keras, with the exception of kernel_quantizer and bias_quantizer
  # and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  #
  # we refer the reader to the documentation of Conv2DTranspose in Keras for
  # the other parameters.
# def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, kernel_quantizer=None, bias_quantizer=None, **kwargs): self.kernel_quantizer = kernel_quantizer self.bias_quantizer = bias_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.kernel_quantizer_internal, self.bias_quantizer_internal ] kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=None, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, **kwargs ) def call(self, inputs): inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] if self.data_format == 'channels_first': h_axis, w_axis = 2, 3 else: h_axis, w_axis = 1, 2 height, width = inputs_shape[h_axis], inputs_shape[w_axis] 
kernel_h, kernel_w = self.kernel_size stride_h, stride_w = self.strides if self.output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = self.output_padding # Infer the dynamic output shape: out_height = deconv_output_length(height, kernel_h, padding=self.padding, output_padding=out_pad_h, stride=stride_h, dilation=self.dilation_rate[0]) out_width = deconv_output_length(width, kernel_w, padding=self.padding, output_padding=out_pad_w, stride=stride_w, dilation=self.dilation_rate[1]) if self.data_format == 'channels_first': output_shape = (batch_size, self.filters, out_height, out_width) else: output_shape = (batch_size, out_height, out_width, self.filters) if self.kernel_quantizer: quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel output_shape_tensor = array_ops.stack(output_shape) outputs = tf.keras.backend.conv2d_transpose( inputs, quantized_kernel, output_shape_tensor, strides=self.strides, padding=self.padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if not context.executing_eagerly(): # Infer the static output shape: out_shape = self.compute_output_shape(inputs.shape) outputs.set_shape(out_shape) if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def get_config(self): config = { "kernel_quantizer": constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), } base_config = super(QConv2DTranspose, self).get_config() return dict(list(base_config.items()) + list(config.items())) def get_quantizers(self): return self.quantizers def 
get_prunable_weights(self): return [self.kernel] class QSeparableConv1D(SeparableConv1D, PrunableLayer): """Depthwise separable 1D convolution.""" # most of these parameters follow the implementation of SeparableConv1D # in Keras, with the exception of depthwise_quantizer, pointwise_quantizer # and bias_quantizer. # # depthwise_quantizer: quantizer function/class for depthwise spatial kernel # pointwise_quantizer: quantizer function/class for pointwise kernel # bias_quantizer: quantizer function/class for bias # # we refer the reader to the documentation of SeparableConv1D in Keras for # the other parameters. # def __init__(self, filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, depthwise_quantizer=None, pointwise_quantizer=None, bias_quantizer=None, **kwargs): self.depthwise_quantizer = depthwise_quantizer self.pointwise_quantizer = pointwise_quantizer self.bias_quantizer = bias_quantizer self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer) self.pointwise_quantizer_internal = get_quantizer(self.pointwise_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"): self.depthwise_quantizer_internal._set_trainable_parameter() if hasattr(self.pointwise_quantizer_internal, "_set_trainable_parameter"): self.pointwise_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.depthwise_quantizer_internal, self.pointwise_quantizer_internal, self.bias_quantizer_internal ] depthwise_constraint, depthwise_initializer = ( 
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))
    pointwise_constraint, pointwise_initializer = (
        get_auto_range_constraint_initializer(self.pointwise_quantizer_internal,
                                              pointwise_constraint,
                                              pointwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        depth_multiplier=depth_multiplier,
        activation=activation,
        use_bias=use_bias,
        depthwise_initializer=initializers.get(depthwise_initializer),
        pointwise_initializer=initializers.get(pointwise_initializer),
        bias_initializer=initializers.get(bias_initializer),
        depthwise_regularizer=regularizers.get(depthwise_regularizer),
        pointwise_regularizer=regularizers.get(pointwise_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        depthwise_constraint=constraints.get(depthwise_constraint),
        pointwise_constraint=constraints.get(pointwise_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs
    )

  def call(self, inputs):
    """Runs the 1D separable conv as a broadcast-to-4D separable conv2d."""
    if self.padding == 'causal':
      inputs = array_ops.pad(inputs, self._compute_causal_padding())
    spatial_start_dim = 1 if self.data_format == 'channels_last' else 2

    # Explicitly broadcast inputs and kernels to 4D.
    inputs = array_ops.expand_dims(inputs, spatial_start_dim)
    depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0)
    pointwise_kernel = array_ops.expand_dims(self.pointwise_kernel, 0)
    dilation_rate = (1,) + self.dilation_rate

    if self.padding == 'causal':
      op_padding = 'valid'
    else:
      op_padding = self.padding

    if self.depthwise_quantizer:
      quantized_depthwise_kernel = self.depthwise_quantizer_internal(
          depthwise_kernel)
    else:
      quantized_depthwise_kernel = depthwise_kernel
    if self.pointwise_quantizer:
      quantized_pointwise_kernel = self.pointwise_quantizer_internal(
          pointwise_kernel)
    else:
      quantized_pointwise_kernel = pointwise_kernel

    outputs = tf.keras.backend.separable_conv2d(
        inputs,
        quantized_depthwise_kernel,
        quantized_pointwise_kernel,
        # the 1-element strides tuple is doubled to cover both spatial dims
        strides=self.strides * 2,
        padding=op_padding,
        dilation_rate=dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    # drop the broadcast spatial dimension added above
    outputs = array_ops.squeeze(outputs, [spatial_start_dim])

    if self.activation is not None:
      return self.activation(outputs)

    return outputs

  def get_config(self):
    config = {
        "depthwise_quantizer": constraints.serialize(
            self.depthwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "pointwise_quantizer": constraints.serialize(
            self.pointwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
    }
    base_config = super(QSeparableConv1D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.depthwise_kernel, self.pointwise_kernel]


class QSeparableConv2D(SeparableConv2D, PrunableLayer):
  """Depthwise separable 2D convolution."""

  # most of
  # these parameters follow the implementation of SeparableConv2D
  # in Keras, with the exception of depthwise_quantizer, pointwise_quantizer
  # and bias_quantizer.
  #
  # depthwise_quantizer: quantizer function/class for depthwise spatial kernel
  # pointwise_quantizer: quantizer function/class for pointwise kernel
  # bias_quantizer: quantizer function/class for bias
  #
  # we refer the reader to the documentation of SeparableConv2D in Keras for
  # the other parameters.
  #
  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format=None,
               dilation_rate=(1, 1),
               depth_multiplier=1,
               activation=None,
               use_bias=True,
               depthwise_initializer='glorot_uniform',
               pointwise_initializer='glorot_uniform',
               bias_initializer='zeros',
               depthwise_regularizer=None,
               pointwise_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               depthwise_constraint=None,
               pointwise_constraint=None,
               bias_constraint=None,
               depthwise_quantizer=None,
               pointwise_quantizer=None,
               bias_quantizer=None,
               **kwargs):

    self.depthwise_quantizer = depthwise_quantizer
    self.pointwise_quantizer = pointwise_quantizer
    self.bias_quantizer = bias_quantizer

    # resolve string/dict quantizer specs into quantizer objects
    self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer)
    self.pointwise_quantizer_internal = get_quantizer(self.pointwise_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"):
      self.depthwise_quantizer_internal._set_trainable_parameter()
    if hasattr(self.pointwise_quantizer_internal, "_set_trainable_parameter"):
      self.pointwise_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.depthwise_quantizer_internal, self.pointwise_quantizer_internal,
        self.bias_quantizer_internal
    ]

    depthwise_constraint, depthwise_initializer = (
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))
    pointwise_constraint, pointwise_initializer = (
        get_auto_range_constraint_initializer(self.pointwise_quantizer_internal,
                                              pointwise_constraint,
                                              pointwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        depth_multiplier=depth_multiplier,
        activation=activation,
        use_bias=use_bias,
        depthwise_initializer=initializers.get(depthwise_initializer),
        pointwise_initializer=initializers.get(pointwise_initializer),
        bias_initializer=initializers.get(bias_initializer),
        depthwise_regularizer=regularizers.get(depthwise_regularizer),
        pointwise_regularizer=regularizers.get(pointwise_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        depthwise_constraint=constraints.get(depthwise_constraint),
        pointwise_constraint=constraints.get(pointwise_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs
    )

  def call(self, inputs):
    # Apply the actual ops.
    if self.depthwise_quantizer:
      quantized_depthwise_kernel = self.depthwise_quantizer_internal(
          self.depthwise_kernel)
    else:
      quantized_depthwise_kernel = self.depthwise_kernel
    if self.pointwise_quantizer:
      quantized_pointwise_kernel = self.pointwise_quantizer_internal(
          self.pointwise_kernel)
    else:
      quantized_pointwise_kernel = self.pointwise_kernel

    outputs = tf.keras.backend.separable_conv2d(
        inputs,
        quantized_depthwise_kernel,
        quantized_pointwise_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)

    return outputs

  def get_config(self):
    config = {
        "depthwise_quantizer": constraints.serialize(
            self.depthwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "pointwise_quantizer": constraints.serialize(
            self.pointwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
    }
    base_config = super(QSeparableConv2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.depthwise_kernel, self.pointwise_kernel]


class QDepthwiseConv2D(DepthwiseConv2D, PrunableLayer):
  """Creates quantized depthwise conv2d. Copied from mobilenet."""

  # most of these parameters follow the implementation of DepthwiseConv2D
  # in Keras,
  # with the exception of depthwise_range, bias_range,
  # depthwise_quantizer
  # and bias_quantizer, and kernel_initializer.
# # depthwise_quantizer: quantizer function/class for kernel # bias_quantizer: quantizer function/class for bias # depthwise_range/bias_ranger: for quantizer functions whose values # can go over [-1,+1], these values are used to set the clipping # value of kernels and biases, respectively, instead of using the # constraints specified by the user. # # we refer the reader to the documentation of DepthwiseConv2D in Keras for the # other parameters. # def __init__(self, kernel_size, strides=(1, 1), padding="VALID", depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer="he_normal", bias_initializer="zeros", depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, dilation_rate=(1, 1), depthwise_quantizer=None, bias_quantizer=None, depthwise_range=None, bias_range=None, **kwargs): if depthwise_range is not None: warnings.warn("depthwise_range is deprecated in QDepthwiseConv2D layer.") if bias_range is not None: warnings.warn("bias_range is deprecated in QDepthwiseConv2D layer.") self.depthwise_range = depthwise_range self.bias_range = bias_range self.depthwise_quantizer = depthwise_quantizer self.bias_quantizer = bias_quantizer self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"): self.depthwise_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.depthwise_quantizer_internal, self.bias_quantizer_internal ] depthwise_constraint, depthwise_initializer = ( get_auto_range_constraint_initializer(self.depthwise_quantizer_internal, depthwise_constraint, depthwise_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if 
activation is not None: activation = get_quantizer(activation) super().__init__( kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, activation=activation, use_bias=use_bias, depthwise_regularizer=depthwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, depth_multiplier=depth_multiplier, depthwise_initializer=depthwise_initializer, bias_initializer=bias_initializer, depthwise_constraint=depthwise_constraint, bias_constraint=bias_constraint, dilation_rate=dilation_rate, **kwargs ) def build(self, input_shape): if len(input_shape) < 4: raise ValueError( "Inputs to `QDepthwiseConv2D` should have rank 4. " "Received input shape:", str(input_shape)) if self.data_format == "channels_first": channel_axis = 1 else: channel_axis = 3 if input_shape[channel_axis] is None: raise ValueError("The channel dimension of the inputs to " "`QDepthwiseConv2D` " "should be defined. Found `None`.") input_dim = int(input_shape[channel_axis]) depthwise_kernel_shape = (self.kernel_size[0], self.kernel_size[1], input_dim, self.depth_multiplier) self.depthwise_kernel = self.add_weight( shape=depthwise_kernel_shape, initializer=self.depthwise_initializer, name="depthwise_kernel", regularizer=self.depthwise_regularizer, constraint=self.depthwise_constraint) if self.use_bias: self.bias = self.add_weight( shape=(input_dim * self.depth_multiplier,), initializer=self.bias_initializer, name="bias", regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None # Set input spec. 
self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) self.built = True def call(self, inputs, training=None): if self.depthwise_quantizer: quantized_depthwise_kernel = ( self.depthwise_quantizer_internal(self.depthwise_kernel)) else: quantized_depthwise_kernel = self.depthwise_kernel outputs = tf.keras.backend.depthwise_conv2d( inputs, quantized_depthwise_kernel, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate, data_format=self.data_format) if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def get_config(self): config = super(QDepthwiseConv2D, self).get_config() config.pop("filters", None) config.pop("kernel_initializer", None) config.pop("kernel_regularizer", None) config.pop("kernel_constraint", None) config["depth_multiplier"] = self.depth_multiplier config["depthwise_initializer"] = initializers.serialize( self.depthwise_initializer# Google internal code, commented out by copybara ) config["depthwise_regularizer"] = regularizers.serialize( self.depthwise_regularizer# Google internal code, commented out by copybara ) config["depthwise_constraint"] = constraints.serialize( self.depthwise_constraint# Google internal code, commented out by copybara ) config["depthwise_quantizer"] = constraints.serialize( self.depthwise_quantizer_internal# Google internal code, commented out by copybara ) config["bias_quantizer"] = constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ) config["depthwise_range"] = self.depthwise_range config["bias_range"] = self.bias_range return config def get_quantization_config(self): return { "depthwise_quantizer_internal": str(self.depthwise_quantizer_internal), "bias_quantizer": 
str(self.bias_quantizer_internal), "activation": str(self.activation), "filters" : str(self.filters) } def get_quantizers(self): return self.quantizers def get_prunable_weights(self): return [self.depthwise_kernel] def QMobileNetSeparableConv2D( filters, # pylint: disable=invalid-name kernel_size, strides=(1, 1), padding="VALID", dilation_rate=(1, 1), depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer="he_normal", pointwise_initializer="he_normal", bias_initializer="zeros", depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, depthwise_quantizer=None, pointwise_quantizer=None, bias_quantizer=None, depthwise_activation=None, depthwise_range=None, pointwise_range=None, bias_range=None, depthwise_dropout_rate=0.0, pw_first=False, name=""): """Adds a quantized separableconv2d.""" # we use here a modified version that appeared in mobilenet that adds # quantization to the network, and possibly an intermediate activation # layer that acts as a quantizer and possible dropout layer between # the depthwise and pointwise convolutions. # # since this implementation expands into depthwise -> pointwise # convolutions, the users will not see a separable convolution operation # in model.summary(), but rather a depthwise convolution followed by a # pointwise convolution. # # depthwise_quantizer: depthwise quantization function # pointwise_quantizer: pointwise quantization function # bias_quantizer: bias quantization function for the pointwise convolution # depthwise_range/pointwise_range/bias_range: ranges to be used if # quantization values can become greater than -1 and +1. 
# depthwise_dropout_rate: dropout between depthwise and pointwise is added # if rate > 0.0 # pw_first: this may disappear in the future, but as deep quantized networks # sometimes behave in different ways, if we are using binary or ternary # quantization, it may be better to apply pointwise before depthwise. # # For the remaining parameters, please refer to Keras implementation of # SeparableConv2D. # def _call(inputs): # pylint: disable=invalid-name """Internally builds qseparableconv2d.""" x = inputs if pw_first: x = QConv2D( filters, (1, 1), strides=(1, 1), padding="same", use_bias=use_bias, kernel_constraint=pointwise_constraint, kernel_initializer=pointwise_initializer, kernel_regularizer=pointwise_regularizer, kernel_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, bias_regularizer=bias_regularizer, bias_initializer=bias_initializer, bias_constraint=bias_constraint, activity_regularizer=activity_regularizer, kernel_range=pointwise_range, bias_range=bias_range, name=name + "_pw")( x) if depthwise_activation: if isinstance(depthwise_activation, QActivation): x = depthwise_activation(x) else: x = QActivation(depthwise_activation, name=name + "_dw_act")(x) if depthwise_dropout_rate > 0.0: x = Dropout(rate=depthwise_dropout_rate, name=name + "_dw_dropout")(x) x = QDepthwiseConv2D( kernel_size, strides=strides, dilation_rate=dilation_rate, padding=padding, depth_multiplier=depth_multiplier, use_bias=False, depthwise_regularizer=depthwise_regularizer, depthwise_initializer=depthwise_initializer, depthwise_constraint=depthwise_constraint, depthwise_quantizer=depthwise_quantizer, depthwise_range=depthwise_range, name=name + "_dw")( x) if not pw_first: if depthwise_activation: if isinstance(depthwise_activation, QActivation): x = depthwise_activation(x) else: x = QActivation(depthwise_activation, name=name + "_dw_act")(x) if depthwise_dropout_rate > 0.0: x = Dropout(rate=depthwise_dropout_rate, name=name + "_dw_dropout")(x) x = QConv2D( filters, (1, 1), 
strides=(1, 1), padding="same", use_bias=use_bias, kernel_constraint=pointwise_constraint, kernel_initializer=pointwise_initializer, kernel_regularizer=pointwise_regularizer, kernel_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, bias_regularizer=bias_regularizer, bias_initializer=bias_initializer, bias_constraint=bias_constraint, activity_regularizer=activity_regularizer, kernel_range=pointwise_range, bias_range=bias_range, name=name + "_pw")( x) if activation: if isinstance(activation, QActivation): x = activation(x) else: x = Activation(activation, name=name + "_pw_act")(x) return x return _call ================================================ FILE: qkeras/qdepthwise_conv2d_transpose.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import tensorflow as tf from tensorflow.keras.layers import Conv2DTranspose from tensorflow.keras.layers import InputSpec from .qconvolutional import deconv_output_length from .quantizers import get_quantizer from tensorflow.python.eager import context from tensorflow.python.keras import constraints from tensorflow.python.ops import array_ops from tensorflow.python.ops import array_ops # TODO(akshayap): Commonized functionality with QSeparableConv2DTranspose. 
class QDepthwiseConv2DTranspose(Conv2DTranspose): """Quantized Depthwise Conv2DTranspose layer.""" # Most of these parameters follow the implementation of Conv2DTranspose # in Keras, with the exception of following parameters. # # depthwise_activation: activation quantizer for depthwise convolution # depthwise_kernel_quantizer: quantizer function/class for depthwise kernel # bias_quantizer: quantizer function/class for bias # # we refer the reader to the documentation of Conv2DTranspose in Keras for # the other parameters. def __init__( self, filters, kernel_size, group_size=1, strides=(1, 1), padding="valid", output_padding=None, depth_multiplier=1, depthwise_activation=None, use_bias=True, depthwise_kernel_quantizer=None, bias_quantizer=None, **kwargs, ): self.filters = filters self.kernel_size = kernel_size self.strides = strides self.padding = padding self.output_padding = output_padding self.depth_multiplier = depth_multiplier self.depthwise_activation = depthwise_activation self.use_bias = use_bias self.group_size = group_size self.depthwise_kernel_quantizer = depthwise_kernel_quantizer self.bias_quantizer = bias_quantizer self.depthwise_kernel_quantizer_internal = get_quantizer( self.depthwise_kernel_quantizer ) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible for q in [ self.depthwise_kernel_quantizer_internal, ]: if hasattr(q, "_set_trainable_parameter"): q._set_trainable_parameter() if depthwise_activation is not None: self.depthwise_activation = get_quantizer(depthwise_activation) super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, use_bias=use_bias, **kwargs, ) def _get_input_axis(self): if self.data_format == "channels_first": b_axis, c_axis, h_axis, w_axis = 0, 1, 2, 3 else: b_axis, c_axis, h_axis, w_axis = 0, 3, 1, 2 return b_axis, c_axis, h_axis, w_axis def _get_input_dims(self, input_shape): b_axis, c_axis, h_axis, w_axis = 
self._get_input_axis() return ( input_shape[b_axis], input_shape[c_axis], input_shape[h_axis], input_shape[w_axis], ) def _get_output_size( self, inputs, output_padding, padding, strides, dilation_rate, kernel_h, kernel_w, ): input_shape = array_ops.shape(inputs) batch_size, _, height, width = self._get_input_dims(input_shape) stride_h, stride_w = strides dilation_h, dilation_w = dilation_rate[0], dilation_rate[1] if output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = output_padding # Infer the dynamic output shape: out_height = deconv_output_length( height, kernel_h, padding=padding, output_padding=out_pad_h, stride=stride_h, dilation=dilation_h, ) out_width = deconv_output_length( width, kernel_w, padding=padding, output_padding=out_pad_w, stride=stride_w, dilation=dilation_w, ) return (batch_size, out_height, out_width) def build(self, input_shape): self._input_shape = input_shape _, input_channel, _, _ = self._get_input_dims(input_shape) channel_axis = self._get_input_axis()[1] self.input_spec = InputSpec( min_ndim=self.rank + 2, axes={channel_axis: input_channel} ) # When setting kernel shape=(kw, kh, 1, input_channel), it does depthwise # convolution. 
depthwise_kernel_shape = self.kernel_size + ( input_channel, self.group_size, ) self.depthwise_kernel = self.add_weight( name=f"depthwise_kernel", shape=depthwise_kernel_shape, initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint, trainable=True, dtype=self.dtype, ) if self.use_bias: self.bias = self.add_weight( name="bias", shape=(self.filters,), initializer=self.bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint, trainable=True, dtype=self.dtype, ) else: self.bias = None self.built = True def compute_final_output_shape(self, input_shape, kernel_size, strides): input_shape = tf.TensorShape(input_shape).as_list() # By using list(), output_shape is a copy of input_shape, instead of a # reference to input_shape. output_shape = list(input_shape) _, c_axis, h_axis, w_axis = self._get_input_axis() kernel_h, kernel_w = kernel_size stride_h, stride_w = strides if self.output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = self.output_padding # Convolution is performed separately on each spatial domain. 
output_shape[c_axis] = input_shape[c_axis] output_shape[h_axis] = deconv_output_length( output_shape[h_axis], kernel_h, padding=self.padding, output_padding=out_pad_h, stride=stride_h, dilation=self.dilation_rate[0], ) output_shape[w_axis] = deconv_output_length( output_shape[w_axis], kernel_w, padding=self.padding, output_padding=out_pad_w, stride=stride_w, dilation=self.dilation_rate[1], ) return tf.TensorShape(output_shape) def conv_transpose_op( self, inputs, filters, strides, padding, output_padding, dilation_rate, kernel_quantizer, kernel_weights, use_bias, bias_quantizer, bias, activation, ): """Transpose convolution operation.""" kernel_h, kernel_w = self.kernel_size batch_size, out_height, out_width = self._get_output_size( inputs, output_padding, padding, strides, dilation_rate, kernel_h, kernel_w, ) if kernel_quantizer: quantized_kernel = kernel_quantizer(kernel_weights) else: quantized_kernel = kernel_weights output_filters = self.group_size if self.data_format == "channels_first": output_shape = (batch_size, output_filters, out_height, out_width) else: output_shape = (batch_size, out_height, out_width, output_filters) output_shape_tensor = array_ops.stack(output_shape) num_input_channels = self._input_shape[-1] if num_input_channels % self.group_size: raise ValueError( "Input channels should be exactly divisible by group_size." ) num_output_groups = num_input_channels // self.group_size # Split the input channels into groups. x = tf.split(inputs, num_output_groups, axis=-1) # For depthwise convolution, since CPU doesn't support grouped # convolution, we run convolution on each slice of inputs and concat # the results. outputs = [ tf.keras.backend.conv2d_transpose( x=x[i], kernel=quantized_kernel[ :, :, self.group_size * i : self.group_size * (i + 1), :, ], output_shape=output_shape_tensor, strides=strides, padding=padding, data_format=self.data_format, dilation_rate=dilation_rate, ) for i in range(num_output_groups) ] # Concat the channels. 
outputs = tf.concat(outputs, axis=-1) if not context.executing_eagerly(): # Infer the static output shape: out_shape = self.compute_final_output_shape( input_shape=inputs.shape, kernel_size=(kernel_h, kernel_w), strides=strides, ) outputs.set_shape(out_shape) if use_bias: quantized_bias = bias_quantizer(bias) if bias_quantizer else bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format ) if activation is not None: return activation(outputs) return outputs def call(self, inputs): input_shape = array_ops.shape(inputs) _, input_channel, _, _ = self._get_input_dims(input_shape) return self.conv_transpose_op( inputs=inputs, # Depthwise convolution doesn't operate across channels. Thereofore its # output channels is the same as input channels. filters=input_channel, strides=self.strides, padding=self.padding, output_padding=self.output_padding, dilation_rate=self.dilation_rate, kernel_quantizer=self.depthwise_kernel_quantizer_internal, kernel_weights=self.depthwise_kernel, use_bias=False, # Usually set bias=False for depthwise conv. 
bias_quantizer=None, bias=None, activation=self.depthwise_activation, ) def get_config(self): config = super().get_config() config.update({ "filters": self.filters, "kernel_size": self.kernel_size, "strides": self.strides, "padding": self.padding, "output_padding": self.output_padding, "dilation_rate": self.dilation_rate, "data_format": self.data_format, "depth_multiplier": self.depth_multiplier, "activation": self.activation, "use_bias": self.use_bias, "depthwise_kernel_quantizer": constraints.serialize( self.depthwise_kernel_quantizer_internal ), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal, ), "group_size": self.group_size, }) return config def get_quantizers(self): return [ self.depthwise_kernel_quantizer_internal, self.bias_quantizer_internal, self.depthwise_activation, ] def get_prunable_weights(self): w = [self.depthwise_kernel] if self.use_bias: w.append(self.bias) return w ================================================ FILE: qkeras/qdepthwiseconv2d_batchnorm.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Fold batchnormalization with previous QDepthwiseConv2D layers.""" import tensorflow as tf from tensorflow.keras import layers from tensorflow.keras.models import Model from .qconvolutional import QDepthwiseConv2D from .quantizers import * from tensorflow.python.framework import smart_cond as tf_utils from tensorflow.python.ops import math_ops from tensorflow.python.ops import array_ops tf.compat.v2.enable_v2_behavior() class QDepthwiseConv2DBatchnorm(QDepthwiseConv2D): """Fold batchnormalization with a previous QDepthwiseConv2d layer.""" def __init__( self, # QDepthwiseConv2d params kernel_size, strides=(1, 1), padding="VALID", depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer="he_normal", bias_initializer="zeros", depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, dilation_rate=(1, 1), depthwise_quantizer=None, bias_quantizer=None, depthwise_range=None, bias_range=None, # batchnorm params axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, renorm=False, renorm_clipping=None, renorm_momentum=0.99, fused=None, trainable=True, virtual_batch_size=None, adjustment=None, # other params ema_freeze_delay=None, folding_mode="ema_stats_folding", **kwargs): """A composite layer that folds depthwiseconv2d and batch normalization. The first group of parameters correponds to the initialization parameters of a QDepthwiseConv2d layer. check qkeras.qconvolutional.QDepthwiseConv2D for details. The 2nd group of parameters corresponds to the initialization parameters of a BatchNormalization layer. 
Check keras.layers.normalization.BatchNorma lizationBase for details. The 3rd group of parameters corresponds to the initialization parameters specific to this class. ema_freeze_delay: int or None. number of steps before batch normalization mv_mean and mv_variance will be frozen and used in the folded layer. folding_mode: string "ema_stats_folding": mimic tflite which uses the ema statistics to fold the kernel to suppress quantization induced jitter then performs the correction to have a similar effect of using the current batch statistics. "batch_stats_folding": use batch mean and variance to fold kernel first; after enough training steps switch to moving_mean and moving_variance for kernel folding. """ # intialization the QDepthwiseConv2d part of the composite layer super().__init__( kernel_size=kernel_size, strides=strides, padding=padding, depth_multiplier=depth_multiplier, data_format=data_format, activation=activation, use_bias=use_bias, depthwise_initializer=depthwise_initializer, bias_initializer=bias_initializer, depthwise_regularizer=depthwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, depthwise_constraint=depthwise_constraint, bias_constraint=bias_constraint, dilation_rate=dilation_rate, depthwise_quantizer=depthwise_quantizer, bias_quantizer=bias_quantizer, depthwise_range=depthwise_range, bias_range=bias_range, **kwargs ) # initialization of batchnorm part of the composite layer self.batchnorm = layers.BatchNormalization( axis=axis, momentum=momentum, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, moving_mean_initializer=moving_mean_initializer, moving_variance_initializer=moving_variance_initializer, beta_regularizer=beta_regularizer, gamma_regularizer=gamma_regularizer, beta_constraint=beta_constraint, gamma_constraint=gamma_constraint, renorm=renorm, renorm_clipping=renorm_clipping, renorm_momentum=renorm_momentum, fused=fused, 
trainable=trainable, virtual_batch_size=virtual_batch_size, adjustment=adjustment) self.ema_freeze_delay = ema_freeze_delay assert folding_mode in ["ema_stats_folding", "batch_stats_folding"] self.folding_mode = folding_mode def build(self, input_shape): super(QDepthwiseConv2DBatchnorm, self).build(input_shape) # If start training from scratch, self._iteration (i.e., training_steps) # is initialized with -1. When loading ckpt, it can load the number of # training steps that have been previously trainied. # TODO(lishanok): develop a way to count iterations outside layer self._iteration = tf.Variable(-1, trainable=False, name="iteration", dtype=tf.int64) def call(self, inputs, training=None): # numpy value, mark the layer is in training training = self.batchnorm._get_training_value(training) # pylint: disable=protected-access # checking if to update batchnorm params if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0): # if ema_freeze_delay is None or a negative value, do not freeze bn stats bn_training = tf.cast(training, dtype=bool) else: bn_training = tf.math.logical_and(training, tf.math.less_equal( self._iteration, self.ema_freeze_delay)) depthwise_kernel = self.depthwise_kernel # run depthwise_conv2d to produce output for the following batchnorm conv_outputs = tf.keras.backend.depthwise_conv2d( inputs, depthwise_kernel, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate, data_format=self.data_format) if self.use_bias: bias = self.bias conv_outputs = tf.keras.backend.bias_add( conv_outputs, bias, data_format=self.data_format) else: bias = 0 _ = self.batchnorm(conv_outputs, training=bn_training) self._iteration.assign_add(tf_utils.smart_cond( training, lambda: tf.constant(1, tf.int64), lambda: tf.constant(0, tf.int64))) # calcuate mean and variance from current batch bn_shape = conv_outputs.shape ndims = len(bn_shape) reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis] keep_dims = 
len(self.batchnorm.axis) > 1 mean, variance = self.batchnorm._moments( # pylint: disable=protected-access math_ops.cast(conv_outputs, self.batchnorm._param_dtype), # pylint: disable=protected-access reduction_axes, keep_dims=keep_dims) gamma = self.batchnorm.gamma beta = self.batchnorm.beta moving_mean = self.batchnorm.moving_mean moving_variance = self.batchnorm.moving_variance if self.folding_mode not in ["batch_stats_folding", "ema_stats_folding"]: assert ValueError("mode {} not supported!".format(self.folding_mode)) mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon) batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon) if gamma is not None: mv_inv *= gamma batch_inv *= gamma folded_bias = tf_utils.smart_cond( bn_training, lambda: batch_inv * (bias - mean) + beta, lambda: mv_inv * (bias - moving_mean) + beta) if self.folding_mode == "batch_stats_folding": # using batch mean and variance in the initial training stage # after sufficient training, switch to moving mean and variance inv = tf_utils.smart_cond(bn_training, lambda: batch_inv, lambda: mv_inv) elif self.folding_mode == "ema_stats_folding": # We always scale the weights with a correction factor to the long term # statistics prior to quantization. This ensures that there is no jitter # in the quantized weights due to batch to batch variation. During the # initial phase of training, we undo the scaling of the weights so that # outputs are identical to regular batch normalization. We also modify # the bias terms correspondingly. After sufficient training, switch from # using batch statistics to long term moving averages for batch # normalization. 
# use batch stats for calcuating bias before bn freeze, and use moving # stats after bn freeze # moving stats is always used to fold kernel in tflite; before bn freeze # an additional correction factor will be applied to the depthwiseconv2d # output inv = mv_inv # for DepthwiseConv2D inv needs to be broadcasted to the last 2 dimensions # of the kernels depthwise_weights_shape = [ depthwise_kernel.get_shape().as_list()[2], depthwise_kernel.get_shape().as_list()[3] ] inv = array_ops.reshape(inv, depthwise_weights_shape) # wrap conv kernel with bn parameters folded_depthwise_kernel = inv * depthwise_kernel # quantize the folded kernel if self.depthwise_quantizer is not None: q_folded_depthwise_kernel = self.depthwise_quantizer_internal( folded_depthwise_kernel) else: q_folded_depthwise_kernel = folded_depthwise_kernel # If loaded from a ckpt, bias_quantizer is the ckpt value # Else if bias_quantizer not specified, bias # quantizer is None and we need to calculate bias quantizer # type according to accumulator type. User can call # bn_folding_utils.populate_bias_quantizer_for_folded_layers( # model, input_quantizer_list]) to populate such bias quantizer. 
    # Quantize the folded bias when a bias quantizer is configured.
    if self.bias_quantizer is not None:
      q_folded_bias = self.bias_quantizer_internal(folded_bias)
    else:
      q_folded_bias = folded_bias

    applied_kernel = q_folded_depthwise_kernel
    applied_bias = q_folded_bias

    # calculate depthwise_conv2d output using the quantized folded kernel
    folded_outputs = tf.keras.backend.depthwise_conv2d(
        inputs,
        applied_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if training is True and self.folding_mode == "ema_stats_folding":
      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)
      # Correction factor: while bn is still training, undo the moving-stats
      # folding so the output matches regular batch normalization; once
      # frozen, the factor is 1.
      y_corr = tf_utils.smart_cond(
          bn_training,
          lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) *
                   math_ops.rsqrt(variance + self.batchnorm.epsilon)),
          lambda: tf.constant(1.0, shape=moving_variance.shape))
      folded_outputs = math_ops.mul(folded_outputs, y_corr)

    folded_outputs = tf.keras.backend.bias_add(
        folded_outputs,
        applied_bias,
        data_format=self.data_format)

    if self.activation is not None:
      return self.activation(folded_outputs)

    return folded_outputs

  def get_config(self):
    """Returns the layer config merged with the wrapped batchnorm's config."""
    base_config = super().get_config()
    bn_config = self.batchnorm.get_config()
    config = {"ema_freeze_delay": self.ema_freeze_delay,
              "folding_mode": self.folding_mode}
    name = base_config["name"]
    out_config = dict(
        list(base_config.items()) + list(bn_config.items()) +
        list(config.items()))

    # names from different config override each other; use the base layer name
    # as this layer's config name
    out_config["name"] = name
    return out_config

  def get_quantization_config(self):
    """Returns a string map describing the quantizers used by this layer."""
    return {
        "depthwise_quantizer": str(self.depthwise_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation),
        "filters": str(self.filters)
    }

  def get_quantizers(self):
    """Returns the internal quantizer objects of this layer."""
    return self.quantizers

  def get_folded_weights(self):
    """Function to get the batchnorm folded weights.

    This function converts the weights by folding batchnorm parameters into
    the weight of QDepthwiseConv2d.
    The high-level equation:

    W_fold = gamma * W / sqrt(variance + epsilon)
    bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta
    """
    depthwise_kernel = self.depthwise_kernel
    # When the layer has no bias, fold as if the bias were zero.
    if self.use_bias:
      bias = self.bias
    else:
      bias = 0

    # get Batchnorm stats
    gamma = self.batchnorm.gamma
    beta = self.batchnorm.beta
    moving_mean = self.batchnorm.moving_mean
    moving_variance = self.batchnorm.moving_variance

    # get the inversion factor so that we replace division by multiplication
    inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)
    if gamma is not None:
      inv *= gamma

    # fold bias with bn stats
    folded_bias = inv * (bias - moving_mean) + beta

    # for DepthwiseConv2D inv needs to be broadcasted to the last 2 dimensions
    # of the kernels
    depthwise_weights_shape = [
        depthwise_kernel.get_shape().as_list()[2],
        depthwise_kernel.get_shape().as_list()[3]
    ]
    inv = array_ops.reshape(inv, depthwise_weights_shape)

    # wrap conv kernel with bn parameters
    folded_depthwise_kernel = inv * depthwise_kernel

    return [folded_depthwise_kernel, folded_bias]


================================================
FILE: qkeras/qlayers.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# # ============================================================================== """Definition of quantization package.""" # Some parts of the code were adapted from # # https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow # # "Copyright (c) 2017, Bert Moons" where it applies # # and were implemented following several papers. # # https://arxiv.org/pdf/1609.07061.pdf # https://arxiv.org/abs/1602.02830 # https://arxiv.org/abs/1603.05279 # https://arxiv.org/abs/1605.04711 # https://ieeexplore.ieee.org/abstract/document/6986082 # https://ieeexplore.ieee.org/iel4/78/5934/00229903.pdf # from __future__ import absolute_import from __future__ import division from __future__ import print_function import sys import warnings import numpy as np import six import tensorflow.compat.v2 as tf from tensorflow.keras import activations from tensorflow.keras import constraints from tensorflow.keras import initializers from tensorflow.keras import regularizers import tensorflow.keras.backend as K from tensorflow.keras.constraints import Constraint from tensorflow.keras.initializers import Initializer from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Layer from tensorflow.python.framework import smart_cond as tf_utils from .quantizers import * from .quantizers import _get_integer_bits from .quantizers import get_quantizer from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer def get_auto_range_constraint_initializer(quantizer, constraint, initializer): """Get value range automatically for quantizer. Arguments: quantizer: A quantizer class in quantizers.py. constraint: A tf.keras constraint. initializer: A tf.keras initializer. Returns: a tuple (constraint, initializer), where constraint is clipped by Clip class in this file, based on the value range of quantizer. initializer is initializer contraint by value range of quantizer. 
""" if quantizer is not None: constraint = get_constraint(constraint, quantizer) initializer = get_initializer(initializer) if initializer and initializer.__class__.__name__ not in ["Ones", "Zeros", 'QInitializer']: # we want to get the max value of the quantizer that depends # on the distribution and scale if not (hasattr(quantizer, "alpha") and isinstance(quantizer.alpha, six.string_types)): initializer = QInitializer( initializer, use_scale=True, quantizer=quantizer) return constraint, initializer class QInitializer(Initializer): """Wraps around Keras initializer to provide a fanin scaling factor.""" def __init__(self, initializer, use_scale, quantizer): self.initializer = initializer self.use_scale = use_scale self.quantizer = quantizer try: self.is_po2 = "po2" in quantizer.__class__.__name__ except: self.is_po2 = False def __call__(self, shape, dtype=None): x = self.initializer(shape, dtype) max_x = np.max(abs(x)) std_x = np.std(x) delta = self.quantizer.max() * 2**-self.quantizer.bits # delta is the minimum resolution of the number system. # we want to make sure we have enough values. if delta > std_x and hasattr(self.initializer, "scale"): q = self.quantizer(x) max_q = np.max(abs(q)) scale = 1.0 if max_q == 0.0: xx = np.mean(x * x) scale = self.quantizer.max() / np.sqrt(xx) else: qx = np.sum(q * x) qq = np.sum(q * q) scale = qq / qx self.initializer.scale *= max(scale, 1) x = self.initializer(shape, dtype) return np.clip(x, -self.quantizer.max(), self.quantizer.max()) def get_config(self): return { "initializer": self.initializer, "use_scale": self.use_scale, "quantizer": self.quantizer, } @classmethod def from_config(cls, config): config = { 'initializer' : get_initializer(config['initializer']), 'use_scale' : config['use_scale'], 'quantizer' : get_quantizer(config['quantizer'])} return cls(**config) # # Because it may be hard to get serialization from activation functions, # we may be replacing their instantiation by QActivation in the future. 
# class QActivation(Layer, PrunableLayer): """Implements quantized activation layers.""" # TODO(lishanok): Implement activation type conversion outside of the class. # When caller calls the initializer, it should convert string to a quantizer # object if string is given as activation. def __init__(self, activation, **kwargs): super().__init__(**kwargs) self.activation = activation if not isinstance(activation, six.string_types): self.quantizer = activation if hasattr(self.quantizer, "__name__"): self.__name__ = self.quantizer.__name__ elif hasattr(self.quantizer, "name"): self.__name__ = self.quantizer.name elif hasattr(self.quantizer, "__class__"): self.__name__ = self.quantizer.__class__.__name__ return self.__name__ = activation try: self.quantizer = get_quantizer(activation) except KeyError: raise ValueError("invalid activation '{}'".format(activation)) def call(self, inputs): return self.quantizer(inputs) def get_config(self): config = {"activation": self.activation} base_config = super(QActivation, self).get_config() return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config): try: if isinstance(config["activation"], dict): # If config["activation"] is serialized, it would be a dict. # Otherwise, it will be either string or quantizer object, which # doesn't require deserialization. config["activation"] = activations.deserialize(config["activation"]) return cls(**config) except Exception as e: raise TypeError( f"Error when deserializing class '{cls.__name__}' using " f"config={config}.\n\nException encountered: {e}" ) def get_quantization_config(self): return str(self.activation) def compute_output_shape(self, input_shape): return input_shape def get_prunable_weights(self): return [] class QAdaptiveActivation(Layer, PrunableLayer): """[EXPERIMENTAL] Implements an adaptive quantized activation layer using EMA. 
This layer calculates an exponential moving average of min and max of the activation values to automatically determine the scale (integer bits) of the quantizer used in this layer. """ def __init__(self, activation, total_bits, current_step=None, symmetric=True, quantization_delay=0, ema_freeze_delay=None, ema_decay=0.9999, per_channel=False, po2_rounding=False, relu_neg_slope=0.0, relu_upper_bound=None, **kwargs): """Initializes this QAdaptiveActivation layer. Args: activation: Str. The activation quantizer type to use for this activation layer, such as 'quantized_relu'. Should be a string with no params. total_bits: Int. The total bits that can be used by the quantizer current_step: tf.Variable specifying the current step in training. You can find this by passing model.optimizer.iterations (see tf.keras.optimizers.Optimizer.iterations). If set to None, the layer will attempt to estimate the current step itself, but please note that this number may not always match the optimizer step. symmetric: Bool. If to enforce symmetry about the origin in the quantized bit representation of the value. When using linear activation, this should be True for best results. quantization_delay: Int. How many training steps to wait until quantizing the activation values. ema_freeze_delay: Int. Steps to wait until stopping the update of the exponential moving average values. Set to None for an infinite delay. ema_decay: Float. The decay value used for exponential moving average (see tf.keras.backend.moving_average_update) per_channel: Bool. If to quantize the activation values on a per-channel basis. po2_rounding: Bool. If true, the EMA max value is rounded to the nearest power-of-2. If false, the EMA max value is rounded up (with ceil) to a power-of-two. These power-of-two operations are necessary to calculate the number of integer bits used in the quantizer, and the difference between using round and ceil trade off the quantizer's range and precision. relu_neg_slope: Float. 
Slope of the negative values in relu to enable the use of leaky relu. This parameter will only be used with the quantizer type quantized_relu. Set to 0.0 to use normal relu. relu_upper_bound: Float. The upper bound to use if the activation is set to relu. Set to None to not artificially set an upper bound. Pease note that this param is ignored if the activation is not quantized_relu **kwargs: Args passed to the Layer class. """ super().__init__(**kwargs) self.total_bits = total_bits self.symmetric = symmetric self.is_estimating_step_count = False # If the layer should estimate its # own step count by incrementing it # every call. if isinstance(current_step, tf.Variable): self.step = current_step elif current_step is None: self.step = tf.Variable(-1, dtype=tf.int64) self.is_estimating_step_count = True print("[WARNING] QAdaptiveActivation is estimating it's own training " "step count, which may not always be the same as the true optimizer" " training step. To mitigate this, please set the current_step " "parameter when initializing QAdaptiveActivation", file=sys.stderr) else: self.step = tf.Variable(current_step, dtype=tf.int64) print("[WARNING] QAdaptiveActivation is disconnected from the optimizer " "current step, which may lead to incorrect training. If you wish to" " resume training, set this layer's self.step to the optimizer's " "tf.Variable current step", file=sys.stderr) self.quantization_delay = quantization_delay self.ema_freeze_delay = ema_freeze_delay self.will_ema_freeze = True if ema_freeze_delay else False self.ema_decay = ema_decay self.per_channel = per_channel self.po2_rounding = po2_rounding self.ema_min = None self.ema_max = None self.relu_neg_slope = relu_neg_slope self.relu_upper_bound = relu_upper_bound # Verify quantizer type is correct self.supported_quantizers = ["quantized_bits", "quantized_relu"] if activation not in self.supported_quantizers: raise ValueError(("Invalid activation {}. 
Activation quantizer may NOT " "contain any parameters (they will be set automatically" " by this layer), and only the quantizer types {} are " "supported.").format(activation, self.supported_quantizers)) # Get the quantizer associated with the activation try: self.quantizer = get_quantizer(activation) except KeyError: raise ValueError("Invalid activation '{}'".format(activation)) # Check that the quantizer is supported if self.quantizer.__class__.__name__ not in self.supported_quantizers: raise ValueError("Unsupported activation quantizer '{}'".format( self.quantizer.__class__.__name__)) # Set keep_negative if self.quantizer.__class__.__name__ == "quantized_relu": self.quantizer.is_quantized_clip = False # Use relu_upper_bound instead if self.relu_upper_bound: self.quantizer.relu_upper_bound = self.relu_upper_bound self.quantizer.negative_slope = relu_neg_slope self.keep_negative = relu_neg_slope != 0.0 self.quantizer.is_quantized_clip = False # Use normal relu when qnoise=0 elif self.quantizer.__class__.__name__ == "quantized_bits": self.keep_negative = True self.quantizer.keep_negative = True # If not using quantization delay, then print warning if self.quantization_delay < 1: print("[WARNING] If QAdaptiveActivation has the quantization_delay set " "to 0, then the moving averages will be heavily biased towards the " "initial quantizer configuration, which will likely prevent the " "model from converging. 
Consider a larger quantization_delay.", file=sys.stderr) self.activation = self.quantizer # self.activation is used by QTools def build(self, input_shape): if self.will_ema_freeze: self.ema_freeze_delay = tf.constant(self.ema_freeze_delay, dtype=tf.int64) self.ema_decay = tf.constant(self.ema_decay, dtype=tf.float32) self.is_estimating_step_count = tf.constant(self.is_estimating_step_count, dtype=tf.bool) # Calculate the number of channels channel_index = -1 if K.image_data_format() == "channels_last" else 1 if self.per_channel: input_shape_list = list(input_shape) if isinstance( input_shape, tuple) else input_shape.as_list() num_channels = tf.constant(input_shape_list[channel_index], shape=(1), dtype=tf.int64) else: num_channels = tf.constant(1, shape=(1), dtype=tf.int64) # Initialize the moving mins and max if self.ema_min is None or self.ema_max is None: self.ema_min = tf.Variable(tf.zeros(num_channels), name="ema_min", trainable=False) self.ema_max = tf.Variable(tf.zeros(num_channels), name="ema_max", trainable=False) # Determine the parameters for the quantizer self.quantizer.bits = self.total_bits # Set up the initial integer bits and quantizer params self.quantizer.integer = tf.Variable(tf.zeros(num_channels, dtype=tf.int32), name="quantizer_integer_bits", trainable=False) integer_bits = _get_integer_bits(min_value=self.ema_min, max_value=self.ema_max, bits=self.total_bits, symmetric=self.symmetric, keep_negative=self.keep_negative, is_clipping=self.po2_rounding) self.quantizer.integer.assign(integer_bits) self.quantizer.alpha = 1.0 # Setting alpha to 1.0 allows the integer bits # to serve as the scale self.quantizer.symmetric = self.symmetric self.quantization_delay = tf.constant(self.quantization_delay, dtype=tf.int64) def call(self, inputs, training=False): x = inputs training = training and self.trainable self.will_ema_freeze = self.will_ema_freeze and self.trainable # Update the step count if the optimizer step count is unknown 
self.step.assign_add(K.switch( tf.math.logical_and(self.is_estimating_step_count, training), tf.constant(1, tf.int64), tf.constant(0, tf.int64))) # Perform the quantization if training: # Calculate the qnoise, a scalar from 0 to 1 that represents the level of # quantization noise to use. At training start, we want no quantization, # so qnoise_factor = 0.0. After quantization_delay steps, we want normal # quantization, so qnoise_factor = 1.0. qnoise_factor = K.switch( tf.greater_equal(self.step, self.quantization_delay), lambda: tf.constant(1.0), lambda: tf.constant(0.0)) self.quantizer.update_qnoise_factor(qnoise_factor) qx = self.quantizer(x) else: # If not training, we always want to use full quantization self.quantizer.update_qnoise_factor(tf.constant(1.0)) qx = self.quantizer(x) # Calculate the axis along where to find the min and max EMAs len_axis = len(x.shape) if len_axis > 1: if self.per_channel: if K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) else: axis = list(range(len_axis)) else: axis = [0] # Determine if freezing the EMA is_ema_training = tf.constant(training, dtype=tf.bool) if self.will_ema_freeze: is_ema_training = tf.cond( tf.greater(self.step, self.ema_freeze_delay), lambda: tf.constant(False), lambda: tf.constant(True)) def update_branch(): """ Update the moving average when is_ema_training is True.""" # Set the qnoise factor to 0 to update the EMA using the unquantized input prev_qnoise_factor = tf.identity(self.quantizer.qnoise_factor) self.quantizer.update_qnoise_factor(tf.constant(0.0)) # Update the EMA act_x = self.quantizer(x) # act_x is the input after the activation # function, but before the quantizer. 
This is # done by using a qnoise_factor of 0 new_min = tf.squeeze(K.min(act_x, axis=axis, keepdims=True)) K.moving_average_update(self.ema_min, new_min, self.ema_decay) new_max = tf.squeeze(K.max(act_x, axis=axis, keepdims=True)) K.moving_average_update(self.ema_max, new_max, self.ema_decay) # Reset the qnoise factor to the previous value self.quantizer.update_qnoise_factor(prev_qnoise_factor) # Update the moving average when is_ema_training is True tf_utils.smart_cond( is_ema_training, true_fn=update_branch, false_fn=lambda: None) # Set the integer bits for the quantizer integer_bits = _get_integer_bits( min_value=self.ema_min, max_value=self.ema_max, bits=self.total_bits, symmetric=self.symmetric, keep_negative=self.keep_negative, is_clipping=self.po2_rounding) self.quantizer.integer.assign(integer_bits) return qx # Override get_weights since we do not want ema_min or ema_max to be public def get_weights(self): return [] # Override set_weights since we do not want ema_min or ema_max to be public def set_weights(self, weights): return def get_config(self): config = { "activation": self.quantizer.__class__.__name__, "total_bits": self.total_bits, "current_step": self.step.numpy(), "symmetric": self.symmetric, "quantization_delay": np.array(self.quantization_delay), "ema_freeze_delay": np.array(self.ema_freeze_delay), "ema_decay": np.array(self.ema_decay), "per_channel": self.per_channel, "po2_rounding": self.po2_rounding, "relu_neg_slope": self.relu_neg_slope } base_config = super(QAdaptiveActivation, self).get_config() return dict(list(base_config.items()) + list(config.items())) def get_quantization_config(self): self.quantizer.integer_bits = np.array(self.quantizer) return str(self.quantizer) def compute_output_shape(self, input_shape): return input_shape def get_prunable_weights(self): return [] # # Constraint class to clip weights and bias between -1 and 1 so that: # 1. quantization approximation is symmetric (b = 0). # 2. 
max(x) and min(x) are 1 and -1 respectively.
#
class Clip(Constraint):
  """Clips weight constraint."""

  # This function was modified from Keras minmaxconstraints.
  #
  # Constrains the weights to be between min/max values.
  #   min_value: the minimum norm for the incoming weights.
  #   max_value: the maximum norm for the incoming weights.
  #   constraint: previous constraint to be clipped.
  #   quantizer: quantizer to be applied to constraint.

  def __init__(self, min_value=0.0, max_value=1.0,
               constraint=None, quantizer=None):
    """Initializes Clip constraint class."""

    self.min_value = min_value
    self.max_value = max_value
    self.constraint = constraints.get(constraint)
    # Don't wrap yourself
    if isinstance(self.constraint, Clip):
      self.constraint = None
    self.quantizer = get_quantizer(quantizer)

  def __call__(self, w):
    """Clips values between min and max values."""
    if self.constraint:
      w = self.constraint(w)
      if self.quantizer:
        w = self.quantizer(w)
    w = tf.keras.backend.clip(w, self.min_value, self.max_value)
    return w

  def get_config(self):
    """Returns configuration of constraint class."""
    # NOTE(review): `constraint` and `quantizer` are not serialized here;
    # only the clip bounds round-trip through get_config.
    return {"min_value": self.min_value, "max_value": self.max_value}

  @classmethod
  def from_config(cls, config):
    # Avoid wrapping a Clip inside another Clip on deserialization.
    if isinstance(config.get('constraint', None), Clip):
      config['constraint'] = None
    config['constraint'] = constraints.get(config.get('constraint', None))
    config['quantizer'] = get_quantizer(config.get('quantizer', None))
    return cls(**config)


#
# Definition of Quantized NN classes. These classes were copied
# from the equivalent layers in Keras, and we modified to apply quantization.
# Similar implementations can be seen in the references.
#
class QDense(Dense, PrunableLayer):
  """Implements a quantized Dense layer."""

  # Most of these parameters follow the implementation of Dense in
  # Keras, with the exception of kernel_range, bias_range,
  # kernel_quantizer, bias_quantizer, and kernel_initializer.
# # kernel_quantizer: quantizer function/class for kernel # bias_quantizer: quantizer function/class for bias # kernel_range/bias_ranger: for quantizer functions whose values # can go over [-1,+1], these values are used to set the clipping # value of kernels and biases, respectively, instead of using the # constraints specified by the user. # # we refer the reader to the documentation of Dense in Keras for the # other parameters. def __init__(self, units, activation=None, use_bias=True, kernel_initializer="he_normal", bias_initializer="zeros", kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, kernel_quantizer=None, bias_quantizer=None, kernel_range=None, bias_range=None, **kwargs): if kernel_range is not None: warnings.warn("kernel_range is deprecated in QDense layer.") if bias_range is not None: warnings.warn("bias_range is deprecated in QDense layer.") self.kernel_range = kernel_range self.bias_range = bias_range self.kernel_quantizer = kernel_quantizer self.bias_quantizer = bias_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.kernel_quantizer_internal, self.bias_quantizer_internal ] kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) super().__init__( units=units, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, 
bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, **kwargs, ) def call(self, inputs): if self.kernel_quantizer: quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel output = tf.keras.backend.dot(inputs, quantized_kernel) if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias output = tf.keras.backend.bias_add(output, quantized_bias, data_format="channels_last") if self.activation is not None: output = self.activation(output) return output def compute_output_shape(self, input_shape): assert input_shape and len(input_shape) >= 2 assert input_shape[-1] output_shape = list(input_shape) output_shape[-1] = self.units return tuple(output_shape) def get_config(self): config = { "units": self.units, "activation": activations.serialize( self.activation# Google internal code, commented out by copybara ), "use_bias": self.use_bias, "kernel_quantizer": constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), "kernel_initializer": initializers.serialize( self.kernel_initializer# Google internal code, commented out by copybara ), "bias_initializer": initializers.serialize( self.bias_initializer# Google internal code, commented out by copybara ), "kernel_regularizer": regularizers.serialize( self.kernel_regularizer# Google internal code, commented out by copybara ), "bias_regularizer": regularizers.serialize( self.bias_regularizer# Google internal code, commented out by copybara ), "activity_regularizer": regularizers.serialize( self.activity_regularizer# Google internal code, commented out by copybara ), 
"kernel_constraint": constraints.serialize( self.kernel_constraint# Google internal code, commented out by copybara ), "bias_constraint": constraints.serialize( self.bias_constraint# Google internal code, commented out by copybara ), "kernel_range": self.kernel_range, "bias_range": self.bias_range, } base_config = super(QDense, self).get_config() return dict(list(base_config.items()) + list(config.items())) def get_quantization_config(self): return { "kernel_quantizer": str(self.kernel_quantizer_internal), "bias_quantizer": str(self.bias_quantizer_internal), "activation": str(self.activation), "units" : str(self.units) } def get_quantizers(self): return self.quantizers def get_prunable_weights(self): return [self.kernel] def get_constraint(identifier, quantizer): """Gets the initializer. Args: identifier: A constraint, which could be dict, string, or callable function. quantizer: A quantizer class or quantization function Returns: A constraint class """ if identifier: if isinstance(identifier, dict) and identifier['class_name'] == 'Clip': return Clip.from_config(identifier['config']) else: return constraints.get(identifier) else: max_value = max(1, quantizer.max()) if hasattr(quantizer, "max") else 1.0 return Clip(-max_value, max_value, identifier, quantizer) def get_initializer(identifier): """Gets the initializer. Args: identifier: An initializer, which could be dict, string, or callable function. Returns: A initializer class Raises: ValueError: An error occurred when quantizer cannot be interpreted. 
""" if identifier is None: return None if isinstance(identifier, dict): if identifier['class_name'] == 'QInitializer': return QInitializer.from_config(identifier['config']) else: return initializers.get(identifier) elif isinstance(identifier, six.string_types): return initializers.get(identifier) elif callable(identifier): return identifier else: raise ValueError("Could not interpret initializer identifier: " + str(identifier)) ================================================ FILE: qkeras/qmac.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.keras import constraints from .quantizers import get_quantizer from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer from .qlayers import get_auto_range_constraint_initializer # QKeras needs to support more layers for matrix multiplication and shift # operations such as in Transformer. Such layers should be all placed here. class QScaleShift(tf.keras.layers.Layer, PrunableLayer): """Quantized scale and shift layer. output = scale * x + bias where scale and bias are each of shape (1,). QScaleShift is similar to the special case in QDepthwiseConv2D where kernel_size=(1,1). 
  However there are several differences: 1) There is no concept of padding
  and striding in QScaleShift since it's not a conv layer; 2)
  QDepthwiseConv2D expected min_ndim=4 for input shape; while QScaleShift
  input could be any shape; 3) In QDepthwiseConv2D each output channel has
  its own weight value; while QScaleShift share the same weight across the
  entire input tensor. 4) Since it's not a Conv operation, hardware
  implementation for QScaleShift and QDWConv2D is fundamentally different.
  Therefore it makes sense to separate them as two different types of
  layers.
  """

  def __init__(self, weight_quantizer=None, bias_quantizer=None,
               use_bias=True, activation=None,
               weight_initializer="he_normal", weight_regularizer=None,
               bias_initializer="zeros", bias_regularizer=None, **kwargs):
    """Initializes the layer.

    Args:
      weight_quantizer: quantizer (object or string) for the scale weight.
      bias_quantizer: quantizer (object or string) for the shift bias.
      use_bias: bool, whether a bias (shift) term is created and applied.
      activation: quantizer/activation applied to the layer output.
      weight_initializer: initializer for the scale weight.
      weight_regularizer: regularizer for the scale weight.
      bias_initializer: initializer for the shift bias.
      bias_regularizer: regularizer for the shift bias.
      **kwargs: additional keras Layer keyword arguments.
    """
    # NOTE(review): super().__init__() is invoked both here and at the end of
    # this method (with **kwargs); the second call is the one that forwards
    # the layer kwargs -- confirm the duplicated call is intentional.
    super().__init__()
    self.use_bias = use_bias
    self.weight_regularizer = weight_regularizer
    self.bias_regularizer = bias_regularizer
    self.weight_quantizer = weight_quantizer
    self.bias_quantizer = bias_quantizer

    self.weight_quantizer_internal = get_quantizer(self.weight_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # The constraint is discarded (None passed); only the auto-range
    # initializer derived from the quantizer is kept.
    _, self.weight_initializer = (
        get_auto_range_constraint_initializer(
            self.weight_quantizer_internal, None, weight_initializer))

    _, self.bias_initializer = (
        get_auto_range_constraint_initializer(
            self.bias_quantizer_internal, None, bias_initializer))

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.weight_quantizer_internal, "_set_trainable_parameter"):
      self.weight_quantizer_internal._set_trainable_parameter()
    if hasattr(self.bias_quantizer_internal, "_set_trainable_parameter"):
      self.bias_quantizer_internal._set_trainable_parameter()

    self.quantizers = [self.weight_quantizer_internal,
                       self.bias_quantizer_internal]

    self.activation = get_quantizer(activation)

    super().__init__(**kwargs)

  def build(self, input_shape):
    """Creates the scalar (1, 1) scale weight and, optionally, the bias."""
    self.weight = self.add_weight(
        name="weight",
        shape=(1, 1),
        dtype="float32",
        initializer=self.weight_initializer,
        regularizer=self.weight_regularizer,
        trainable=True)

    if self.use_bias:
      self.bias = self.add_weight(
          name="bias",
          shape=(1, 1),
          dtype="float32",
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          trainable=True)
    else:
      self.bias = None

    self.built = True

  def call(self, inputs):
    """Computes activation(quantize(w) * inputs + quantize(b))."""
    quantized_weight = (
        self.weight_quantizer_internal(self.weight)
        if self.weight_quantizer_internal is not None else self.weight)

    # Scale: element-wise multiply with the (broadcast) scalar weight.
    outputs = tf.math.multiply(inputs, quantized_weight)

    if self.use_bias:
      quantized_bias = (
          self.bias_quantizer_internal(self.bias)
          if self.bias_quantizer_internal is not None else self.bias)
      # Shift: add the (broadcast) scalar bias.
      outputs = quantized_bias + outputs

    return self.activation(outputs) if self.activation is not None else outputs

  def get_config(self):
    """Returns the layer configuration for serialization."""
    config = {
        "weight_quantizer": constraints.serialize(
            self.weight_quantizer_internal  # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal  # Google internal code, commented out by copybara
        ),
        "weight_initializer": constraints.serialize(
            self.weight_initializer  # Google internal code, commented out by copybara
        ),
        "bias_initializer": constraints.serialize(
            self.bias_initializer  # Google internal code, commented out by copybara
        ),
        "activation": constraints.serialize(
            self.activation  # Google internal code, commented out by copybara
        ),
        "use_bias": self.use_bias,
        "weight_regularizer": constraints.serialize(
            self.weight_regularizer  # Google internal code, commented out by copybara
        ),
        "bias_regularizer": constraints.serialize(
            self.bias_regularizer  # Google internal code, commented out by copybara
        ),
    }
    base_config = super().get_config()
    base_config.update(config)
    return base_config

  def get_quantization_config(self):
    """Returns string representations of the quantizers for qtools."""
    return {
        "weight_quantizer": str(self.weight_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation)
    }

  def get_quantizers(self):
    """Returns the internal [weight, bias] quantizer list."""
    return self.quantizers

  def get_prunable_weights(self):
    """Returns weights prunable by tensorflow_model_optimization."""
    return [self.weight, self.bias]


================================================ FILE: qkeras/qmodel.proto ================================================
// Copyright 2019 Google LLC
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// ==============================================================================
syntax = "proto2";

package qkeras;

import "google/protobuf/any.proto";

// Protobuf to represent a quantized machine learning model.
message QModel {
  // Layers of a quantized model.
  repeated QLayer qlayers = 1;
}

// Protobuf to represent an individual layer that supports quantization.
//
// TODO(akshayap): Add platform agnostic way of saving weights, ideally
// something that can mimic numpy arrays.
message QLayer {
  // Layer name.
  optional string name = 1;
  // Input shape for the layer.
  repeated int32 input_shape = 2 [packed = true];
  // Output shape for the layer.
  repeated int32 output_shape = 3 [packed = true];
  // Quantization configuration for this layer.
  optional Quantization quantization = 4;
  // Hardware parameters associated with this layer.
  optional HardwareParams hw_params = 5;
  // Model specific custom details.
  optional google.protobuf.Any details = 6;
}

// Quantization configurations for a model layer.
message Quantization {
  // Number of bits to perform quantization.
  optional int32 bits = 1;
  // Number of bits to the left of the decimal point.
  optional int32 integer = 2;
  // The minimum allowed power of two exponent
  optional int32 min_po2 = 3;
  // The maximum allowed power of two exponent
  optional int32 max_po2 = 4;
}

// Parameters for hardware synthesis of machine learning models.
message HardwareParams {
  // MAC bitwidth.
  optional int32 mac_bitwidth = 1;
}

================================================ FILE: qkeras/qnormalization.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
"""Definition of normalization quantization package."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six
import warnings

import tensorflow.compat.v2 as tf
from tensorflow.keras import constraints
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.layers import BatchNormalization
from tensorflow.python.framework import ops
from tensorflow.python.framework import smart_cond as tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn

from .qlayers import Clip
from .qlayers import get_auto_range_constraint_initializer
from .qlayers import get_quantizer
from .quantizers import quantized_relu_po2
from .quantizers import quantized_po2
from .safe_eval import safe_eval

from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer


class QBatchNormalization(BatchNormalization, PrunableLayer):
  """Quantized Batch Normalization layer.

  For training, mean and variance are not quantized.
  For inference, the quantized moving mean and moving variance are used.

  output = (x - mean) / sqrt(var + epsilon) * quantized_gamma +
           quantized_beta
  """

  def __init__(
      self,
      axis=-1,
      momentum=0.99,
      epsilon=1e-3,
      center=True,
      scale=True,
      activation=None,
      beta_initializer='zeros',
      gamma_initializer='ones',
      moving_mean_initializer='zeros',
      moving_variance_initializer='ones',
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_quantizer='quantized_po2(5)',
      gamma_quantizer='quantized_relu_po2(6, 2048)',
      mean_quantizer='quantized_po2(5)',
      variance_quantizer='quantized_relu_po2(6, quadratic_approximation=True)',
      inverse_quantizer=None,
      gamma_constraint=None,
      beta_constraint=None,
      # use quantized_po2 and enforce quadratic approximation
      # to get an even exponent for sqrt
      beta_range=None,
      gamma_range=None,
      **kwargs):
    # beta_range / gamma_range are accepted only for backward compatibility;
    # passing a non-None value triggers a deprecation warning below.
    if gamma_range is not None:
      warnings.warn('gamma_range is deprecated in QBatchNormalization layer.')

    if beta_range is not None:
      warnings.warn('beta_range is deprecated in QBatchNormalization layer.')

    self.gamma_range = gamma_range
    self.beta_range = beta_range
    self.activation = activation
    self.beta_quantizer = beta_quantizer
    self.gamma_quantizer = gamma_quantizer
    self.mean_quantizer = mean_quantizer
    self.variance_quantizer = variance_quantizer
    self.inverse_quantizer = inverse_quantizer

    # inverse_quantizer quantizes gamma / sqrt(var + eps) as a whole, so it
    # must not be combined with separate gamma / variance quantizers.
    if self.inverse_quantizer is not None:
      assert self.variance_quantizer is None and self.gamma_quantizer is None, (
          'If using the inverse quantizer, the gamma and variance quantizers '
          'should not be used in order to avoid quantizing a value twice.')

    self.beta_quantizer_internal = get_quantizer(self.beta_quantizer)
    self.gamma_quantizer_internal = get_quantizer(self.gamma_quantizer)
    self.mean_quantizer_internal = get_quantizer(self.mean_quantizer)
    self.variance_quantizer_internal = get_quantizer(self.variance_quantizer)
    self.inverse_quantizer_internal = get_quantizer(self.inverse_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.gamma_quantizer_internal, '_set_trainable_parameter'):
      self.gamma_quantizer_internal._set_trainable_parameter()
    if hasattr(self.variance_quantizer_internal, '_set_trainable_parameter'):
      self.variance_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.gamma_quantizer_internal,
        self.beta_quantizer_internal,
        self.mean_quantizer_internal,
        self.variance_quantizer_internal,
        self.inverse_quantizer_internal
    ]

    if scale and self.gamma_quantizer:
      gamma_constraint, gamma_initializer = (
          get_auto_range_constraint_initializer(
              self.gamma_quantizer_internal,
              gamma_constraint,
              gamma_initializer)
      )

    if center and self.beta_quantizer:
      beta_constraint, beta_initializer = (
          get_auto_range_constraint_initializer(
              self.beta_quantizer_internal,
              beta_constraint,
              beta_initializer)
      )

    # The following keras BatchNormalization features are unsupported by the
    # quantized implementation; warn and strip them from kwargs.
    if kwargs.get('fused', None):
      warnings.warn('batch normalization fused is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['fused']

    if kwargs.get('renorm', None):
      warnings.warn('batch normalization renorm is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['renorm']

    if kwargs.get('virtual_batch_size', None):
      warnings.warn('batch normalization virtual_batch_size is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['virtual_batch_size']

    if kwargs.get('adjustment', None):
      warnings.warn('batch normalization adjustment is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['adjustment']

    super().__init__(
        axis=axis,
        momentum=momentum,
        epsilon=epsilon,
        center=center,
        scale=scale,
        beta_initializer=beta_initializer,
        gamma_initializer=gamma_initializer,
        moving_mean_initializer=moving_mean_initializer,
        moving_variance_initializer=moving_variance_initializer,
        beta_regularizer=beta_regularizer,
        gamma_regularizer=gamma_regularizer,
        beta_constraint=beta_constraint,
        gamma_constraint=gamma_constraint,
        fused=False,
        renorm=False,
        virtual_batch_size=None,
        adjustment=None,
        **kwargs
    )

  def call(self, inputs, training=None):
    """Computes the quantized batch-normalized output of `inputs`."""
    # Quantize the learned parameters (gamma/beta) and the moving statistics
    # when the corresponding quantizer is configured.
    if self.scale and self.gamma_quantizer:
      quantized_gamma = self.gamma_quantizer_internal(self.gamma)
    else:
      quantized_gamma = self.gamma

    if self.center and self.beta_quantizer:
      quantized_beta = self.beta_quantizer_internal(self.beta)
    else:
      quantized_beta = self.beta

    if self.mean_quantizer:
      quantized_moving_mean = self.mean_quantizer_internal(self.moving_mean)
    else:
      quantized_moving_mean = self.moving_mean

    if self.variance_quantizer:
      quantized_moving_variance = self.variance_quantizer_internal(
          self.moving_variance)
    else:
      quantized_moving_variance = self.moving_variance

    training = self._get_training_value(training)

    # Compute the axes along which to reduce the mean / variance
    input_shape = inputs.shape
    ndims = len(input_shape)
    reduction_axes = [i for i in range(ndims) if i not in self.axis]

    # Broadcasting only necessary for single-axis batch norm where the axis is
    # not the last dimension
    broadcast_shape = [1] * ndims
    broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value

    def _broadcast(v):
      if (v is not None and len(v.shape) != ndims and
          reduction_axes != list(range(ndims - 1))):
        return array_ops.reshape(v, broadcast_shape)
      return v

    scale, offset = _broadcast(quantized_gamma), _broadcast(quantized_beta)

    # Determine a boolean value for `training`: could be True, False, or None.
    training_value = tf_utils.smart_constant_value(training)
    if training_value == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
      # Inference with statically-known training=False: use the (quantized)
      # moving statistics directly.
      quantized_mean, quantized_variance = (quantized_moving_mean,
                                            quantized_moving_variance)
    else:
      # Some of the computations here are not necessary when training==False
      # but not a constant. However, this makes the code simpler.
      keep_dims = len(self.axis) > 1
      mean, variance = self._moments(
          math_ops.cast(inputs, self._param_dtype),
          reduction_axes,
          keep_dims=keep_dims)

      moving_mean = self.moving_mean
      moving_variance = self.moving_variance

      # Select batch statistics when training, moving statistics otherwise.
      mean = tf_utils.smart_cond(training,
                                 lambda: mean,
                                 lambda: ops.convert_to_tensor(moving_mean))
      variance = tf_utils.smart_cond(
          training,
          lambda: variance,
          lambda: ops.convert_to_tensor(moving_variance))

      new_mean, new_variance = mean, variance

      # Normalization uses the quantized statistics; the moving averages are
      # updated with the unquantized values below.
      if self.mean_quantizer:
        quantized_mean = self.mean_quantizer_internal(mean)
      else:
        quantized_mean = mean

      if self.variance_quantizer:
        quantized_variance = self.variance_quantizer_internal(variance)
      else:
        quantized_variance = variance

      if self._support_zero_size_input():
        inputs_size = array_ops.size(inputs)
      else:
        inputs_size = None

      def _do_update(var, value):
        """Compute the updates for mean and variance."""
        return self._assign_moving_average(var, value, self.momentum,
                                           inputs_size)

      def mean_update():
        true_branch = lambda: _do_update(self.moving_mean, new_mean)
        false_branch = lambda: self.moving_mean
        return tf_utils.smart_cond(training, true_branch, false_branch)

      def variance_update():
        """Update the moving variance."""
        true_branch = lambda: _do_update(self.moving_variance, new_variance)
        false_branch = lambda: self.moving_variance
        return tf_utils.smart_cond(training, true_branch, false_branch)

      self.add_update(mean_update)
      self.add_update(variance_update)

    quantized_mean = _broadcast(math_ops.cast(quantized_mean, inputs.dtype))
    quantized_variance = _broadcast(
        math_ops.cast(quantized_variance, inputs.dtype))

    if offset is not None:
      offset = math_ops.cast(offset, inputs.dtype)
    if scale is not None:
      scale = math_ops.cast(scale, inputs.dtype)

    # Calculate and quantize the inverse
    inv = math_ops.rsqrt(quantized_variance + self.epsilon)
    if scale is not None:
      inv *= scale
    if self.inverse_quantizer_internal is not None:
      inv = self.inverse_quantizer_internal(inv)

    # Calculate the forward pass of the BN
    outputs = inputs * math_ops.cast(inv, inputs.dtype) + math_ops.cast(
        offset - quantized_mean * inv
        if offset is not None else -quantized_mean * inv, inputs.dtype)

    # If some components of the shape got lost due to adjustments, fix that.
    outputs.set_shape(input_shape)
    return outputs

  def get_config(self):
    """Returns the layer configuration for serialization."""
    config = {
        'axis': self.axis,
        'momentum': self.momentum,
        'epsilon': self.epsilon,
        'center': self.center,
        'scale': self.scale,
        'beta_quantizer': constraints.serialize(
            self.beta_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'gamma_quantizer': constraints.serialize(
            self.gamma_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'mean_quantizer': constraints.serialize(
            self.mean_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'variance_quantizer': constraints.serialize(
            self.variance_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'beta_initializer': initializers.serialize(
            self.beta_initializer  # Google internal code, commented out by copybara
        ),
        'gamma_initializer': initializers.serialize(
            self.gamma_initializer  # Google internal code, commented out by copybara
        ),
        'moving_mean_initializer': initializers.serialize(
            self.moving_mean_initializer  # Google internal code, commented out by copybara
        ),
        'moving_variance_initializer': initializers.serialize(
            self.moving_variance_initializer  # Google internal code, commented out by copybara
        ),
        'inverse_quantizer': initializers.serialize(
            self.inverse_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'beta_regularizer': regularizers.serialize(
            self.beta_regularizer  # Google internal code, commented out by copybara
        ),
        'gamma_regularizer': regularizers.serialize(
            self.gamma_regularizer  # Google internal code, commented out by copybara
        ),
        'beta_constraint': constraints.serialize(
            self.beta_constraint  # Google internal code, commented out by copybara
        ),
        'gamma_constraint': constraints.serialize(
            self.gamma_constraint  # Google internal code, commented out by copybara
        ),
        'beta_range': self.beta_range,
        'gamma_range': self.gamma_range,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def compute_output_shape(self, input_shape):
    """Batch normalization preserves the input shape."""
    return input_shape

  def get_quantizers(self):
    """Returns the internal quantizer list (gamma, beta, mean, var, inv)."""
    return self.quantizers

  def get_prunable_weights(self):
    """BN parameters are not pruned."""
    return []


================================================ FILE: qkeras/qoctave.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Octave Convolution.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import re from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Add from tensorflow.keras.layers import AveragePooling2D from tensorflow.keras.layers import Conv2D from tensorflow.keras.layers import SeparableConv2D from tensorflow.keras.layers import UpSampling2D from .qlayers import QActivation from .qconvolutional import QConv2D from .qconvolutional import QSeparableConv2D from .qpooling import QAveragePooling2D def GetActivationSuffix(activation): """Returns suffix for layer name to facilitate debugging.""" if not activation: return "linear" if "po2" in activation: return "q2" elif "quantized_relu" in activation: suffix = "qr" elif "quantized_tanh" in activation: suffix = "qt" else: suffix = "qb" numbers = re.findall(r"[0-9]+", activation) numbers = [n + "_" if len(n) > 1 else n for n in numbers] return suffix + "".join(numbers) def QOctaveConv2D( filters, kernel_size, alpha, strides=(1, 1), padding="valid", kernel_initializer="he_normal", bias_initializer="zeros", # NOTE: kernel_regularizer not used with separable convolution kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, use_separable=True, name="", **kwargs): """Implements quantized QOctaveConv2D.""" def _QOctaveConv2DInternal(x): """Computes QOctaveConv2D on a tensor.""" x_h, x_l = x bias_quantizer = kwargs.get("bias_quantizer", None) kernel_quantizer = kwargs.get("kernel_quantizer", None) depthwise_quantizer = kwargs.get("depthwise_quantizer", None) pointwise_quantizer = kwargs.get("pointwise_quantizer", None) acc_quantizer = kwargs.get("acc_quantizer", None) pooling_quantizer = kwargs.get("pooling_quantizer", None) depthwise_activation = kwargs.get("depthwise_activation", None) activation = kwargs.get("activation", 
None) bias_range = kwargs.get("bias_range", 1.0) kernel_range = kwargs.get("kernel_range", 1.0) depthwise_range = kwargs.get("depthwise_range", 1.0) pointwise_range = kwargs.get("pointwise_range", 1.0) if activation: act_suffix = "_" + GetActivationSuffix(activation) acc_suffix = "_" + GetActivationSuffix(acc_quantizer) if alpha == -1.0: if use_separable: x_h = QSeparableConv2D( filters, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) else: x_h = QConv2D( filters, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) if activation: x_h = QActivation( activation, name=name + "_c_h_to_h_act" + act_suffix)( x_h) return [x_h, None] co_h = int(filters * (1 - alpha)) co_l = filters - co_h x_h_to_h = None x_h_to_l = None x_l_to_l = None x_l_to_h = None if co_h > 0: if x_h is not None: if use_separable: x_h_to_h = QSeparableConv2D( co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, 
pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) else: x_h_to_h = QConv2D( co_h, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) if acc_quantizer: x_h_to_h = QActivation( acc_quantizer, name=name + "_c_h_to_h_act" + acc_suffix)(x_h_to_h) if co_l > 0: if x_h is not None: x_h_to_l = QAveragePooling2D( pool_size=2, strides=2, quantizer=pooling_quantizer, name=name + "_avg_h_to_l")(x_h) if use_separable: x_h_to_l = QSeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_h_to_l")(x_h_to_l) else: x_h_to_l = QConv2D( co_l, kernel_size, strides=strides, 
padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_h_to_l")(x_h_to_l) if acc_quantizer: x_h_to_l = QActivation( acc_quantizer, name=name + "_c_h_to_l_act" + acc_suffix)(x_h_to_l) if co_h > 0: if x_l is not None: _, height, width, _ = x_l.shape.as_list() if height == 1 and width == 1: local_kernel = 1 local_strides = 1 local_padding = "same" upsampling = False else: local_kernel = kernel_size local_strides = strides local_padding = padding upsampling = True if use_separable and upsampling: x_l_to_h = QSeparableConv2D( co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_l_to_h")(x_l) else: x_l_to_h = QConv2D( co_h, local_kernel, strides=local_strides, padding=local_padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_l_to_h")(x_l) if acc_quantizer: 
x_l_to_h = QActivation( acc_quantizer, name=name + "_c_l_to_h_act" + acc_suffix)(x_l_to_h) if upsampling: x_l_to_h = UpSampling2D( size=(2, 2), name=name + "_u_l_to_h")(x_l_to_h) if co_l > 0: if x_l is not None: if use_separable: x_l_to_l = QSeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=depthwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_l_to_l")(x_l) else: x_l_to_l = QConv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_l_to_l")(x_l) if acc_quantizer: x_l_to_l = QActivation( acc_quantizer, name=name + "_c_l_to_l_act" + acc_suffix)( x_l_to_l) if x_h_to_h is not None and x_l_to_h is not None: x_h = Add(name=name + "_a_h")([x_h_to_h, x_l_to_h]) elif x_h_to_h is not None: x_h = x_h_to_h elif x_l_to_h is not None: x_h = x_l_to_h else: x_h = None if x_l_to_l is not None and x_h_to_l is not None: x_l = Add(name=name + "_a_l")([x_l_to_l, x_h_to_l]) elif x_l_to_l is not None: x_l = x_l_to_l elif x_h_to_l is not None: x_l = x_h_to_l else: x_l = None if x_h is not None and activation is not None: x_h = QActivation(activation, name=name + "_h_act" + 
act_suffix)(x_h) if x_l is not None and activation is not None: x_l = QActivation(activation, name=name + "_l_act" + act_suffix)(x_l) return [x_h, x_l] return _QOctaveConv2DInternal def OctaveConv2D( filters, kernel_size, alpha, strides=(1, 1), padding="valid", kernel_initializer="he_normal", bias_initializer="zeros", kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, activation=None, use_separable=True, name="", **kwargs): """Implements OctaveConv2D.""" def _OctaveConv2DInternal(x): """Computes octave on tensor.""" acc_quantizer = kwargs.get("acc_quantizer", None) x_h, x_l = x if alpha == -1.0: if use_separable: x_h = SeparableConv2D( filters, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_h")(x_h) else: x_h = Conv2D( filters, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name+"_c_h_to_h")(x_h) if activation: x_h = Activation(activation, name=name + "_c_h_to_h_act")(x_h) return [x_h, None] co_h = int(filters * (1 - alpha)) co_l = filters - co_h x_h_to_h = None x_h_to_l = None x_l_to_l = None x_l_to_h = None if co_h > 0: if x_h is not None: if use_separable: x_h_to_h = SeparableConv2D( co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, 
pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_h")(x_h) else: x_h_to_h = Conv2D( co_h, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_h")(x_h) if activation: x_h_to_h = Activation( acc_quantizer, name=name + "_c_h_to_h_act")(x_h_to_h) if co_l > 0: if x_h is not None: x_h_to_l = AveragePooling2D(pool_size=2, strides=2, name=name + "_p_h_to_l")(x_h) if use_separable: x_h_to_l = SeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_l")(x_h_to_l) else: x_h_to_l = Conv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_l")(x_h_to_l) if activation: x_h_to_l = Activation( acc_quantizer, name=name + "_c_h_to_l_act")(x_h_to_l) if co_h > 0: if x_l is not None: _, height, width, _ = x_l.shape.as_list() if height == 1 and width == 1: local_kernel = 1 local_strides = 1 local_padding = "same" upsampling = False else: local_kernel = kernel_size local_strides = strides local_padding = padding upsampling = True if use_separable and upsampling: x_l_to_h = SeparableConv2D( 
co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_h")(x_l) else: x_l_to_h = Conv2D( co_h, local_kernel, strides=local_strides, padding=local_padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_h")(x_l) if activation: x_l_to_h = Activation( acc_quantizer, name=name + "_c_l_to_h_act")(x_l_to_h) if upsampling: x_l_to_h = UpSampling2D( size=(2, 2), name=name + "_u_l_to_h")(x_l_to_h) if co_l > 0: if x_l is not None: if use_separable: x_l_to_l = SeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_l")(x_l) else: x_l_to_l = Conv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_l")(x_l) if activation: x_l_to_l = Activation( acc_quantizer, name=name + "_c_l_to_l_act")(x_l_to_l) if x_h_to_h is not None and x_l_to_h is not None: x_h = Add(name=name + "_a_h")([x_h_to_h, x_l_to_h]) elif x_h_to_h is not None: x_h = x_h_to_h elif x_l_to_h is not None: x_h = x_l_to_h else: x_h = None if x_l_to_l is not None and x_h_to_l is not 
None: x_l = Add(name=name + "_a_l")([x_l_to_l, x_h_to_l]) elif x_l_to_l is not None: x_l = x_l_to_l elif x_h_to_l is not None: x_l = x_h_to_l else: x_l = None if x_h is not None: x_h = Activation(activation, name=name + "_h_act")(x_h) if x_l is not None: x_l = Activation(activation, name=name + "_l_act")(x_l) return (x_h, x_l) return _OctaveConv2DInternal ================================================ FILE: qkeras/qpooling.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Quantized pooling layers: QAveragePooling2D and QGlobalAveragePooling2D."""

import numpy as np
from tensorflow.keras import constraints
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D

from .qlayers import QActivation
from .quantizers import get_quantizer


class QAveragePooling2D(AveragePooling2D):
  """Computes the quantized version of AveragePooling2D.

  Extends keras AveragePooling2D with:
    average_quantizer: quantizer function/class applied to the 1/pool_area
      multiplication factor used to derive the average from the pooling sum.
    activation: optional quantizer function/class applied to the layer output.
  All other parameters follow keras AveragePooling2D.
  """

  def __init__(self, pool_size=(2, 2), strides=None, padding="valid",
               data_format=None, average_quantizer=None,
               activation=None, **kwargs):
    # Keep both the raw quantizer spec (string/dict/instance) and the resolved
    # quantizer object; the raw spec doubles as an "is quantization enabled"
    # flag in call().
    self.average_quantizer = average_quantizer
    self.average_quantizer_internal = get_quantizer(self.average_quantizer)
    self.quantizers = [self.average_quantizer_internal]

    # Optional output quantizer applied after pooling.
    if activation is not None:
      self.activation = get_quantizer(activation)
    else:
      self.activation = activation

    super().__init__(
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs
    )

  def call(self, inputs):
    """Performs quantized AveragePooling followed by QActivation.

    Since there is no specific parameter for averaging op, we couldn't apply
    averaging quantizer to the averaging op. We have two options:
    1. we perform our own average as sum first then multiply with the
       inversion of the division factor: sum(x) * quantize(1/pool_area)
    2. first, we call keras version of averaging first:
         y1 = keras_average(x)
       then multiply it with pool_size^2: y2 = y1 * pool_area
       Last, y3 = y2 * quantize(1/ pool_area)
    3. Improved based on #2, but multiply x with pool_area before averaging
       so that we don't lose precision during averaging. The order now
       becomes:
       first, multiply x with pool_area: y1 = x * pool_area
       then we call keras version of averaging: y2 = keras_average(y1)
       Last, y3 = y2 * quantize(1/ pool_area)
    4. Since there is sum_pooling operation, another solution is to use
       depthwise_conv2d with kernel weights = 1 to get the pooling sum.
       In this case we don't lose precision due to averaging. However, this
       solution will introduce extra weights to the layer, which might break
       our code elsewhere.

    Since we need to match software and hardware inference numerics, we are
    now using #3 in the implementation.
    """
    if self.average_quantizer:
      # Calculates the pool area
      if isinstance(self.pool_size, int):
        pool_area = self.pool_size * self.pool_size
      else:
        pool_area = np.prod(self.pool_size)

      # Calculates the pooling average of x*pool_area. Pre-scaling keeps
      # full precision through the built-in averaging op (option #3 above).
      x = super(QAveragePooling2D, self).call(inputs*pool_area)

      # Quantizes the multiplication factor.
      mult_factor = 1.0 / pool_area
      q_mult_factor = self.average_quantizer_internal(mult_factor)
      q_mult_factor = K.cast_to_floatx(q_mult_factor)

      # Computes pooling average.
      x = x * q_mult_factor
    else:
      # Since no quantizer is available, we directly call the keras layer
      x = super(QAveragePooling2D, self).call(inputs)

    if self.activation is not None:
      return self.activation(x)
    return x

  def get_config(self):
    """Returns the layer config, including the quantizer serializations."""
    config = {
        "average_quantizer":
            constraints.serialize(
                self.average_quantizer_internal# Google internal code, commented out by copybara
            ),
        "activation":
            constraints.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
    }
    base_config = super(QAveragePooling2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "average_quantizer": str(self.average_quantizer_internal),
        "activation": str(self.activation)
    }

  def get_quantizers(self):
    return self.quantizers


class QGlobalAveragePooling2D(GlobalAveragePooling2D):
  """Computes the quantized version of GlobalAveragePooling2D.

  Extends keras GlobalAveragePooling2D with:
    average_quantizer: quantizer function/class applied to the 1/pool_area
      multiplication factor used to derive the average from the pooling sum.
    activation: optional quantizer function/class applied to the layer output.
  All other parameters follow keras GlobalAveragePooling2D.
  """

  def __init__(self, data_format=None,
               average_quantizer=None, activation=None, **kwargs):
    # See QAveragePooling2D.__init__ for the raw-spec / resolved-quantizer
    # split; the raw spec is also the enable flag in call().
    self.average_quantizer = average_quantizer
    self.average_quantizer_internal = get_quantizer(self.average_quantizer)
    self.quantizers = [self.average_quantizer_internal]

    if activation is not None:
      self.activation = get_quantizer(activation)
    else:
      self.activation = activation

    super().__init__(data_format=data_format, **kwargs)

  def compute_pooling_area(self, input_shape):
    """Returns H*W of the spatial dimensions for the given input shape."""
    if not isinstance(input_shape, tuple):
      # TensorShape -> plain list so indexing below is uniform.
      input_shape = input_shape.as_list()
    if self.data_format == "channels_last":
      return input_shape[1] * input_shape[2]
    else:
      return input_shape[2] * input_shape[3]

  def call(self, inputs):
    """Performs quantized GlobalAveragePooling followed by QActivation.

    Since there is no specific parameter for averaging op, we couldn't apply
    averaging quantizer to the averaging op. We have two options:
    1. we perform our own average as sum first then multiply with the
       inversion of the division factor: sum(x) * quantize(1/pool_area)
    2. first, we call keras version of averaging first:
         y1 = keras_global_average(x)
       then multiply it with the denominator(pool_area) used by averaging:
         y2 = y1 * pool_area
       Last, y3 = y2 * quantize(1/ pool_area)
    3. we perform pooling sum, and then multiply the sum with the quantized
       inverse multiplication factor to get the average value.

    Our previous implementation uses option #2. Yet we observed minor
    numerical mismatch between software and hardware inference. Therefore
    we use #3 as the current implementation.
    """
    if self.average_quantizer:
      # Calculates pooling sum.
      if self.data_format == "channels_last":
        x = K.sum(inputs, axis=[1, 2], keepdims=self.keepdims)
      else:
        x = K.sum(inputs, axis=[2, 3], keepdims=self.keepdims)

      # Calculates the pooling area
      pool_area = self.compute_pooling_area(input_shape=inputs.shape)

      # Quantizes the inverse multiplication factor
      mult_factor = 1.0 / pool_area
      q_mult_factor = self.average_quantizer_internal(mult_factor)

      # Derives average pooling value from pooling sum.
      x = x * q_mult_factor
    else:
      # If quantizer is not available, calls the keras layer.
      x = super(QGlobalAveragePooling2D, self).call(inputs)

    if self.activation is not None:
      return self.activation(x)
    return x

  def get_config(self):
    """Returns the layer config, including the quantizer serializations."""
    config = {
        "average_quantizer":
            constraints.serialize(
                self.average_quantizer_internal# Google internal code, commented out by copybara
            ),
        "activation":
            constraints.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
    }
    base_config = super(QGlobalAveragePooling2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "average_quantizer": str(self.average_quantizer_internal),
        "activation": str(self.activation)
    }

  def get_quantizers(self):
    return self.quantizers



================================================
FILE: qkeras/qrecurrent.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Quantized Recurrent layers.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

import tensorflow as tf
from tensorflow.keras import activations
from tensorflow.keras import constraints
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import SimpleRNNCell
from tensorflow.keras.layers import LSTMCell
from tensorflow.keras.layers import GRUCell
from tensorflow.keras.layers import RNN
from tensorflow.keras.layers import Bidirectional
from tensorflow.python.util import nest
from tensorflow.python.ops import array_ops
# from tensorflow.python.ops import array_ops
from tensorflow.python.framework import ops
from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer
import tensorflow.keras.backend as K

from .qlayers import get_auto_range_constraint_initializer
from .qlayers import QActivation
from .quantizers import get_quantized_initializer
from .quantizers import get_quantizer


class QSimpleRNNCell(SimpleRNNCell):
  """
  Cell class for the QSimpleRNNCell layer.

  Most of these parameters follow the implementation of SimpleRNNCell in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer, and state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of SimpleRNNCell in Keras for the
  other parameters.
  """

  def __init__(self,
               units,
               activation='quantized_tanh',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               **kwargs):
    # Raw quantizer specs are kept as-is: they serve as enable flags in
    # call(), while the *_internal attributes hold the resolved quantizers.
    self.kernel_quantizer = kernel_quantizer
    self.recurrent_quantizer = recurrent_quantizer
    self.bias_quantizer = bias_quantizer
    self.state_quantizer = state_quantizer

    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)
    self.state_quantizer_internal = get_quantizer(self.state_quantizer)

    self.quantizers = [
        self.kernel_quantizer_internal, self.recurrent_quantizer_internal,
        self.bias_quantizer_internal, self.state_quantizer_internal
    ]

    # Lets trainable quantizers (e.g. with learned scales) register their
    # trainable parameters.
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()
    if hasattr(self.recurrent_quantizer_internal, "_set_trainable_parameter"):
      self.recurrent_quantizer_internal._set_trainable_parameter()

    # Align constraints/initializers with the quantizer ranges so weights
    # stay representable after quantization.
    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    recurrent_constraint, recurrent_initializer = (
        get_auto_range_constraint_initializer(self.recurrent_quantizer_internal,
                                              recurrent_constraint,
                                              recurrent_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))

    # Activation may itself be a quantizer spec (e.g. 'quantized_tanh').
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        units=units,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        **kwargs
    )

  def call(self, inputs, states, training=None):
    """Runs one step: output = act(x.Wq + b_q + h_q.Uq), with quantization."""
    prev_output = states[0] if nest.is_nested(states) else states
    dp_mask = self.get_dropout_mask_for_cell(inputs, training)
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        prev_output, training)

    # Quantize the previous state before it enters the recurrent matmul.
    if self.state_quantizer:
      quantized_prev_output = self.state_quantizer_internal(prev_output)
    else:
      quantized_prev_output = prev_output

    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    if dp_mask is not None:
      h = K.dot(inputs * dp_mask, quantized_kernel)
    else:
      h = K.dot(inputs, quantized_kernel)

    if self.bias is not None:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias
      h = K.bias_add(h, quantized_bias)

    if rec_dp_mask is not None:
      quantized_prev_output = quantized_prev_output * rec_dp_mask

    if self.recurrent_quantizer:
      quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel)
    else:
      quantized_recurrent = self.recurrent_kernel

    output = h + K.dot(quantized_prev_output, quantized_recurrent)

    if self.activation is not None:
      output = self.activation(output)

    return output, [output]

  def get_config(self):
    """Returns the cell config, including the quantizer serializations."""
    config = {
        'kernel_quantizer':
            constraints.serialize(
                self.kernel_quantizer_internal# Google internal code, commented out by copybara
            ),
        'recurrent_quantizer':
            constraints.serialize(
                self.recurrent_quantizer_internal# Google internal code, commented out by copybara
            ),
        'bias_quantizer':
            constraints.serialize(
                self.bias_quantizer_internal# Google internal code, commented out by copybara
            ),
        'state_quantizer':
            constraints.serialize(
                self.state_quantizer_internal# Google internal code, commented out by copybara
            ),
    }
    base_config = super(QSimpleRNNCell, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))


class QSimpleRNN(RNN, PrunableLayer):
  """
  Class for the QSimpleRNN layer.

  Most of these parameters follow the implementation of SimpleRNN in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer and state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of SimpleRNN in Keras for the
  other parameters.
  """

  def __init__(self,
               units,
               activation='quantized_tanh',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               **kwargs):
    # 'enable_caching_device' is a cell-level kwarg; forward it to the cell
    # rather than the RNN wrapper.
    if 'enable_caching_device' in kwargs:
      cell_kwargs = {'enable_caching_device':
                     kwargs.pop('enable_caching_device')}
    else:
      cell_kwargs = {}
    cell = QSimpleRNNCell(
        units,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        kernel_quantizer=kernel_quantizer,
        recurrent_quantizer=recurrent_quantizer,
        bias_quantizer=bias_quantizer,
        state_quantizer=state_quantizer,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        dtype=kwargs.get('dtype'),
        trainable=kwargs.get('trainable', True),
        **cell_kwargs)
    super().__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs
    )
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # Reset cached dropout masks so each forward pass resamples them.
    self._maybe_reset_cell_dropout_mask(self.cell)
    return super(QSimpleRNN, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  def get_quantizers(self):
    return self.cell.quantizers

  def get_prunable_weights(self):
    # Weights exposed to tfmot pruning; bias is intentionally excluded.
    return [self.cell.kernel, self.cell.recurrent_kernel]

  # The properties below simply proxy the wrapped cell's attributes so the
  # layer presents the same API surface as keras SimpleRNN.
  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def kernel_quantizer_internal(self):
    return self.cell.kernel_quantizer_internal

  @property
  def recurrent_quantizer_internal(self):
    return self.cell.recurrent_quantizer_internal

  @property
  def bias_quantizer_internal(self):
    return self.cell.bias_quantizer_internal

  @property
  def state_quantizer_internal(self):
    return self.cell.state_quantizer_internal

  @property
  def kernel_quantizer(self):
    return self.cell.kernel_quantizer

  @property
  def recurrent_quantizer(self):
    return self.cell.recurrent_quantizer

  @property
  def bias_quantizer(self):
    return self.cell.bias_quantizer

  @property
  def state_quantizer(self):
    return self.cell.state_quantizer

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  def get_config(self):
    """Returns a flattened config (the nested cell entry is removed)."""
    config = {
        'units': self.units,
        'activation':
            activations.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(
                self.kernel_initializer# Google internal code, commented out by copybara
            ),
        'recurrent_initializer':
            initializers.serialize(
                self.recurrent_initializer# Google internal code, commented out by copybara
            ),
        'bias_initializer':
            initializers.serialize(
                self.bias_initializer# Google internal code, commented out by copybara
            ),
        'kernel_regularizer':
            regularizers.serialize(
                self.kernel_regularizer# Google internal code, commented out by copybara
            ),
        'recurrent_regularizer':
            regularizers.serialize(
                self.recurrent_regularizer# Google internal code, commented out by copybara
            ),
        'bias_regularizer':
            regularizers.serialize(
                self.bias_regularizer# Google internal code, commented out by copybara
            ),
        'activity_regularizer':
            regularizers.serialize(
                self.activity_regularizer# Google internal code, commented out by copybara
            ),
        'kernel_constraint':
            constraints.serialize(
                self.kernel_constraint# Google internal code, commented out by copybara
            ),
        'recurrent_constraint':
            constraints.serialize(
                self.recurrent_constraint# Google internal code, commented out by copybara
            ),
        'bias_constraint':
            constraints.serialize(
                self.bias_constraint# Google internal code, commented out by copybara
            ),
        'kernel_quantizer':
            constraints.serialize(
                self.kernel_quantizer_internal# Google internal code, commented out by copybara
            ),
        'recurrent_quantizer':
            constraints.serialize(
                self.recurrent_quantizer_internal# Google internal code, commented out by copybara
            ),
        'bias_quantizer':
            constraints.serialize(
                self.bias_quantizer_internal# Google internal code, commented out by copybara
            ),
        'state_quantizer':
            constraints.serialize(
                self.state_quantizer_internal# Google internal code, commented out by copybara
            ),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
    }
    base_config = super(QSimpleRNN, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "recurrent_quantizer": str(self.recurrent_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "state_quantizer": str(self.state_quantizer_internal),
        "activation": str(self.activation)
    }

  @classmethod
  def from_config(cls, config):
    # SimpleRNN has no 'implementation' argument; drop it if present in a
    # config written by another RNN flavor.
    if 'implementation' in config:
      config.pop('implementation')
    return cls(**config)


class QLSTMCell(LSTMCell):
  """
  Cell class for the QLSTMCell layer.

  Most of these parameters follow the implementation of LSTMCell in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer, state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of LSTMCell in Keras for the
  other parameters.
""" def __init__(self, units, activation='quantized_tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, kernel_quantizer=None, recurrent_quantizer=None, bias_quantizer=None, state_quantizer=None, dropout=0., recurrent_dropout=0., implementation=1, **kwargs): self.kernel_quantizer = kernel_quantizer self.recurrent_quantizer = recurrent_quantizer self.bias_quantizer = bias_quantizer self.state_quantizer = state_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) self.state_quantizer_internal = get_quantizer(self.state_quantizer) self.quantizers = [ self.kernel_quantizer_internal, self.recurrent_quantizer_internal, self.bias_quantizer_internal, self.state_quantizer_internal, ] if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() if hasattr(self.recurrent_quantizer_internal, "_set_trainable_parameter"): self.recurrent_quantizer_internal._set_trainable_parameter() kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) recurrent_constraint, recurrent_initializer = ( get_auto_range_constraint_initializer(self.recurrent_quantizer_internal, recurrent_constraint, recurrent_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) if recurrent_activation is not None: recurrent_activation = 
get_quantizer(recurrent_activation) super().__init__( units=units, activation=activation, use_bias=use_bias, recurrent_activation=recurrent_activation, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, unit_forget_bias=True, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=implementation, **kwargs ) def _compute_carry_and_output(self, x, h_tm1, c_tm1, quantized_recurrent): """Computes carry and output using split kernels.""" x_i, x_f, x_c, x_o = x h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1 i = self.recurrent_activation( x_i + K.dot(h_tm1_i, quantized_recurrent[:, :self.units])) f = self.recurrent_activation(x_f + K.dot( h_tm1_f, quantized_recurrent[:, self.units:self.units * 2])) c = f * c_tm1 + i * self.activation(x_c + K.dot( h_tm1_c, quantized_recurrent[:, self.units * 2:self.units * 3])) o = self.recurrent_activation( x_o + K.dot(h_tm1_o, quantized_recurrent[:, self.units * 3:])) return c, o def _compute_carry_and_output_fused(self, z, c_tm1): """Computes carry and output using fused kernels.""" z0, z1, z2, z3 = z i = self.recurrent_activation(z0) f = self.recurrent_activation(z1) c = f * c_tm1 + i * self.activation(z2) o = self.recurrent_activation(z3) return c, o def call(self, inputs, states, training=None): h_tm1_tmp = states[0] # previous memory state c_tm1_tmp = states[1] # previous carry state if self.state_quantizer: c_tm1 = self.state_quantizer_internal(c_tm1_tmp) h_tm1 = self.state_quantizer_internal(h_tm1_tmp) else: c_tm1 = c_tm1_tmp h_tm1 = h_tm1_tmp dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( h_tm1, training, count=4) if self.kernel_quantizer: 
quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel if self.recurrent_quantizer: quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel) else: quantized_recurrent = self.recurrent_kernel if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias if self.implementation == 1: if 0 < self.dropout < 1.: inputs_i = inputs * dp_mask[0] inputs_f = inputs * dp_mask[1] inputs_c = inputs * dp_mask[2] inputs_o = inputs * dp_mask[3] else: inputs_i = inputs inputs_f = inputs inputs_c = inputs inputs_o = inputs k_i, k_f, k_c, k_o = array_ops.split( quantized_kernel, num_or_size_splits=4, axis=1) x_i = K.dot(inputs_i, k_i) x_f = K.dot(inputs_f, k_f) x_c = K.dot(inputs_c, k_c) x_o = K.dot(inputs_o, k_o) if self.use_bias: b_i, b_f, b_c, b_o = array_ops.split( quantized_bias, num_or_size_splits=4, axis=0) x_i = K.bias_add(x_i, b_i) x_f = K.bias_add(x_f, b_f) x_c = K.bias_add(x_c, b_c) x_o = K.bias_add(x_o, b_o) if 0 < self.recurrent_dropout < 1.: h_tm1_i = h_tm1 * rec_dp_mask[0] h_tm1_f = h_tm1 * rec_dp_mask[1] h_tm1_c = h_tm1 * rec_dp_mask[2] h_tm1_o = h_tm1 * rec_dp_mask[3] else: h_tm1_i = h_tm1 h_tm1_f = h_tm1 h_tm1_c = h_tm1 h_tm1_o = h_tm1 x = (x_i, x_f, x_c, x_o) h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o) c, o = self._compute_carry_and_output(x, h_tm1, c_tm1, quantized_recurrent) else: if 0. 
< self.dropout < 1.: inputs = inputs * dp_mask[0] z = K.dot(inputs, quantized_kernel) z += K.dot(h_tm1, quantized_recurrent) if self.use_bias: z = K.bias_add(z, quantized_bias) z = array_ops.split(z, num_or_size_splits=4, axis=1) c, o = self._compute_carry_and_output_fused(z, c_tm1) h = o * self.activation(c) return h, [h, c] def get_config(self): config = { 'kernel_quantizer': constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), 'recurrent_quantizer': constraints.serialize( self.recurrent_quantizer_internal# Google internal code, commented out by copybara ), 'bias_quantizer': constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), 'state_quantizer': constraints.serialize( self.state_quantizer_internal# Google internal code, commented out by copybara ), } base_config = super(QLSTMCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) class QLSTM(RNN, PrunableLayer): """ Class for the QLSTM layer. Most of these parameters follow the implementation of LSTM in Keras, with the exception of kernel_quantizer, recurrent_quantizer, bias_quantizer, state_quantizer. kernel_quantizer: quantizer function/class for kernel recurrent_quantizer: quantizer function/class for recurrent kernel bias_quantizer: quantizer function/class for bias state_quantizer: quantizer function/class for states We refer the reader to the documentation of LSTM in Keras for the other parameters. 
"""

  def __init__(self,
               units,
               activation='quantized_tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               unit_forget_bias=True,
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               **kwargs):
    if implementation == 0:
      print('`implementation=0` has been deprecated, '
            'and now defaults to `implementation=1`.'
            'Please update your layer call.')
    # 'enable_caching_device' is a cell-level kwarg; forward it to the cell
    # rather than the RNN wrapper.
    if 'enable_caching_device' in kwargs:
      cell_kwargs = {'enable_caching_device':
                     kwargs.pop('enable_caching_device')}
    else:
      cell_kwargs = {}
    cell = QLSTMCell(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        unit_forget_bias=unit_forget_bias,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        kernel_quantizer=kernel_quantizer,
        recurrent_quantizer=recurrent_quantizer,
        bias_quantizer=bias_quantizer,
        state_quantizer=state_quantizer,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=implementation,
        dtype=kwargs.get('dtype'),
        trainable=kwargs.get('trainable', True),
        **cell_kwargs)
    super().__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs
    )
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # Reset cached dropout masks so each forward pass resamples them.
    self._maybe_reset_cell_dropout_mask(self.cell)
    return super(QLSTM, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  def get_quantizers(self):
    return self.cell.quantizers

  def get_prunable_weights(self):
    # Weights exposed to tfmot pruning; bias is intentionally excluded.
    return [self.cell.kernel, self.cell.recurrent_kernel]

  # The properties below simply proxy the wrapped cell's attributes so the
  # layer presents the same API surface as keras LSTM.
  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def recurrent_activation(self):
    return self.cell.recurrent_activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def unit_forget_bias(self):
    return self.cell.unit_forget_bias

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def kernel_quantizer_internal(self):
    return self.cell.kernel_quantizer_internal

  @property
  def recurrent_quantizer_internal(self):
    return self.cell.recurrent_quantizer_internal

  @property
  def bias_quantizer_internal(self):
    return self.cell.bias_quantizer_internal

  @property
  def state_quantizer_internal(self):
    return self.cell.state_quantizer_internal

  @property
  def kernel_quantizer(self):
    return self.cell.kernel_quantizer

  @property
  def recurrent_quantizer(self):
    return self.cell.recurrent_quantizer

  @property
  def bias_quantizer(self):
    return self.cell.bias_quantizer

  @property
  def state_quantizer(self):
    return self.cell.state_quantizer

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  @property
  def implementation(self):
    return self.cell.implementation

  def get_config(self):
    """Returns a flattened config (the nested cell entry is removed)."""
    config = {
        'units': self.units,
        'activation':
            activations.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
        'recurrent_activation':
            activations.serialize(
                self.recurrent_activation# Google internal code, commented out by copybara
            ),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(
                self.kernel_initializer# Google internal code, commented out by copybara
            ),
        'recurrent_initializer':
            initializers.serialize(
                self.recurrent_initializer# Google internal code, commented out by copybara
            ),
        'bias_initializer':
            initializers.serialize(
                self.bias_initializer# Google internal code, commented out by copybara
            ),
        'unit_forget_bias': self.unit_forget_bias,
        'kernel_regularizer':
            regularizers.serialize(
                self.kernel_regularizer# Google internal code, commented out by copybara
            ),
        'recurrent_regularizer':
            regularizers.serialize(
                self.recurrent_regularizer# Google internal code, commented out by copybara
            ),
        'bias_regularizer':
            regularizers.serialize(
                self.bias_regularizer# Google internal code, commented out by copybara
            ),
        'activity_regularizer':
            regularizers.serialize(
                self.activity_regularizer# Google internal code, commented out by copybara
            ),
        'kernel_constraint':
            constraints.serialize(
                self.kernel_constraint# Google internal code, commented out by copybara
            ),
        'recurrent_constraint':
            constraints.serialize(
                self.recurrent_constraint# Google internal code, commented out by copybara
            ),
        'bias_constraint':
            constraints.serialize(
                self.bias_constraint# Google internal code, commented out by copybara
            ),
        'kernel_quantizer':
            constraints.serialize(
                self.kernel_quantizer_internal# Google internal code, commented out by copybara
            ),
        'recurrent_quantizer':
            constraints.serialize(
                self.recurrent_quantizer_internal# Google internal code, commented out by copybara
            ),
        'bias_quantizer':
            constraints.serialize(
                self.bias_quantizer_internal# Google internal code, commented out by copybara
            ),
        'state_quantizer':
            constraints.serialize(
                self.state_quantizer_internal# Google internal code, commented out by copybara
            ),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation,
    }
    base_config = super(QLSTM, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "recurrent_quantizer": str(self.recurrent_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "state_quantizer": str(self.state_quantizer_internal),
        "activation": str(self.activation),
        "recurrent_activation": str(self.recurrent_activation),
    }

  @classmethod
  def from_config(cls, config):
    # Legacy configs may carry the deprecated implementation=0; map it to 1.
    if 'implementation' in config and config['implementation'] == 0:
      config['implementation'] = 1
    return cls(**config)


class QGRUCell(GRUCell):
  """
  Cell class for the QGRUCell layer.

  Most of these parameters follow the implementation of GRUCell in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer and state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of GRUCell in Keras for the
  other parameters.
""" def __init__(self, units, activation='quantized_tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, kernel_quantizer=None, recurrent_quantizer=None, bias_quantizer=None, state_quantizer=None, dropout=0., recurrent_dropout=0., implementation=1, reset_after=False, **kwargs): self.kernel_quantizer = kernel_quantizer self.recurrent_quantizer = recurrent_quantizer self.bias_quantizer = bias_quantizer self.state_quantizer = state_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) self.state_quantizer_internal = get_quantizer(self.state_quantizer) self.quantizers = [ self.kernel_quantizer_internal, self.recurrent_quantizer_internal, self.bias_quantizer_internal, self.state_quantizer_internal ] if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() if hasattr(self.recurrent_quantizer_internal, "_set_trainable_parameter"): self.recurrent_quantizer_internal._set_trainable_parameter() kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) recurrent_constraint, recurrent_initializer = ( get_auto_range_constraint_initializer(self.recurrent_quantizer_internal, recurrent_constraint, recurrent_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) if recurrent_activation is not None: recurrent_activation = 
get_quantizer(recurrent_activation) super().__init__( units=units, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=implementation, reset_after=reset_after, **kwargs ) def call(self, inputs, states, training=None): # previous memory h_tm1_tmp = states[0] if nest.is_nested(states) else states dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3) rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( h_tm1_tmp, training, count=3) if self.state_quantizer: h_tm1 = self.state_quantizer_internal(h_tm1_tmp) else: h_tm1 = h_tm1_tmp if self.kernel_quantizer: quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel if self.recurrent_quantizer: quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel) else: quantized_recurrent = self.kernel if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias if not self.reset_after: input_bias, recurrent_bias = quantized_bias, None else: input_bias, recurrent_bias = array_ops.unstack(quantized_bias) if self.implementation == 1: if 0. 
< self.dropout < 1.: inputs_z = inputs * dp_mask[0] inputs_r = inputs * dp_mask[1] inputs_h = inputs * dp_mask[2] else: inputs_z = inputs inputs_r = inputs inputs_h = inputs x_z = K.dot(inputs_z, quantized_kernel[:, :self.units]) x_r = K.dot(inputs_r, quantized_kernel[:, self.units:self.units * 2]) x_h = K.dot(inputs_h, quantized_kernel[:, self.units * 2:]) if self.use_bias: x_z = K.bias_add(x_z, input_bias[:self.units]) x_r = K.bias_add(x_r, input_bias[self.units: self.units * 2]) x_h = K.bias_add(x_h, input_bias[self.units * 2:]) if 0. < self.recurrent_dropout < 1.: h_tm1_z = h_tm1 * rec_dp_mask[0] h_tm1_r = h_tm1 * rec_dp_mask[1] h_tm1_h = h_tm1 * rec_dp_mask[2] else: h_tm1_z = h_tm1 h_tm1_r = h_tm1 h_tm1_h = h_tm1 recurrent_z = K.dot(h_tm1_z, quantized_recurrent[:, :self.units]) recurrent_r = K.dot(h_tm1_r, quantized_recurrent[:, self.units:self.units * 2]) if self.reset_after and self.use_bias: recurrent_z = K.bias_add(recurrent_z, recurrent_bias[:self.units]) recurrent_r = K.bias_add(recurrent_r, recurrent_bias[self.units:self.units * 2]) z = self.recurrent_activation(x_z + recurrent_z) r = self.recurrent_activation(x_r + recurrent_r) # reset gate applied after/before matrix multiplication if self.reset_after: recurrent_h = K.dot(h_tm1_h, quantized_recurrent[:, self.units * 2:]) if self.use_bias: recurrent_h = K.bias_add(recurrent_h, recurrent_bias[self.units * 2:]) recurrent_h = r * recurrent_h else: recurrent_h = K.dot(r * h_tm1_h, quantized_recurrent[:, self.units * 2:]) hh = self.activation(x_h + recurrent_h) else: if 0. 
< self.dropout < 1.: inputs = inputs * dp_mask[0] # inputs projected by all gate matrices at once matrix_x = K.dot(inputs, quantized_kernel) if self.use_bias: # biases: bias_z_i, bias_r_i, bias_h_i matrix_x = K.bias_add(matrix_x, input_bias) x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=-1) if self.reset_after: # hidden state projected by all gate matrices at once matrix_inner = K.dot(h_tm1, quantized_recurrent) if self.use_bias: matrix_inner = K.bias_add(matrix_inner, recurrent_bias) else: # hidden state projected separately for update/reset and new matrix_inner = K.dot(h_tm1, quantized_recurrent[:, :2 * self.units]) recurrent_z, recurrent_r, recurrent_h = array_ops.split( matrix_inner, [self.units, self.units, -1], axis=-1) z = self.recurrent_activation(x_z + recurrent_z) r = self.recurrent_activation(x_r + recurrent_r) if self.reset_after: recurrent_h = r * recurrent_h else: recurrent_h = K.dot(r * h_tm1, quantized_recurrent[:, 2 * self.units:]) hh = self.activation(x_h + recurrent_h) # previous and candidate state mixed by update gate h = z * h_tm1 + (1 - z) * hh return h, [h] def get_config(self): config = { 'kernel_quantizer': constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), 'recurrent_quantizer': constraints.serialize( self.recurrent_quantizer_internal# Google internal code, commented out by copybara ), 'bias_quantizer': constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), 'state_quantizer': constraints.serialize( self.state_quantizer_internal# Google internal code, commented out by copybara ), } base_config = super(QGRUCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) class QGRU(RNN, PrunableLayer): """ Class for the QGRU layer. Most of these parameters follow the implementation of GRU in Keras, with the exception of kernel_quantizer, recurrent_quantizer, bias_quantizer and state_quantizer. 
  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of GRU in Keras for the
  other parameters.
  """

  def __init__(self,
               units,
               activation='quantized_tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               reset_after=False,
               **kwargs):
    if implementation == 0:
      print('`implementation=0` has been deprecated, '
            'and now defaults to `implementation=1`.'
            'Please update your layer call.')
    # `enable_caching_device` is a cell-level kwarg; forward it to the cell
    # instead of the RNN wrapper.
    if 'enable_caching_device' in kwargs:
      cell_kwargs = {'enable_caching_device':
                     kwargs.pop('enable_caching_device')}
    else:
      cell_kwargs = {}
    cell = QGRUCell(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        kernel_quantizer=kernel_quantizer,
        recurrent_quantizer=recurrent_quantizer,
        bias_quantizer=bias_quantizer,
        state_quantizer=state_quantizer,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=implementation,
        reset_after=reset_after,
        dtype=kwargs.get('dtype'),
        trainable=kwargs.get('trainable', True),
        **cell_kwargs)

    super().__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs
    )
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # Reset per-call dropout masks, then defer to the Keras RNN machinery.
    self._maybe_reset_cell_dropout_mask(self.cell)
    return super(QGRU, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  def get_quantizers(self):
    return self.cell.quantizers

  def get_prunable_weights(self):
    return [self.cell.kernel, self.cell.recurrent_kernel]

  # The properties below simply expose the wrapped cell's attributes at the
  # layer level, mirroring the Keras GRU layer API.

  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def recurrent_activation(self):
    return self.cell.recurrent_activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def kernel_quantizer_internal(self):
    return self.cell.kernel_quantizer_internal

  @property
  def recurrent_quantizer_internal(self):
    return self.cell.recurrent_quantizer_internal

  @property
  def bias_quantizer_internal(self):
    return self.cell.bias_quantizer_internal

  @property
  def state_quantizer_internal(self):
    return self.cell.state_quantizer_internal

  @property
  def kernel_quantizer(self):
    return self.cell.kernel_quantizer

  @property
  def recurrent_quantizer(self):
    return self.cell.recurrent_quantizer

  @property
  def bias_quantizer(self):
    return self.cell.bias_quantizer

  @property
  def state_quantizer(self):
    return self.cell.state_quantizer

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  @property
  def implementation(self):
    return self.cell.implementation

  @property
  def reset_after(self):
    return self.cell.reset_after

  def get_config(self):
    # Flatten the cell configuration into the layer config. Quantizers are
    # serialized via `constraints.serialize`, matching how qkeras
    # deserializes them elsewhere.
    config = {
        'units': self.units,
        'activation': activations.serialize(
            self.activation
        ),
        'recurrent_activation': activations.serialize(
            self.recurrent_activation
        ),
        'use_bias': self.use_bias,
        'kernel_initializer': initializers.serialize(
            self.kernel_initializer
        ),
        'recurrent_initializer': initializers.serialize(
            self.recurrent_initializer
        ),
        'bias_initializer': initializers.serialize(
            self.bias_initializer
        ),
        'kernel_regularizer': regularizers.serialize(
            self.kernel_regularizer
        ),
        'recurrent_regularizer': regularizers.serialize(
            self.recurrent_regularizer
        ),
        'bias_regularizer': regularizers.serialize(
            self.bias_regularizer
        ),
        'activity_regularizer': regularizers.serialize(
            self.activity_regularizer
        ),
        'kernel_constraint': constraints.serialize(
            self.kernel_constraint
        ),
        'recurrent_constraint': constraints.serialize(
            self.recurrent_constraint
        ),
        'bias_constraint': constraints.serialize(
            self.bias_constraint
        ),
        'kernel_quantizer': constraints.serialize(
            self.kernel_quantizer_internal
        ),
        'recurrent_quantizer': constraints.serialize(
            self.recurrent_quantizer_internal
        ),
        'bias_quantizer': constraints.serialize(
            self.bias_quantizer_internal
        ),
        'state_quantizer': constraints.serialize(
            self.state_quantizer_internal
        ),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation,
        'reset_after': self.reset_after,
    }
    base_config = super(QGRU, self).get_config()
    # The cell is rebuilt from the flat config in __init__, so it must not
    # be serialized twice.
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # Human-readable summary of the quantizers used by this layer.
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "recurrent_quantizer": str(self.recurrent_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "state_quantizer": str(self.state_quantizer_internal),
        "activation": str(self.activation),
        "recurrent_activation": str(self.recurrent_activation),
    }

  @classmethod
  def from_config(cls, config):
    # `implementation=0` is deprecated in Keras; silently upgrade it.
    if 'implementation' in config and config['implementation'] == 0:
      config['implementation'] = 1
    return cls(**config)


class QBidirectional(Bidirectional):
  """
  Class for the QBidirectional wrapper.

  Most of these parameters follow the implementation of Bidirectional in
  Keras.

  We refer the reader to the documentation of Bidirectional in Keras for the
  other parameters.
  """

  def get_quantizers(self):
    """
    Returns quantizers in the order they were created.
    """
    return self.forward_layer.get_quantizers() + self.backward_layer.get_quantizers()

  @property
  def activation(self):
    return self.layer.activation

  def get_quantization_config(self):
    # Per-direction quantizer summaries, keyed by wrapped layer.
    return {
        "layer" : self.layer.get_quantization_config(),
        "backward_layer" : self.backward_layer.get_quantization_config()
    }


================================================
FILE: qkeras/qseparable_conv2d_transpose.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== import tensorflow as tf from tensorflow.keras.layers import Conv2DTranspose from tensorflow.keras.layers import InputSpec from .qconvolutional import deconv_output_length from .quantizers import get_quantizer from tensorflow.python.eager import context from tensorflow.python.keras import constraints from tensorflow.python.ops import array_ops from tensorflow.python.ops import array_ops class QSeparableConv2DTranspose(Conv2DTranspose): """Quantized Separable Conv2DTranspose layer.""" # Most of these parameters follow the implementation of Conv2DTranspose # in Keras, with the exception of following parameters. # # depthwise_activation: activation quantizer for depthwise convolution # pointwise_activation: activation quantizer for pointwise convolution # depthwise_kernel_quantizer: quantizer function/class for depthwise kernel # pointwise_kernel_quantizers: quantizer function/class for pointwise kernel # bias_quantizer: quantizer function/class for bias # # we refer the reader to the documentation of Conv2DTranspose in Keras for # the other parameters. 
def __init__(self, filters, kernel_size, strides=(1, 1), padding="valid", output_padding=None, depth_multiplier=1, depthwise_activation=None, pointwise_activation=None, use_bias=True, depthwise_kernel_quantizer=None, pointwise_kernel_quantizer=None, bias_quantizer=None, **kwargs): self.filters = filters self.kernel_size = kernel_size self.strides = strides self.padding = padding self.output_padding = output_padding self.depth_multiplier = depth_multiplier self.depthwise_activation = depthwise_activation self.pointwise_activation = pointwise_activation self.use_bias = use_bias self.depthwise_kernel_quantizer = depthwise_kernel_quantizer self.pointwise_kernel_quantizer = pointwise_kernel_quantizer self.bias_quantizer = bias_quantizer self.depthwise_kernel_quantizer_internal = get_quantizer( self.depthwise_kernel_quantizer) self.pointwise_kernel_quantizer_internal = get_quantizer( self.pointwise_kernel_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible for q in [self.depthwise_kernel_quantizer_internal, self.pointwise_kernel_quantizer_internal]: if hasattr(q, "_set_trainable_parameter"): q._set_trainable_parameter() if depthwise_activation is not None: self.depthwise_activation = get_quantizer(depthwise_activation) if pointwise_activation is not None: self.pointwise_activation = get_quantizer(pointwise_activation) super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, use_bias=use_bias, **kwargs) def _get_input_axis(self): if self.data_format == "channels_first": b_axis, c_axis, h_axis, w_axis = 0, 1, 2, 3 else: b_axis, c_axis, h_axis, w_axis = 0, 3, 1, 2 return b_axis, c_axis, h_axis, w_axis def _get_input_dims(self, input_shape): b_axis, c_axis, h_axis, w_axis = self._get_input_axis() return ( input_shape[b_axis], input_shape[c_axis], input_shape[h_axis], input_shape[w_axis]) def _get_output_size(self, inputs, output_padding, padding, strides, 
dilation_rate, kernel_weights): input_shape = array_ops.shape(inputs) batch_size, _, height, width = self._get_input_dims(input_shape) kernel_h, kernel_w = kernel_weights.shape[:2] stride_h, stride_w = strides dilation_h, dilation_w = dilation_rate[0], dilation_rate[1] if output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = output_padding # Infer the dynamic output shape: out_height = deconv_output_length( height, kernel_h, padding=padding, output_padding=out_pad_h, stride=stride_h, dilation=dilation_h, ) out_width = deconv_output_length( width, kernel_w, padding=padding, output_padding=out_pad_w, stride=stride_w, dilation=dilation_w, ) return (batch_size, out_height, out_width, kernel_h, kernel_w) def build(self, input_shape): self._input_shape = input_shape _, input_channel, _, _ = self._get_input_dims(input_shape) channel_axis = self._get_input_axis()[1] self.input_spec = InputSpec( min_ndim=self.rank + 2, axes={channel_axis: input_channel} ) # By enforcing the kernel shape, we can control how convolution is # done in depthwise or pointwise. # When setting kernel shape=(kw, kh, 1, input_channel), it does depthwise # convolution. depthwise_kernel_shape = self.kernel_size + (1, input_channel) self.depthwise_kernel = self.add_weight( name="depthwise_kernel", shape=depthwise_kernel_shape, initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint, trainable=True, dtype=self.dtype, ) # When setting kernel shape=(1, 1, output_channel, input_channel), it does # pointwise convolution. pointwise_kernel_shape = (1, 1, self.filters, input_channel) self.pointwise_kernel = self.add_weight( name="pointwise_kernel", shape=pointwise_kernel_shape, initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint, trainable=True, dtype=self.dtype, ) if self.use_bias: # This bias term is usally add at the end of the pointwise convolution. 
self.bias = self.add_weight( name="bias", shape=(self.filters,), initializer=self.bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint, trainable=True, dtype=self.dtype, ) else: self.bias = None self.built = True def compute_final_output_shape( self, input_shape, kernel_size, strides, is_depthwise=True): input_shape = tf.TensorShape(input_shape).as_list() # By using list(), output_shape is a copy of input_shape, instead of a # reference to input_shape. output_shape = list(input_shape) _, c_axis, h_axis, w_axis = self._get_input_axis() kernel_h, kernel_w = kernel_size stride_h, stride_w = strides if self.output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = self.output_padding if is_depthwise: # Convolution is performed separately on each spatial domain. output_shape[c_axis] = input_shape[c_axis] else: # Pointwise convolution maps input channels to output filters. output_shape[c_axis] = self.filters output_shape[h_axis] = deconv_output_length( output_shape[h_axis], kernel_h, padding=self.padding, output_padding=out_pad_h, stride=stride_h, dilation=self.dilation_rate[0], ) output_shape[w_axis] = deconv_output_length( output_shape[w_axis], kernel_w, padding=self.padding, output_padding=out_pad_w, stride=stride_w, dilation=self.dilation_rate[1], ) return tf.TensorShape(output_shape) def conv_transpose_op(self, inputs, filters, strides, padding, output_padding, dilation_rate, kernel_quantizer, kernel_weights, use_bias, bias_quantizer, bias, activation, is_depthwise): """Transpose convolution op that shared by both depthwise and pointwise.""" batch_size, out_height, out_width, kernel_h, kernel_w = ( self._get_output_size(inputs, output_padding, padding, strides, dilation_rate, kernel_weights)) if kernel_quantizer: quantized_kernel = kernel_quantizer(kernel_weights) else: quantized_kernel = kernel_weights output_filters = 1 if is_depthwise else filters if self.data_format == "channels_first": output_shape = 
(batch_size, output_filters, out_height, out_width) else: output_shape = (batch_size, out_height, out_width, output_filters) output_shape_tensor = array_ops.stack(output_shape) # Split the input channels into groups. x = tf.split(inputs, self._input_shape[-1], axis=-1) if is_depthwise: # For depthwise convolution, since CPU doesn't support grouped # convolution, we run convolution on each slice of inputs and concat # the results. outputs = [ tf.keras.backend.conv2d_transpose( x=x[i], kernel=quantized_kernel[:, :, :, i : i + 1], output_shape=output_shape_tensor, strides=strides, padding=padding, data_format=self.data_format, dilation_rate=dilation_rate, ) for i in range(len(x)) ] # Concat the channels. outputs = tf.concat(outputs, axis=-1) else: outputs = tf.keras.backend.conv2d_transpose( inputs, quantized_kernel, output_shape_tensor, strides=strides, padding=padding, data_format=self.data_format, dilation_rate=dilation_rate, ) if not context.executing_eagerly(): # Infer the static output shape: out_shape = self.compute_final_output_shape( input_shape=inputs.shape, kernel_size=(kernel_h, kernel_w), strides=strides, is_depthwise=is_depthwise) outputs.set_shape(out_shape) if use_bias: quantized_bias = bias_quantizer(bias) if bias_quantizer else bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format) if activation is not None: return activation(outputs) return outputs def call(self, inputs): input_shape = array_ops.shape(inputs) _, input_channel, _, _ = self._get_input_dims(input_shape) # First apply depthwise transposed convolution. x = self.conv_transpose_op( inputs=inputs, # Depthwise convolution doesn't operate across channels. Thereofore its # output channels is the same as input channels. 
filters=input_channel, strides=self.strides, padding=self.padding, output_padding=self.output_padding, dilation_rate=self.dilation_rate, kernel_quantizer=self.depthwise_kernel_quantizer_internal, kernel_weights=self.depthwise_kernel, use_bias=False, # Usually set bias=False for depthwise conv. bias_quantizer=None, bias=None, activation=self.depthwise_activation, is_depthwise=True) # Then apply pointwise transposed convolution x = self.conv_transpose_op( inputs=x, # Pointwise convolution maps input channels to output filters. filters=self.filters, strides=(1, 1), # strides is set to (1, 1) for pointwise conv. # Though it will not applied in pointwise conv, we need to set # padding here to pass value checking in keras utility functions. padding=self.padding, output_padding=None, # Prevent output_padding from adding twice. dilation_rate=self.dilation_rate, kernel_quantizer=self.pointwise_kernel_quantizer_internal, kernel_weights=self.pointwise_kernel, use_bias=self.use_bias, bias_quantizer=self.bias_quantizer_internal, bias=self.bias, activation=self.pointwise_activation, is_depthwise=False) return x def get_config(self): config = super().get_config() config.update({ "filters": self.filters, "kernel_size": self.kernel_size, "strides": self.strides, "padding": self.padding, "output_padding": self.output_padding, "dilation_rate": self.dilation_rate, "data_format": self.data_format, "depth_multiplier": self.depth_multiplier, "activation": self.activation, "use_bias": self.use_bias, "depthwise_kernel_quantizer": constraints.serialize( self.depthwise_kernel_quantizer_internal), "pointwise_kernel_quantizer": constraints.serialize( self.pointwise_kernel_quantizer_internal), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal, ), }) return config def get_quantizers(self): return [ self.depthwise_kernel_quantizer_internal, self.pointwise_kernel_quantizer_internal, self.bias_quantizer_internal, self.depthwise_activation, self.pointwise_activation, ] def 
get_prunable_weights(self): w = [self.depthwise_kernel, self.pointwise_kernel] if self.use_bias: w.append(self.bias) return w ================================================ FILE: qkeras/qtools/DnC/divide_and_conquer.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """divide_and_conquer hardware cost profiling. Given a target throughput and a ML model, this implementation determines the key HW design parameters (bitwidth, unroll factors) for ML area optimization in a pipelined architecture. It generates recommended design parameters to assist downstream HW synthesis design. With this, it provides accurate HW cost modeling for ML training and ML complexity evaluation such as AV2/ROOF_ML. """ import enum import logging from typing import Any, List, Union import numpy as np import tensorflow as tf from qkeras import base_quantizer from qkeras import quantizers from qkeras.qtools import generate_layer_data_type_map from qkeras.qtools import qgraph from qkeras.qtools import qtools_util from qkeras.qtools.DnC import dnc_layer_cost_ace class CostMode(enum.Enum): ACE = 1 # cost is computed from theoretical equations. PE_AREA = 2 # cost is computed from compute area only. PE_BW_AREA = 3 # cost is computed from both compute and memory bandwidth. 
# pylint: disable=invalid-name
class DivideConquerGraph:
  """This class creates model graph structure and methods to access layers."""

  def __init__(
      self,
      model: tf.keras.Model,
      source_quantizers: base_quantizer.BaseQuantizer = None,
  ):
    self._model = model
    # Default the model-input quantizer to 8-bit when none is provided.
    self._source_quantizer_list = source_quantizers or [
        quantizers.quantized_bits(8, 0, 1)]

    (self._graph,
     self._source_quantizer_list) = qgraph.CreateGraph(
         model, source_quantizers, "quantized_bits(8, 0, 1)")

    # Propagate output quantizer info into the graph edges.
    qgraph.GraphPropagateActivationsToEdges(self._graph)

    # Map each layer to its data types (quantizers, multipliers,
    # accumulators, shapes) as computed by qtools.
    self._layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
        self._graph, self._source_quantizer_list, is_inference=False,
        keras_accumulator=None, for_reference=False)["layer_data_type_map"]

    # Create layer-to-index mapping dict.
    # NOTE(review): this reads networkx's private `_node` mapping — works
    # with current versions, but is not public API.
    self._layer_to_idx_dict = {}
    for idx in self._graph._node.keys():
      self._layer_to_idx_dict[self.idx_to_layer(idx)] = idx

  def idx_to_layer(self, idx: int):
    # Map layer index to the layer object.
    return self._graph._node[idx]["layer"][0]

  def layer_to_idx(self, layer: tf.keras.layers.Layer):
    # Map a layer object to index.
    return self._layer_to_idx_dict.get(layer, None)

  def get_first_node(self):
    # Get the source node of the graph.
    return qgraph.SOURCE

  def is_first_node(self, node: Union[int, tf.keras.layers.Layer]):
    # Find whether a given node is the first node of the graph.
    # Node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return idx == qgraph.SOURCE

  def get_last_node(self):
    # Find the last node of the graph.
    return qgraph.SINK

  def is_last_node(self, node: Union[int, tf.keras.layers.Layer]):
    # Find whether a given node is the last node of the graph.
    # Node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return idx == qgraph.SINK

  def get_prev_nodes(self, node: Union[int, tf.keras.layers.Layer]):
    # Find the predecessor nodes in the graph of the given node.
    # Node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return list(self._graph.predecessors(idx))

  def get_next_nodes(self, node: Union[int, tf.keras.layers.Layer]):
    # Find the successor nodes in the graph of the given node.
    # node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return list(self._graph.successors(idx))

  def get_layer_quantizer_bitwidth(
      self, node: Union[int, tf.keras.layers.Layer]):
    """Find various quantizer bitwidth of the current layer."""
    layer = self.idx_to_layer(node) if isinstance(node, int) else node
    if layer:
      layer_item = self._layer_map[layer]
      weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
      mac_quantizer = qtools_util.get_val(layer_item, "multiplier")
      acc_quantizer = qtools_util.get_val(layer_item, "accumulator")
      input_quantizer_list = qtools_util.get_val(
          layer_item, "input_quantizer_list")
      output_quantizer = qtools_util.get_val(layer_item, "output_quantizer")

      return {
          # TODO(lishanok@): Handle multiple input quantizers
          # in non-sequential models.
          "input_bits": input_quantizer_list[0].bits,
          # When the current layer has no concept of weight, there won't
          # be any weight quantizer.
          "weight_bits": weight_quantizer.bits if weight_quantizer else 0,
          # If mac bits don't exist, that means we don't have x * w type of
          # operations. In this case, pass input_bits through.
          "mac_bits": (
              mac_quantizer.output.bits
              if mac_quantizer else input_quantizer_list[0].bits),
          "acc_bits": (
              acc_quantizer.output.bits
              if acc_quantizer else input_quantizer_list[0].bits),
          "output_bits": output_quantizer.bits}
    else:
      # For the "dummy" head and tail nodes in the graph that we inserted at
      # the beginning and ending of the model graph, we run this branch.
      return {
          "input_bits": 0,
          "weight_bits": 0,
          "mac_bits": 0,
          "acc_bits": 0,
          "output_bits": 0
      }

  def get_layer_mac_count(self, node: Union[int, tf.keras.layers.Layer]):
    """Find the number of multiplier ops in the current layer."""
    layer = self.idx_to_layer(node) if isinstance(node, int) else node
    return (
        qtools_util.get_val(self._layer_map[layer], "operation_count", 0)
        if layer else 0)

  def get_layer_shapes(self, node: Union[int, tf.keras.layers.Layer]):
    # Returns weight/output/input shapes for the layer; 0 placeholders for
    # the dummy head/tail nodes.
    layer = self.idx_to_layer(node) if isinstance(node, int) else node

    # Multiple inputs with merge layers.
    input_shape_list = layer.input_shape if layer else 0
    if not isinstance(input_shape_list, list):
      input_shape_list = [input_shape_list]

    return {
        "weight_shape": (
            qtools_util.get_val(self._layer_map[layer], "w_shapes", 0)
            if layer else 0),
        "output_shape": (
            qtools_util.get_val(self._layer_map[layer], "output_shapes", 0)
            if layer else 0),
        "input_shape_list": (input_shape_list)}


class Choice:
  """This class stores a combination of HW design param values."""

  def __init__(self,
               l: float = 0,
               k: float = 0,
               cin_unroll: int = 0,
               cout_unroll: int = 0,
               kh_unroll: int = 0,
               kw_unroll: int = 0):
    """Initializer for a combination of hardware design parameters.
Args: l: Ratio between OutElementPerClk and ComputeOutElementPerClk k: Ratio between InElementPerClk and ComputeInElementPerClk cin_unroll: Unroll factors for input channel cout_unroll: Unroll factors for output channel kh_unroll: Unroll factors for kernel height kw_unroll: Unroll factors for kernel width """ self.k = k self.l = l self.cin_unroll = cin_unroll self.cout_unroll = cout_unroll self.kh_unroll = kh_unroll self.kw_unroll = kw_unroll def __str__(self): return (f"Choice(k={self.k}, l={self.l}, cin_unroll={self.cin_unroll}, " f"cout_unroll={self.cout_unroll} kh_unroll={self.kh_unroll}, " f"kw_unroll={self.kw_unroll})") def get_valid_unrolls(layer: tf.keras.layers.Layer, cout_unroll: int, target_pe_throughput: float): """Get valid unroll values where resulting throughput>=Target throughput.""" input_channel = qtools_util.get_layer_info(layer, "input_channel") output_channel = qtools_util.get_layer_info(layer, "output_channel") kernel_height = qtools_util.get_layer_info(layer, "kernel_height") kernel_width = qtools_util.get_layer_info(layer, "kernel_width") layer_type = qtools_util.get_layer_info(layer, "layer_type") if layer_type in ["QDepthwiseConv2D", "QAveragePooling2D", "MaxPooling2D", "QGlobalAveragePooling2D", "GlobalMaxPooling2D"]: # Since ops are done in each channel without cross-channel ops, # cin_unroll == cout_unroll in hardware. 
cin_unroll_list = [cout_unroll] else: # Cin_unroll needs to be a divisor of layer.input_channel cin_unroll_list = qtools_util.find_divisors(input_channel) # kw_unroll needs to be a divisor of layer.kernel_width kw_unroll_list = qtools_util.find_divisors(kernel_width) # kh_unroll needs to be a divisor of layer.kernel_height kh_unroll_list = qtools_util.find_divisors(kernel_height) valid_unrolls = [] for cin_unroll in cin_unroll_list: for kw_unroll in kw_unroll_list: for kh_unroll in kh_unroll_list: logging.debug("............cin_unroll: %d kh_unroll: %d kw_unroll: %d", cin_unroll, kh_unroll, kw_unroll) # Caculate computation throughput. pe_throughput = get_pe_throughput( layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll, input_channel, output_channel, kernel_height, kernel_width) logging.debug("............pe_throughput: %.2f", pe_throughput) if pe_throughput >= target_pe_throughput: # Save the valid combination of unroll factors to valid_unrolls. valid_unrolls.append((cin_unroll, kh_unroll, kw_unroll)) return valid_unrolls def get_per_layer_cost(layer_quantizer_bitwidth, layer_mac_count, layer_shapes, cin_unroll, cout_unroll, kh_unroll, kw_unroll, InElementPerClk, OutElementPerClk, mode): """Area per layer, including both PE and memory Bandwidth.""" # TODO(lishanok@): needs to add modes that support data-driven cost modeling. assert mode == CostMode.ACE, "Only CostMode.ACE is supported for now." # Compute memory is calculated according to ACE metric, translated to gates. mac_gates = dnc_layer_cost_ace.get_ace_mac_gates( xbit=layer_quantizer_bitwidth["input_bits"], wbit=layer_quantizer_bitwidth["weight_bits"], abit=layer_quantizer_bitwidth["acc_bits"], regen_params=False) # pe_area is not dependent on total num of MACs in the layer. pe_area = (mac_gates * cin_unroll * cout_unroll * kh_unroll * kw_unroll) # Memory includes input, output and weight memory, translated to gates. 
# TODO(lishanok@): weights could be stored in either SRAM or ROM, dependent # on user specification. memory_area = ( InElementPerClk * layer_quantizer_bitwidth["input_bits"] * dnc_layer_cost_ace.MemoryGatesPerBit["Register"] + OutElementPerClk * layer_quantizer_bitwidth["output_bits"] * dnc_layer_cost_ace.MemoryGatesPerBit["Register"] + np.prod(layer_shapes["weight_shape"]) * layer_quantizer_bitwidth["weight_bits"] * dnc_layer_cost_ace.MemoryGatesPerBit["ROM"]) return (pe_area + memory_area) def get_valid_candidates(input_value, output_to_input_ratio_max): candidate_list = qtools_util.find_divisors(input_value) # Add the other scenario where ComputeElementPerClk is multiple # of ElementPerClk. if output_to_input_ratio_max >= 2: candidate_list += [input_value * x for x in list( range(2, output_to_input_ratio_max+1))] return candidate_list def get_InBufferThru(InElementPerClk, input_channel): return InElementPerClk / input_channel def get_OutBufferThru(OutElementPerClk, output_channel, kernel_height, kernel_width, layer_type): if layer_type in ["UpSampling2D"]: return OutElementPerClk / ( output_channel * kernel_height * kernel_width) else: return OutElementPerClk / output_channel def is_bufferThru_greater_than_targetThru( layer_type: str, InElementPerClk: int, OutElementPerClk: int, input_channel: int, output_channel: int, kernel_height: int, kernel_width: int, target_out_throughput: float, target_in_throughput: float): """Verify whether the resulting buffer throughput > target throughput.""" # Calculate throughput of input buffer. InBuf_throughput = get_InBufferThru(InElementPerClk, input_channel) # Calculate throughput of output buffer. 
OutBuf_throughput = get_OutBufferThru( layer_type=layer_type, OutElementPerClk=OutElementPerClk, output_channel=output_channel, kernel_height=kernel_height, kernel_width=kernel_width) logging.debug( "...............InBuf_throughput: %.2f OutBuf_throughput: %.2f", InBuf_throughput, OutBuf_throughput) # Valid unroll values must meet buffer throughput requirements. return (InBuf_throughput >= target_out_throughput and OutBuf_throughput >= target_in_throughput) def set_best_global_cost_in_paths( OutElementPerClk_list, paths, layer_idx, cur_layer_idx, layer_quantizer_bitwidth, layer_mac_count, layer_shapes, mode): """Find the best global cost of the entire model and update the paths dict. Args: OutElementPerClk_list: list of OutElementPerClk for the current layer. paths: Dict that contains the choices that each layer has. layer_idx: Int. The index value of the current layer's predecessor. cur_layer_idx: current layer's index value. layer_quantizer_bitwidth: Dict that contains layer-related quantizer bitwidth, including acc_bits, mac_bits, input_bits and output_bits. layer_mac_count: Int. Use the number of multiplication as the operation count. To include the number of accumulations, we should multiply the value by 2, assuming accumulation count ~= multiplication count. layer_shapes: Dict with keys: weight_shape, input_shape_list and output_shape. mode: CostMode. The mode to calculate per layer cost. Returns: None. 
""" def calculate_cost(OutElementPerClk): cur_layer_cost = get_per_layer_cost( layer_quantizer_bitwidth, layer_mac_count, layer_shapes, 0, 0, 0, 0, 0, OutElementPerClk, mode) accumulative_cost = cur_layer_cost + paths[layer_idx][ OutElementPerClk]["acc_cost"] return (cur_layer_cost, accumulative_cost, OutElementPerClk) cost_and_values = list(map(calculate_cost, OutElementPerClk_list)) layer_cost, min_accumulative_cost, best_OutElementPerClk = ( min(cost_and_values, key=lambda x: x[1])) # For the initial node, we find the best path which contains a sentinel # choice, cost with that path, and the chosen OutElementPerClk # that will point to the corresponding choice of the following layer. paths[cur_layer_idx] = { best_OutElementPerClk: { "choice": Choice().__str__(), "cur_cost": layer_cost, "acc_cost": min_accumulative_cost, "OutElementPerClk": best_OutElementPerClk }} def backtrack(graph, paths): """Backtracking of the best path from the first layer to the last.""" best_path = {} # Get the second node from the graph as the first node is a sentinel node. layer_idx = graph.get_first_node() logging.debug("=======================") logging.debug("Trimmed Paths:") logging.debug("paths: %s", paths) logging.debug("=======================") # Find the best choice of the first layer. # TODO(lishanok@): extend code to non-sequential model where there are # multiple input layers best_OutElementPerClk = list(paths[layer_idx].keys())[0] best_entry = paths[layer_idx][best_OutElementPerClk] # Add layer name to improve readability. layer = graph.idx_to_layer(layer_idx) best_entry["layer_name"] = layer.name if layer else "None" best_path[layer_idx] = best_entry best_OutElementPerClk = best_entry["OutElementPerClk"] best_accumlative_cost = best_entry["acc_cost"] layer_idx = graph.get_next_nodes(layer_idx)[0] # Given the best choice of 1st layer, find the best choice for all following # layers by backtracking. 
while not graph.is_last_node(layer_idx): # Find current layer's best choice from the ptr (ie. best_OutElementPerClk) # stored in the best choice of the previous layer. best_entry = paths[layer_idx][best_OutElementPerClk] layer = graph.idx_to_layer(layer_idx) best_entry["layer_name"] = layer.name if layer else "None" best_path[layer_idx] = best_entry # Update the ptr to the next layer. best_OutElementPerClk = best_entry["OutElementPerClk"] # get the next node from the graph # TODO(lishanok@): extend the code to non-sequential model where there are # multiple next layers. layer_idx = graph.get_next_nodes(layer_idx)[0] # best_path stores the best hw param combination and cost for each layer. return best_path, best_accumlative_cost def update_cur_best_choices( cur_best_choices: List[Any], OutElementPerClk: int, prev_OutElementPerClk: int, cur_layer_cost: float, accumulative_cost: float, choice: Choice): """Update the cur_best_choices dict. At each layer, different choices of unroll factors will generate a prev_OutElementPerClk value. Some of the choices might generate the same prev_OutElementPerClk. So for each pre_OutElementPerClk, we only store the best choice which has the min cost. """ entry = cur_best_choices.get(prev_OutElementPerClk, None) existing_accumulative_cost = entry["acc_cost"] if entry else np.inf logging.debug("...............cost of cur_best_choices [%d]: %.2f", prev_OutElementPerClk, existing_accumulative_cost) if accumulative_cost < existing_accumulative_cost: # Stores the best choice and its cost for the given # prev_OutElementPerClk. We also store the ptr to next layer's # OutElementPerClk for future backtracking purpose. cur_best_choices[prev_OutElementPerClk] = { "choice": choice.__str__(), "cur_cost": cur_layer_cost, "acc_cost": accumulative_cost, "OutElementPerClk": OutElementPerClk} logging.debug( "...............Find better cost! 
Update cur_best_choices[%d]: %s", prev_OutElementPerClk, cur_best_choices[prev_OutElementPerClk]) def get_ComputeInElementPerClk(layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll): if layer_type in ["QConv2D", "QDense"]: return cin_unroll * kh_unroll * kw_unroll elif layer_type in ["QDepthwiseConv2D", "QAveragePooling2D", "MaxPooling2D"]: return cout_unroll * kh_unroll * kw_unroll elif layer_type in ["QGlobalAveragePooling2D", "GlobalMaxPooling2D", "UpSampling2D"]: return cout_unroll elif layer_type in ["Concatenate"]: return cin_unroll def get_InElementPerClk_base(ComputInElementPerClk, kh_unroll, kw_unroll): return int(ComputInElementPerClk / (kh_unroll * kw_unroll)) def get_pe_throughput(layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll, input_channel, output_channel, kernel_height, kernel_width): """Calculate compute throughput for the given unroll factors.""" if layer_type in ["QConv2D", "QDense"]: return 1.0 * cin_unroll * cout_unroll * kh_unroll * kw_unroll / ( input_channel * output_channel * kernel_height * kernel_width) elif layer_type in ["QDepthwiseConv2D", "QAveragePooling2D", "MaxPooling2D", "UpSampling2D"]: return 1.0 * cout_unroll * kh_unroll * kw_unroll / ( output_channel * kernel_height * kernel_width) elif layer_type in ["QGlobalAveragePooling2D", "GlobalMaxPooling2D", "Concatenate"]: return 1.0 * cout_unroll / output_channel else: raise ValueError(f"Unspported layer type: {layer_type}") def get_target_throughputs(layer, target_out_throughput): """Update throughput for a given layer.""" # For layer that do not change the number of inference pixels, # throughput remains the same. For layers that decrease or increase the # number of inference pixels, the target throughput needs to update # accordingly. 
def multiply_elements_except_none(my_tuple): # Convert None values to np.nan and then use np.nanprod to calculate # the product return np.nanprod([x if x is not None else np.nan for x in my_tuple]) if layer: input_size = multiply_elements_except_none(layer.input_shape[:-1]) output_size = multiply_elements_except_none(layer.output_shape[:-1]) target_in_throughput = target_out_throughput * input_size / output_size else: target_in_throughput = target_out_throughput # Per new design, target_pe_throughput equals to target_out_throughput. target_pe_throughput = target_out_throughput return target_in_throughput, target_pe_throughput def calc_hw_params(graph, target_OutElementPerClk, target_out_throughput, input_quantizer_bits, compute_to_memory_max_ratio=4, memory_to_unroll_max_ratio=4, mode=CostMode.ACE): """Calculate HW params that minimizes total cost. Args: graph: DivideConquerGraph Object. Model graph. target_OutElementPerClk: Int. Target number of elements per clock cycle that the hardware needs to output. target_out_throughput: Float. Target number of inferences per clock cycle that the hardware needs to make. input_quantizer_bits: Int. Model's input quantizer bits. compute_to_memory_max_ratio: Int. Max allowed ratio between ComputeOutElement and OutElement memory_to_unroll_max_ratio: Int. Max allowed ratio between InElementPerClk and CinUnroll mode: CostMode. The mode to calculate per layer cost. Default is ACE. Returns: best_path: Dict. Stores the best hw param value at each layer and their irrespective cost. best_cost: Float. The best global cost of the entire model. """ # Paths stores the best choices for every layer. # For the layer_idx, for each OutElementPerClk, we can calculate the best hw # param choice. We store all these best choices, each choice will # correspond to one OutElementPerClk key. Path therefore has the format: # {layer: {OutElementPerClk: (choice, cost, downstream_OutElementPerClk)}} paths = {} # We start the computation from the last node. 
layer_idx = graph.get_last_node() # Store the hw choices for the last node (a dummy node) for the sake # of completion. paths[layer_idx] = { target_OutElementPerClk: { "choice": Choice().__str__(), "cur_cost": 0, "acc_cost": 0, "OutElementPerClk": -1}} logging.debug("====== Extracting HW params combinations per layer =====") # The following code calculates cost backward, from last layer to the first. while graph.get_prev_nodes(layer_idx): # Find precessor of the layer. # TODO(lishanok@): extend this code to multiple prev layers. cur_layer_idx = graph.get_prev_nodes(layer_idx)[0] cur_layer = graph.idx_to_layer(cur_layer_idx) logging.debug("processing layer_idx:%d name:%s type:%s ***", cur_layer_idx, getattr(cur_layer, "name", None), cur_layer.__class__.__name__) target_in_throughput, target_pe_throughput = get_target_throughputs( cur_layer, target_out_throughput) # Previous layer will generate a list of candidates for OutElementPerClk # values for the current layer. OutElementPerClk_list = list(paths[layer_idx].keys()) logging.debug("OutElementPerClk_list:%s", OutElementPerClk_list) layer_quantizer_bitwidth = graph.get_layer_quantizer_bitwidth(cur_layer) layer_mac_count = graph.get_layer_mac_count(cur_layer) layer_shapes = graph.get_layer_shapes(cur_layer) # TODO(lishanok@): need to extend to multiple input layers, i.e., more # than 1 layer will reach graph's first node. We should only exit if all # input layers are processed. if graph.is_first_node(cur_layer_idx): # Computation reaches the 1st node of the graph. We can now find the best # path of all OutElementPerClk choices at the first layer. 
set_best_global_cost_in_paths( OutElementPerClk_list, paths, layer_idx, cur_layer_idx, layer_quantizer_bitwidth, layer_mac_count, layer_shapes, mode) break # Get layer-related information input_channel = qtools_util.get_layer_info(cur_layer, "input_channel") output_channel = qtools_util.get_layer_info(cur_layer, "output_channel") kernel_height = qtools_util.get_layer_info(cur_layer, "kernel_height") kernel_width = qtools_util.get_layer_info(cur_layer, "kernel_width") layer_type = qtools_util.get_layer_info(cur_layer, "layer_type") output_channel_divisors = qtools_util.find_divisors(output_channel) logging.debug("input_channel: %d, output_channel: %d, kernel_height: %d, " "kernel_width: %d, weight_quantizer_bits: %d", input_channel, output_channel, kernel_height, kernel_width, layer_quantizer_bitwidth["weight_bits"]) cur_best_choices = {} for OutElementPerClk in OutElementPerClk_list: logging.debug("...OutElementPerClk: %d", OutElementPerClk) # Pass through OutElementPerClk and cost for non-essential layers. if layer_type in ["QBatchNormalization", "QActivation", "Dropout", "Reshape", "Activation", "ZeroPadding2D"]: logging.debug("...... Passing through layer_type: %s with 0 cost", layer_type) # Update the best choices dict with only 1 key-value pair. By # considering current light-computation layer in the graph # as a pass-through node, we set layer cost=0, and set the predecessor # node's OutElementPerClk the same as current node's OutElementPerClk. update_cur_best_choices( cur_best_choices, OutElementPerClk=OutElementPerClk, prev_OutElementPerClk=OutElementPerClk, cur_layer_cost=0, accumulative_cost=paths[layer_idx][OutElementPerClk]["acc_cost"], choice=Choice()) # Exit current iteration since there is no design param to explore # for these layer types. continue # For each of the possible OutElementPerClk values provided by the next # layer, we derive possible HW params choices of the current layer. 
for ComputeOutElementPerClk in get_valid_candidates( OutElementPerClk, compute_to_memory_max_ratio): logging.debug("......ComputeOutElementPerClk: %d", ComputeOutElementPerClk) l = OutElementPerClk / ComputeOutElementPerClk cout_unroll = ComputeOutElementPerClk # cout_unroll needs to be a divisor of output_channels if cout_unroll not in output_channel_divisors: continue logging.debug( ".........OutElementPerClk / ComputeOutElementPerClk = %.2f," "cout_unroll=%.2f", l, cout_unroll) # Find valid unroll values that meet pe throughput requirement. valid_unrolls = get_valid_unrolls(cur_layer, cout_unroll, target_pe_throughput) if not valid_unrolls: # Skip if no valid unroll values are found. logging.debug(".........No valid unroll values found!") continue for (cin_unroll, kh_unroll, kw_unroll) in valid_unrolls: # Check throughput requirement of each combination of unroll values. logging.debug(".........cin_unroll: %d, kh_unroll: %d, kw_unroll: %d", cin_unroll, kh_unroll, kw_unroll) ComputInElementPerClk = get_ComputeInElementPerClk( layer_type, cin_unroll=cin_unroll, cout_unroll=cout_unroll, kh_unroll=kh_unroll, kw_unroll=kw_unroll) # InElementPerClk = k*ComputeInElementPerClk/(kh_unroll * kw_unroll) # TODO(lishanok@): Confirm if it works for Concatenate layer. InElementPerClk_base = get_InElementPerClk_base( ComputInElementPerClk=ComputInElementPerClk, kh_unroll=kh_unroll, kw_unroll=kw_unroll) for InElementPerClk in get_valid_candidates( InElementPerClk_base, memory_to_unroll_max_ratio): # With given cin_unroll, check throughput requirement of each # possible candidate of InElementPerClk. 
logging.debug("............InElementPerClk: %d", InElementPerClk) k = cin_unroll / InElementPerClk # prev_OutElementPerClk is the predecessor node's OutElementPerClk prev_OutElementPerClk = InElementPerClk if is_bufferThru_greater_than_targetThru( layer_type=layer_type, InElementPerClk=InElementPerClk, OutElementPerClk=OutElementPerClk, input_channel=input_channel, output_channel=output_channel, kernel_height=kernel_height, kernel_width=kernel_width, target_out_throughput=target_out_throughput, target_in_throughput=target_in_throughput): # If valid unroll values meet buffer throughput requirements, # comput cost. # cost = current layer's cost + total of downstream layers' cost. # Since we derive cost iteratively starting from the last layer, # paths already store the total cost of the downstream layers. cur_layer_cost = get_per_layer_cost( layer_quantizer_bitwidth, layer_mac_count, layer_shapes, cin_unroll, cout_unroll, kh_unroll, kw_unroll, InElementPerClk, OutElementPerClk, mode) accumulative_cost = ( cur_layer_cost + paths[layer_idx][OutElementPerClk][ "acc_cost"]) logging.debug("...............Buf throughput is good! " "Accumulative_cost: %.2f", accumulative_cost) # Each choice is a hw param combination. choice = Choice(l, k, cin_unroll, cout_unroll, kh_unroll, kw_unroll) update_cur_best_choices(cur_best_choices, OutElementPerClk, prev_OutElementPerClk, cur_layer_cost, accumulative_cost, choice) if not cur_best_choices: logging.error("Cannot find any valid HW choice for layer %s! Exit!", cur_layer.name) return {}, None logging.debug("=======================") # Store the best choices of hw params for the current layer. Proceed to # the previous layer. paths[cur_layer_idx] = cur_best_choices layer_idx = cur_layer_idx # Predicessor node's OutBuf throughput is sucessor node's InBuf throughput. 
target_out_throughput = target_in_throughput return backtrack(graph, paths) def estimate_model_cost( model: tf.keras.Model, input_quantizer_bits: int = 8, target_OutElementPerClk: int = 10, target_out_throughput: float = 1.0, compute_to_memory_max_ratio: int = 4, memory_to_unroll_max_ratio: int = 4, mode: CostMode = CostMode.ACE): """Main function to divide and conquer cost modeling. Args: model: QKeras model. input_quantizer_bits: Model's input quantizer bits. target_OutElementPerClk: Target number of elements per clock cycle that the hardware needs to output. target_out_throughput: Target number of inferences per clock cycle that the hardware needs to make. compute_to_memory_max_ratio: Max allowed ratio between ComputeOutElement and OutElement memory_to_unroll_max_ratio: Max allowed ratio between InElementPerClk and CinUnroll mode: The mode to calculate per layer cost. Returns: best_path: Dict. Stores the best hw param value at each layer and their irrespective cost. best_cost: Float. The best global cost of the entire model. """ logging.info("Estimating model design params and cost...") # Generate graph graph = DivideConquerGraph(model) # Call the main function to generate optimal HW configs for all layers best_path, best_cost = calc_hw_params( graph=graph, target_OutElementPerClk=target_OutElementPerClk, target_out_throughput=target_out_throughput, input_quantizer_bits=input_quantizer_bits, compute_to_memory_max_ratio=( compute_to_memory_max_ratio), memory_to_unroll_max_ratio=( memory_to_unroll_max_ratio), mode=mode ) logging.info("best_design_params: %s", best_path) return (best_path, best_cost) ================================================ FILE: qkeras/qtools/DnC/dnc_layer_cost_ace.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""divide_and_conquer per layer cost modeling using ACE and data fitting.

For a given layer with its hardware design params, predict its cost in
actual ASIC implementation using ACE metric and actual MAC gates data points.
"""

import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit


# Rule-of-thumb mapping between bits and gates in memory area estimate.
MemoryGatesPerBit = {
    'Register': 10.0,
    'SRAM': 1.0,
    'ROM': 0.1,
}

# Previously calculated 3D polynomial coefficients with relative MAE<5%.
# Order matches mac_gates_polynomial_3d's (a, b, c).
MAC_POLY3D_PARAMS = np.array([7.70469119, 13.76199652, -92.15756665])

# MAC area data points generated from go/mac_vs_area.
# Each table is a 6x9 grid (weight bits x input bits) of gate counts for one
# accumulator bitwidth; empty cells (trailing commas) become NaN and are
# filtered out during fitting.
MAC24 = pd.read_csv(io.StringIO('''
283,280,286,313,325,336,356,,
274,290,325,372,401,428,485,,
285,325,388,510,568,614,713,,
308,372,509,750,865,1002,1167,,
336,427,617,1003,1151,1309,,,
356,480,722,1165,,,,,
'''), header=None)

MAC32 = pd.read_csv(io.StringIO('''
391,365,377,410,453,433,458,507,
364,382,418,466,497,521,578,685,
378,418,485,594,659,721,832,1035,
408,466,596,843,1029,1151,1321,1642,
432,521,724,1153,1363,1512,1797,,
457,578,830,1330,1551,1782,2273,,
'''), header=None)

MAC40 = pd.read_csv(io.StringIO('''
458,457,470,500,522,527,551,605,664
457,475,513,561,597,616,670,782,888
470,513,579,699,766,816,928,1150,1358
499,561,699,996,1161,1273,1499,1850,2189
527,612,818,1275,1545,1691,2054,2516,
549,670,927,1496,1798,2035,2490,3294,
'''), header=None)

MAC48 = pd.read_csv(io.StringIO('''
595,550,566,594,659,624,642,694,745
551,566,607,654,727,707,763,881,984
566,607,679,794,871,921,1017,1270,1489
594,655,793,1097,1285,1401,1668,2101,2378
624,711,921,1397,1816,1950,2277,2763,3301
642,762,1015,1669,1974,2264,2718,3631,4415
'''), header=None)


def mac_gates_polynomial_3d(xyz, a, b, c):
  """Using a 3d polynomial function to model MAC area.

  This function models the MAC area to be the sum of multipler, accumulator
  and a constant shift. Particularly, multiplier area is modeled to be linear
  # to input_bits * weight_bits, per ACE rule.

  Args:
    xyz: tuple includes input, weight and accumulator bits.
    a: polynomial coefficient 0.
    b: polynomial coefficient 1.
    c: polynomial coefficient 2.

  Returns:
    MAC area predicted by the function.
  """
  x, y, z = xyz
  return a * x * y + b * z + c


def gen_mac_gate_model(do_plot=False):
  """Generate the polynomial cost model coefficients using given data.

  Args:
    do_plot: Bool indicates whether plot the raw data and the fitted curve.

  Returns:
    params: The esitimated params of the polynomical function.
    mae_predict: Calculate the mean absolute error of the predictions.
    parameter_std_deviation: one standard deviation errors on the parameters,
      indicating the uncertainties of the params.
  """
  # acc bits, 1st index
  abit = np.array([24, 32, 40, 48])
  abit = np.repeat(abit, 54)

  # weight bits, 2nd index
  wbit = np.array([1, 2, 4, 8, 12, 16])
  wbit = np.tile(np.repeat(wbit, 9), 4)

  # input bits, 3rd index
  xbit = np.array([1, 2, 4, 8, 10, 12, 16, 24, 32])
  xbit = np.tile(xbit, 24)

  # Record all mac area data points associated with each accumulator bitwidth
  mac_arrs = []
  # Record the start and end index of the mac area data points
  # associated with each accumulator bitwidth
  mac_arrs_index = {}
  # Record index of all valid data points
  valid_index = []

  start_pos = 0
  for (mac_acc, acc_bits) in zip(
      [MAC24, MAC32, MAC40, MAC48], [24, 32, 40, 48]):
    cur_mac = mac_acc.to_numpy().reshape(-1)
    # Filter out nan data points
    cur_valid_index = ~np.isnan(cur_mac)
    cur_valid_mac = cur_mac[cur_valid_index]
    # Record the data length for each accumulator bits
    end_pos = start_pos + len(cur_valid_mac)
    mac_arrs_index[acc_bits] = (start_pos, end_pos)
    # Append mac areas of each accumulator bits to a list
    mac_arrs += list(cur_valid_mac)
    start_pos = end_pos
    valid_index += list(cur_valid_index)

  # Filter out invalid data
  xbit = xbit[valid_index]
  wbit = wbit[valid_index]
  abit = abit[valid_index]

  # curve fitting for all data points
  params, covariance = curve_fit(
      mac_gates_polynomial_3d, (xbit, wbit, abit), mac_arrs)
  # Compute one standard deviation errors on the parameters.
  parameter_std_deviation = np.sqrt(np.diag(covariance))

  # Calculate the mean absolute error between prediction and given data.
  mac_predict = mac_gates_polynomial_3d((xbit, wbit, abit), *params)
  mae = np.mean(np.abs(mac_predict - mac_arrs))
  mae_predict = mae / np.mean(mac_arrs)

  if do_plot:
    # Plot all raw data points
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(xbit, wbit, mac_arrs, label='Data')
    ax.set_xlabel('X_bits')
    ax.set_ylabel('W_bits')
    ax.set_zlabel('MAC')
    plt.title('MAC area data points')
    plt.show()

    # Generate a mesh grid for plotting.
    x_fit = np.linspace(min(xbit), max(xbit), 50)
    w_fit = np.linspace(min(wbit), max(wbit), 50)
    xmesh, wmesh = np.meshgrid(x_fit, w_fit)

    fig = plt.figure(figsize=(16, 16))
    index = 1
    # Plotting 3D fitting curve for each accumulator bitwidth
    for acc_bits in [24, 32, 40, 48]:
      ax = fig.add_subplot(2, 2, index, projection='3d')
      start_pos = mac_arrs_index[acc_bits][0]
      end_pos = mac_arrs_index[acc_bits][1]
      ax.scatter(xbit[start_pos:end_pos], wbit[start_pos:end_pos],
                 mac_arrs[start_pos:end_pos], label='Data')
      amesh = np.full(shape=(50, 50), fill_value=acc_bits)
      poly_fit = mac_gates_polynomial_3d((xmesh, wmesh, amesh), *params)
      ax.plot_surface(
          xmesh, wmesh, poly_fit, cmap='viridis', alpha=0.8,
          label=f'Fitted Surface | acc_bits={acc_bits}')
      ax.set_xlabel('X')
      ax.set_ylabel('W')
      ax.set_zlabel('MAC')
      ax.set_title(f'accumulator bitwidth: {acc_bits}')
      index += 1
    plt.show()

  return params, mae_predict, parameter_std_deviation


def get_ace_mac_gates(xbit, wbit, abit, regen_params=False):
  """Function to estimate MAC area, including 1 multipler and 1 accumulator.

  Args:
    xbit: int. input bits.
    wbit: int. weight bits.
    abit: int. accumulator bits.
    regen_params: Bool. If True, regenerate the MAC cost model coefficients.
      If False, reuse the previously generated model coefficients.

  Returns:
    Estimated MAC gates.
  """
  if regen_params:
    # Refit from the raw MAC tables; this also triggers plotting.
    mac_params, _, _ = gen_mac_gate_model(do_plot=True)
  else:
    mac_params = MAC_POLY3D_PARAMS

  return mac_gates_polynomial_3d((xbit, wbit, abit), *mac_params)


================================================
FILE: qkeras/qtools/__init__.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Export qtools package."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .run_qtools import QTools
from .settings import cfg as qtools_cfg


================================================
FILE: qkeras/qtools/config_public.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """configuration file for external usage.""" config_settings = { "default_source_quantizer": "quantized_bits(8, 0, 1)", "default_interm_quantizer": "quantized_bits(8, 0, 1)", "horowitz": { "fpm_add": [0.003125, 0], "fpm_mul": [0.002994791667, 0.001041666667, 0], "fp16_add": [0.4], "fp16_mul": [1.1], "fp32_add": [0.9], "fp32_mul": [3.7], "sram_rd": [9.02427321e-04, -2.68847858e-02, 2.08900804e-01, 0.0], "dram_rd": [20.3125, 0] }, "include_energy": { "QActivation": ["outputs"], "QAdaptiveActivation": ["outputs"], "Activation": ["outputs"], "QBatchNormalization": ["parameters"], "BatchNormalization": ["parameters"], "Add": ["op_cost"], "Subtract": ["op_cost"], "MaxPooling2D": ["op_cost"], "default": ["inputs", "parameters", "op_cost"] } } ================================================ FILE: qkeras/qtools/examples/example_generate_json.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Example code to generate weight and MAC sizes in a json file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.keras as keras

from qkeras import QActivation
from qkeras import QDense
from qkeras import quantizers
from qkeras.qtools import run_qtools


def hybrid_model():
  """hybrid model that mixes qkeras and keras layers."""

  x = x_in = keras.layers.Input((784,), name="input")
  # Un-quantized keras layers.
  x = keras.layers.Dense(300, name="d0")(x)
  x = keras.layers.Activation("relu", name="d0_act")(x)
  # Quantized qkeras layers (power-of-2 weights, 4-bit relu activation).
  x = QDense(100, kernel_quantizer=quantizers.quantized_po2(4),
             bias_quantizer=quantizers.quantized_po2(4),
             name="d1")(x)
  x = QActivation("quantized_relu(4,0)", name="d1_qr4")(x)
  x = QDense(
      10, kernel_quantizer=quantizers.quantized_po2(4),
      bias_quantizer=quantizers.quantized_po2(4),
      name="d2")(x)
  x = keras.layers.Activation("softmax", name="softmax")(x)

  return keras.Model(inputs=[x_in], outputs=[x])


def generate_json(in_model):
  """example to generate data type map for a given model.

  Args:
    in_model: qkeras model object

  Usage:
    input_quantizer_list: A list of input quantizers for the model.
      It could be in the form of:
      1. a list of quantizers, each quantizer for each one of the model
         inputs
      2. one single quantizer, which will be used for all of the model
         inputs
      3. None. Default input quantizer defined in config_xxx.py will be
         used for all of the model inputs
    for_reference: get energy for a reference model/trial model
      1. True: get baseline energy for a given model. Use keras_quantizer/
         keras_accumulator (or default_interm_quantizer in config_xxx.py if
         keras_quantizer/keras_accumulator not given) to quantize all layers
         in a model in order to calculate its energy. It serves the purpose
         of setting up a baseline energy for a given model architecture.
      2. False: get "real" energy for a given model using user-specified
         quantizers. For layers that are not quantized (keras layers) or
         have no user-specified quantizers (qkeras layers without
         quantizers specified), keras_quantizer and keras_accumulator (or
         default_interm_quantizer in config_xxx.py if keras_quantizer/
         keras_accumulator not given) will be used as their quantizers.
    process: technology process to use in configuration (horowitz, ...)
    weights_path: absolute path to the model weights
    is_inference: whether model has been trained already, which is needed
      to compute tighter bounds for QBatchNormalization Power estimation

    Other parameters (defined in config_xxx.py):
    1. "default_source_quantizer": used as default input quantizer if the
       user does not specify any input quantizers,
    2. "default_interm_quantizer": used as default quantizer for any
       intermediate variables such as multiplier, accumulator, weight/bias
       in a qkeras layer if the user does not specify the corresponding
       variable
    3. process_name: energy calculation parameters for different processes.
       "horowitz" is the process we use by default.
    4. "include_energy": what energy to include at each layer when
       calculating the total energy of the entire model.
       "parameters": memory access energy for loading model parameters.
       "inputs": memory access energy for reading inputs
       "outputs": memory access energy for writing outputs
       "op_cost": operation energy for multiplication and accumulation
  """

  input_quantizer_list = [quantizers.quantized_bits(8, 0, 1)]
  reference_internal = "int8"
  reference_accumulator = "int32"

  # generate QTools object which contains model data type map in json format
  q = run_qtools.QTools(
      in_model,
      # energy calculation using a given process
      process="horowitz",
      # quantizers for model inputs
      source_quantizers=input_quantizer_list,
      # training or inference with a pre-trained model
      is_inference=False,
      # path to pre-trained model weights
      weights_path=None,
      # keras_quantizer to quantize weight/bias in non-quantized keras layers
      keras_quantizer=reference_internal,
      # keras_accumulator to quantize MAC in un-quantized keras layers
      keras_accumulator=reference_accumulator,
      # calculating baseline energy or not
      for_reference=False)

  # print data type map
  q.qtools_stats_print()

  # dump the layer data map to a json file
  # json_name = "output.json"
  # q.qtools_stats_to_json(json_name)


if __name__ == "__main__":
  model = hybrid_model()
  model.summary()
  generate_json(model)
"""Example code to generate weight and MAC sizes in a json file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.keras as keras

from qkeras import QActivation
from qkeras import QDense
from qkeras import quantizers
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings


def hybrid_model():
  """hybrid model that mixes qkeras and keras layers."""

  x = x_in = keras.layers.Input((784,), name="input")
  # Un-quantized keras layers.
  x = keras.layers.Dense(300, name="d0")(x)
  x = keras.layers.Activation("relu", name="d0_act")(x)
  # Quantized qkeras layers (4-bit weights/bias, 4-bit relu activation).
  x = QDense(100, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
             bias_quantizer=quantizers.quantized_bits(4, 0, 1),
             name="d1")(x)
  x = QActivation("quantized_relu(4,0)", name="d1_qr4")(x)
  x = QDense(
      10, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      name="d2")(x)
  x = keras.layers.Activation("softmax", name="softmax")(x)

  return keras.Model(inputs=[x_in], outputs=[x])


if __name__ == "__main__":
  # input parameters:
  # process: technology process to use in configuration (horowitz, ...)
  # weights_on_memory: whether to store parameters in dram, sram, or fixed
  # activations_on_memory: store activations in dram or sram
  # rd_wr_on_io: whether load data from dram to sram (consider sram as a
  #   cache for dram. If false, we will assume data will be already in SRAM
  # source_quantizers: quantizers for model input
  # is_inference: whether model has been trained already, which is
  #   needed to compute tighter bounds for QBatchNormalization Power
  #   estimation.
  # reference_internal: size to use for weight/bias/activation in
  #   get_reference energy calculation (int8, fp16, fp32)
  # reference_accumulator: accumulator and multiplier type in get_reference
  #   energy calculation
  model = hybrid_model()
  model.summary()

  reference_internal = "int8"
  reference_accumulator = "int32"

  # By setting for_reference=True, we create QTools object which uses
  # keras_quantizer to quantize weights/bias and
  # keras_accumulator to quantize MAC variables for all layers. Obviously,
  # this overwrites any quantizers that user specified in the qkeras layers.
  # The purpose of doing so is to enable user to calculate a baseline energy
  # number for a given model architecture and compare it against quantized
  # models.
  q = run_qtools.QTools(
      model,
      # energy calculation using a given process
      process="horowitz",
      # quantizers for model input
      source_quantizers=[quantizers.quantized_bits(8, 0, 1)],
      is_inference=False,
      # absolute path (including filename) of the model weights
      weights_path=None,
      # keras_quantizer to quantize weight/bias in un-quantized keras layers
      keras_quantizer=reference_internal,
      # keras_quantizer to quantize MAC in un-quantized keras layers
      keras_accumulator=reference_accumulator,
      # whether calculate baseline energy
      for_reference=True)

  # calculate energy of the derived data type map.
  ref_energy_dict = q.pe(
      # whether to store parameters in dram, sram, or fixed
      weights_on_memory="sram",
      # store activations in dram or sram
      activations_on_memory="sram",
      # minimum sram size in number of bits
      min_sram_size=8*16*1024*1024,
      # whether load data from dram to sram (consider sram as a cache
      # for dram. If false, we will assume data will be already in SRAM
      rd_wr_on_io=False)

  # get stats of energy distribution in each layer
  reference_energy_profile = q.extract_energy_profile(
      qtools_settings.cfg.include_energy, ref_energy_dict)
  # extract sum of energy of each layer according to the rule specified in
  # qtools_settings.cfg.include_energy
  total_reference_energy = q.extract_energy_sum(
      qtools_settings.cfg.include_energy, ref_energy_dict)
  print("Baseline energy profile:", reference_energy_profile)
  print("Total baseline energy:", total_reference_energy)

  # By setting for_reference=False, we quantize the model using quantizers
  # specified by users in qkeras layers. For hybrid models where there are
  # mixture of unquantized keras layers and quantized qkeras layers, we use
  # keras_quantizer to quantize weights/bias and keras_accumulator to
  # quantize MAC variables for all keras layers.
  q = run_qtools.QTools(
      model, process="horowitz",
      source_quantizers=[quantizers.quantized_bits(8, 0, 1)],
      is_inference=False, weights_path=None,
      keras_quantizer=reference_internal,
      keras_accumulator=reference_accumulator,
      for_reference=False)
  trial_energy_dict = q.pe(
      weights_on_memory="sram",
      activations_on_memory="sram",
      min_sram_size=8*16*1024*1024,
      rd_wr_on_io=False)
  trial_energy_profile = q.extract_energy_profile(
      qtools_settings.cfg.include_energy, trial_energy_dict)
  total_trial_energy = q.extract_energy_sum(
      qtools_settings.cfg.include_energy, trial_energy_dict)
  print("energy profile:", trial_energy_profile)
  print("Total energy:", total_trial_energy)
"""Generates MAC, input and output datatype for a qkeras model."""

import collections
import copy
import numpy as np
import sys
import networkx as nx

from qkeras.qtools import qgraph
from qkeras.qtools import qtools_util
from qkeras.qtools import quantized_operators
from qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module
from qkeras.qtools.settings import cfg
from qkeras.qtools.quantized_operators import adder_factory
from qkeras.qtools.quantized_operators.fused_bn_factory import FusedBNFactory


class TagMissingError(ValueError):
  # Raised when a layer carries a quantizer that qtools does not support.
  pass


# Per-layer record of all derived quantizer/shape/op-count information.
LayerDataType = collections.namedtuple(
    "LayerDataType",
    [
        "input_quantizer_list",
        "multiplier",
        "accumulator",
        "weight_quantizer",
        "w_shapes",
        "bias_quantizer",
        "b_shapes",
        "output_quantizer",
        "output_shapes",
        "operation_count",
    ],
)

# Class names of qkeras compute layers handled by the dense/conv branch.
# NOTE(review): "QConv2D" appears twice -- the duplicate is harmless for
# membership tests but is likely unintended.
QKERAS_LAYERS = [
    "QDense",
    "QConv1D",
    "QConv2D",
    "QDepthwiseConv2D",
    "QConv2D",
    "QConv2DTranspose",
]

# Un-quantized keras counterparts of the layers above.
KERAS_LAYERS = [
    "Dense",
    "Conv1D",
    "Conv2D",
    "DepthwiseConv2D",
    "Conv2DTranspose",
]


def get_bn_quantizers(layer, quantizer_factory, cfg, keras_quantizer,
                      input_quantizer, is_inference, for_reference,
                      model_weights_already_quantized):
  """Extract quantizers from a given batchnorm layer.

  Returns the (gamma, beta, mean, variance, inverse) quantizers for the
  layer, falling back to defaults when the layer is an un-quantized keras
  BatchNormalization or when running in reference mode.
  """
  # QKeras layers might be mixed with keras layers.
  if for_reference or not hasattr(layer, "get_quantizers"):
    # Keras BatchNorm layer mixed with quantized model
    # -> no reference mode
    # All five quantizers default to the configured intermediate quantizer.
    gamma_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    beta_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    mean_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    variance_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    inverse_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)

    # A user-supplied keras_quantizer overrides the config default.
    if keras_quantizer:
      gamma_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      beta_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      mean_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      variance_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      inverse_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
  else:
    # QKeras batchnorm layer: start from the layer's own quantizers and
    # fill in per-quantizer fallbacks where the layer left them unset.
    (qkeras_gamma_quantizer, qkeras_beta_quantizer,
     qkeras_mean_quantizer, qkeras_variance_quantizer,
     qkeras_inverse_quantizer) = layer.get_quantizers()

    # Beta/mean fall back to a clone of the input quantizer; gamma/
    # variance/inverse fall back to the config default.
    if not qkeras_beta_quantizer:
      beta_quantizer = quantizer_factory.clone_quantizer(input_quantizer)
    else:
      beta_quantizer = quantizer_factory.make_quantizer(
          qkeras_beta_quantizer)

    if not qkeras_mean_quantizer:
      mean_quantizer = quantizer_factory.clone_quantizer(input_quantizer)
    else:
      mean_quantizer = quantizer_factory.make_quantizer(
          qkeras_mean_quantizer)

    if not qkeras_variance_quantizer:
      variance_quantizer = quantizer_factory.make_default_quantizer(
          mode=cfg.default_interm_quantizer)
    else:
      # If variance is float, convert to input_quantizer.
      variance_quantizer = quantizer_factory.make_quantizer(
          qkeras_variance_quantizer)

    if not qkeras_gamma_quantizer:
      gamma_quantizer = quantizer_factory.make_default_quantizer(
          mode=cfg.default_interm_quantizer)
    else:
      gamma_quantizer = quantizer_factory.make_quantizer(
          qkeras_gamma_quantizer)

    if not qkeras_inverse_quantizer:
      inverse_quantizer = quantizer_factory.make_default_quantizer(
          mode=cfg.default_interm_quantizer)
    else:
      inverse_quantizer = quantizer_factory.make_quantizer(
          qkeras_inverse_quantizer)

  # During inference, gamma, beta and variance are constants
  # if they are po2 quantizers, we need to modify their bits
  # with actual values and also update graph with the
  # corresponding output_quantizer on the edge.
  if is_inference:
    weights = qtools_util.get_weights(
        layer, model_weights_already_quantized)

    # If no scale(gamma), num_weights --
    # If no center(beta_quantizer) num_weights --
    # (weights layout: [gamma][beta] mean variance, with gamma/beta present
    # only when scale/center are enabled; variance is the last entry.)
    num_weights = 4
    if not layer.scale:
      num_weights -= 1
    if not layer.center:
      num_weights -= 1

    if (layer.scale and gamma_quantizer is not None
        and gamma_quantizer.is_po2):
      gamma_quantizer.update_inference_values(weights[0])

    if (variance_quantizer is not None and variance_quantizer.is_po2):
      variance_quantizer.update_inference_values(
          weights[num_weights-1])

  return (gamma_quantizer, beta_quantizer, mean_quantizer,
          variance_quantizer, inverse_quantizer)
def update_output_quantizer_in_graph(graph, node_id, quantizer_factory,
                                     new_quantizer, for_reference):
  """Updates the node's outgoing edge with the output quantizer type.

  Chooses between the output quantizer already attached to the node and
  the supplied new_quantizer, writes the winner onto the graph edge, and
  returns its qtools representation (used to fill the json data type map).
  """
  current_quantizer = graph.nodes[node_id]["out_quantizer"]

  # Fall back to the supplied quantizer when in reference mode or when the
  # node carries no usable output quantizer.
  must_replace = (
      for_reference
      or not current_quantizer
      or not quantizer_factory.is_quantizer_supported(current_quantizer))
  if must_replace:
    current_quantizer = new_quantizer

  qgraph.GraphUpdateEdge(graph, node_id, current_quantizer)

  # Convert whichever quantizer won (the layer's activation quantizer, or
  # the newly supplied one) into a qtools quantizer for the caller.
  return quantizer_factory.make_quantizer(current_quantizer)
Returns: a result containing the following fields: source_quantizer_list similar as input output_layers: names of the layers that are output layers input_layers: names of the layers that are input_layers, layer_data_type_map: data type map of each layer """ quantizer_factory = quantizer_factory_module.QuantizerFactory() layer_data_type_map = collections.OrderedDict() # get the output layers output_layers = [] input_layers = [] predecessors = list(graph.predecessors(qgraph.SINK)) successors = list(graph.successors(qgraph.SOURCE)) for u in predecessors: if u == qgraph.SOURCE or u == qgraph.SINK: continue output_layers.append(graph.nodes[u]["layer"][0]) for u in successors: if u == qgraph.SOURCE or u == qgraph.SINK: continue input_layers.append(graph.nodes[u]["layer"][0]) for node_id in nx.topological_sort(graph): node = graph.nodes[node_id] node_type = node["type"][-1] layer = node["layer"][0] is_input_layer = layer in input_layers w_shapes = None b_shapes = None output_shapes = None qkeras_weight_quantizer = None if hasattr(layer, "output_shape"): output_shapes = layer.output_shape if hasattr(layer, "get_weights"): weights = layer.get_weights() if len(weights) != 0: w_shapes = layer.get_weights()[0].shape b_shapes = weights[0].shape[-1] if debug: print("########") if layer is not None: print(layer.name) else: print("None") # Deals with keras layer or lack of input quantizer in qkeras layer. input_qe_list = qtools_util.get_input_quantizers_advanced( graph, node_id, is_input_layer, quantizer_factory, cfg) if input_qe_list and node_id != qgraph.SINK: input_quantizer_list = [] for node in input_qe_list: input_quantizer_list.append(node[0]) # Calculates number of operations (multiplication/accumulation). # Previously Merge layers's inputs all have the same shape, however, in # MobilenetV3 we found that there is shape broadcast in the keras # Multiply layer. 
Therefore we use the shape with max size as the # input shape if len(input_qe_list) > 0: maxsize = -1 max_id = 0 for (idx, item) in enumerate(input_qe_list): shape = item[1]["shape"] size = np.prod(shape[1:]) if size > maxsize: maxsize = size max_id = idx input_shape = input_qe_list[max_id][1]["shape"] else: (_, edge_0) = input_qe_list[0] input_shape = edge_0["shape"] operation_count = qtools_util.get_operation_count( layer, input_shape) # Merges layers with multiple inputs. if qtools_util.is_merge_layers(layer): # merge_factory.make_quantizer automatically calculates the merge output # quantizer bitwidth according to input quantizer type. merge_factory = quantized_operators.MergeFactory() merge_quantizer = merge_factory.make_quantizer( input_qe_list, layer.__class__.__name__) if for_reference: # The for_reference option overwrites the auto-calculated merge output # quantizer if keras_accumulator: # gate_factor and gate_bits remain the same as previously # calculated; only change output quantizer as the keras_accumulator merge_quantizer.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) else: merge_quantizer.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, merge_quantizer.output, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, merge_quantizer, None, None, None, None, None, output_quantizer, output_shapes, operation_count ) # MaxPooling/reshape/flatten/UpSampling1D/2D/3D elif (qtools_util.is_shape_alternation_layers(layer) or "UpSampling" in layer.__class__.__name__): input_quantizer = input_quantizer_list[0] # Output quantizer output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, input_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, None, None, None, None, None, None, output_quantizer, output_shapes, 
operation_count ) # AveragePooling and GlobalAveragePooling elif layer.__class__.__name__ in [ "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D", "GlobalAveragePooling2D", "QAveragePooling2D", "QGlobalAveragePooling2D"]: (input_quantizer, _) = input_qe_list[0] qtools_average_quantizer = None # This is a hack. We don't want to implement a new accumulator class # just for averagpooling. So we re-use accumulator type in conv/dense # layers which need multiplier and kernel as input parameters. # In order to do so, we fake a multiplier which treat the pool_size as # the kernel. since kernel needs 4 dimension, k_h, k_w, C_in, C_out, # we set the last two dimension as [1, 1] if layer.__class__.__name__ in ["AveragePooling2D", "AvgPool2D", "QAveragePooling2D"]: pool_size = tuple(list(layer.pool_size) + [1, 1]) else: pool_size = tuple(list(input_shape)[1:-1] + [1, 1]) # Automatically calculates the accumulator bitwidth according to input # quantizer type for both quantized pooling and regular pooling layers multiplier_factory = quantized_operators.MultiplierFactory() fake_multiplier = multiplier_factory.make_multiplier( input_quantizer, input_quantizer) fake_multiplier.output = input_quantizer accumulator_factory = quantized_operators.AccumulatorFactory() accumulator = accumulator_factory.make_accumulator( pool_size, fake_multiplier, use_bias=False) # For quantized pooling layers, we also need to consider the division # precision, which is controlled by the average quantizer if layer.__class__.__name__ in ["QAveragePooling2D", "QGlobalAveragePooling2D"]: # For the quantized layer, there is an average_quantizer used for # the inverse of division operation. 
qkeras_average_quantizer = layer.get_quantizers()[0] qtools_average_quantizer = quantizer_factory.make_quantizer( qkeras_average_quantizer) multiplier = multiplier_factory.make_multiplier( accumulator.output, qtools_average_quantizer) else: multiplier = None if debug: print("accumulator:", accumulator.output.bits) # Re-calcualte accumulator/multiplier type when it's using # for_reference option if for_reference: if keras_accumulator: # If keras_accumulator exists, use keras_accumulator as multiplier # or accumulator type if multiplier: # Quantized layers need to define multiplier type multiplier.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) accumulator.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) else: # If user didn't provide keras_accumulator, use the default settings # in cfg to define multiplier/accumulator type if multiplier: multiplier.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) accumulator.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) layer_quantizer = accumulator.output # set the output quantizer if layer.__class__.__name__ in ["QAveragePooling2D", "QGlobalAveragePooling2D"]: # If is quantized layer, last operation is multiply (averaging). layer_quantizer = multiplier.output else: layer_quantizer = accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "average_quantizer": qtools_average_quantizer, "pool_sum_accumulator": accumulator, "pool_avg_multiplier": multiplier, "output_quantizer": output_quantizer, "output_shapes": output_shapes, "operation_count": operation_count } # If it's a Quantized Activation layer. 
elif node_type in ["QActivation", "QAdaptiveActivation", "Activation"]: if for_reference or not hasattr(layer, "quantizer"): # Keras activation layer -> use default_interm_quantizer layer_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_accumulator: layer_quantizer = quantizer_factory.make_default_quantizer( mode=keras_accumulator) else: layer_quantizer = layer.quantizer if not quantizer_factory.is_quantizer_supported(layer_quantizer): raise TagMissingError( "Unsupported activation quantizer {} on this layer: {}".format( layer_quantizer, layer)) if not layer_quantizer: layer_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, None, None, None, w_shapes, None, b_shapes, output_quantizer, output_shapes, operation_count ) elif node_type in ["QBatchNormalization", "BatchNormalization"]: # If this batchnorm layer needs to be fused with the previous layer, # we pass the input quantizer type as the output type in qraph. 
(input_quantizer, _) = input_qe_list[0] if (hw_weight_dict is not None and hw_weight_dict[layer.name]["enable_bn_fusing"]): if for_reference and keras_accumulator and not is_input_layer: input_quantizer = quantizer_factory.make_default_quantizer( mode=keras_accumulator) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, input_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "output_quantizer": output_quantizer, "output_shapes": input_shape, "operation_count": operation_count } else: (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer, _) = get_bn_quantizers(layer, quantizer_factory, cfg, keras_quantizer, input_quantizer, is_inference, for_reference, model_weights_already_quantized) qbn = quantized_operators.QBNFactory() qbn.make_quantizer( input_quantizer, gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer, layer.scale, layer.center ) def set_output(op, output): if op: op.output = output if for_reference or not hasattr(layer, "get_quantizers"): set_output( qbn.internal_divide_quantizer, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) set_output( qbn.internal_multiplier, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) set_output( qbn.internal_accumulator, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) set_output( qbn.internal_output, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) if keras_accumulator: set_output( qbn.internal_divide_quantizer, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) set_output( qbn.internal_multiplier, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) set_output( qbn.internal_accumulator, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) set_output( qbn.internal_output.output, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) 
gamma_range = None if hasattr(layer, "gamma_range"): gamma_range = layer.gamma_range beta_range = None if hasattr(layer, "beta_range"): beta_range = layer.beta_range if not layer.center: qbn.beta_quantizer = None if not layer.scale: qbn.gamma_quantizer = None layer_quantizer = qbn.internal_output.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "gamma_quantizer": gamma_quantizer, "beta_quantizer": beta_quantizer, "mean_quantizer": mean_quantizer, "variance_quantizer": variance_quantizer, "gamma_range": gamma_range, "beta_range": beta_range, "internal_divide_quantizer": qbn.internal_divide_quantizer, "internal_multiplier": qbn.internal_multiplier, "internal_accumulator": qbn.internal_accumulator, "output_quantizer": output_quantizer, "output_shapes": input_shape, "operation_count": operation_count } # If qdense, qconv, qpool, qoctave elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS: (input_quantizer, _) = input_qe_list[0] if for_reference or not hasattr(layer, "get_quantizers"): # for_reference: force all quantizers to keras_quantizer weight_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_quantizer: weight_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) else: # qkeras layer qkeras_weight_quantizer = layer.get_quantizers()[0] qkeras_bias_quantizer = layer.get_quantizers()[1] if not quantizer_factory.is_quantizer_supported( qkeras_weight_quantizer): raise TagMissingError( "Unsupported weight quantizer {} on this layer: {}".format( qkeras_weight_quantizer, layer)) if not quantizer_factory.is_quantizer_supported( qkeras_bias_quantizer): raise 
TagMissingError( "Unsupported bias quantizer {} on this layer: {}".format( qkeras_bias_quantizer, layer)) weight_quantizer = quantizer_factory.make_quantizer( qkeras_weight_quantizer) bias_quantizer = quantizer_factory.make_quantizer( qkeras_bias_quantizer) # TODO(lishanok): During inference, if weight and bias is po2, # need to update corresponding quantizer type with min and max # of the constant values. if is_inference: weights = qtools_util.get_weights( layer, model_weights_already_quantized) if weight_quantizer.is_po2: weight_quantizer.update_inference_values(weights[0]) if bias_quantizer.is_po2: bias_quantizer.update_inference_values(weights[1]) multiplier_factory = quantized_operators.MultiplierFactory() multiplier = multiplier_factory.make_multiplier( weight_quantizer, input_quantizer) enable_bn_fusing = ( hw_weight_dict is not None and hw_weight_dict.get(layer.name, None) and hw_weight_dict[layer.name].get("enable_bn_fusing", None)) if enable_bn_fusing and qkeras_weight_quantizer: # When conv layer is fused wiht bn, multiplier bitwidth is ajusted by # kernel quantizer scale values (for auto_po2 type of quantizer only). # For conv layer without fusing, multiplier bitwidth is not adjusted # even if auto_po2 is used in quantizer. Instead, we directly adjusted # the accumulator and store that in fused_accumulator. qtools_util.adjust_multiplier_for_auto_po2( multiplier, qkeras_weight_quantizer) weights = layer.get_weights() kernel = weights[0] kernel_shape = kernel.shape # depthwise_kernel_shape = kernel_size + (input_dim, depth_multiplier) # When computing accumulator bitwidth for dw conv2d layer, we do not # need to count the last two dimensions if node_type in ["QDepthwiseConv2D", "DepthwiseConv2D"]: kernel_shape = kernel.shape[:-2] + (1, 1) kernel_accumulator_factory = quantized_operators.AccumulatorFactory() # Sets use_bias=False so that the accumulator doesn't account for bias # bitwdith. 
kernel_accumulator = kernel_accumulator_factory.make_accumulator( kernel_shape, multiplier, use_bias=False) if not layer.use_bias: bias_quantizer = None accumulator = kernel_accumulator else: # Add bias quantizer bitwidth to the overall accumulator bias_accumulator_instance = adder_factory.IAdder() accumulator = bias_accumulator_instance.make_quantizer( kernel_accumulator.output, bias_quantizer) if debug: print(layer.name or "None") print("weight_quantizer:", weight_quantizer.bits) print("input_quantizer:", input_quantizer.bits) print("multiplier_quantizer:", multiplier.output.bits) print("multiplier_gate_bits:", multiplier.gate_bits) print("accumulator:", accumulator.output.bits) if for_reference or not hasattr(layer, "get_quantizers"): accumulator.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) multiplier.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_accumulator: accumulator.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) multiplier.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) if enable_bn_fusing: bn_layer_name = hw_weight_dict[layer.name]["fused_bn_layer_name"] successor_ids = list(graph.successors(node_id)) bn_layer = graph.nodes[successor_ids[0]]["layer"][0] assert bn_layer.name == bn_layer_name, ( "Batchnorm layer in the graph has different name from hw_weight" f"_dict: {layer.name} vs {bn_layer_name}. 
Check both places to " "ensure they are matching.") # Add additional datatype for bn fused weights (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer, inverse_quantizer) = get_bn_quantizers( bn_layer, quantizer_factory, cfg, keras_quantizer, input_quantizer, is_inference, for_reference, model_weights_already_quantized) qkeras_inverse_quantizer = bn_layer.inverse_quantizer_internal fused_bn = FusedBNFactory() fused_bn.make_quantizer( prev_output_quantizer=kernel_accumulator.output, prev_bias_quantizer=bias_quantizer, beta_quantizer=beta_quantizer, mean_quantizer=mean_quantizer, inverse_quantizer=inverse_quantizer, use_beta=bn_layer.center, use_bias=layer.use_bias, qkeras_inverse_quantizer=qkeras_inverse_quantizer ) if for_reference or not hasattr(layer, "get_quantizers"): fused_bn.internal_accumulator.output = ( quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) if keras_accumulator: fused_bn.internal_accumulator.output = ( quantizer_factory.make_default_quantizer( mode=keras_accumulator)) fused_bn.internal_output.output = fused_bn.internal_accumulator.output layer_quantizer = fused_bn.internal_accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "multiplier": multiplier, "accumulator": accumulator, "weight_quantizer": weight_quantizer, "w_shapes": w_shapes, "bias_quantizer": bias_quantizer, "b_shapes": b_shapes, "bn_inverse_quantizer": inverse_quantizer, "bn_mean_quantizer": mean_quantizer, "bn_beta_quantizer": beta_quantizer, "fused_accumulator": fused_bn.internal_accumulator, "output_quantizer": output_quantizer, "output_shapes": output_shapes, "operation_count": operation_count } else: # Correct accumulator bitwith with the scale values from # auto-po2 type of quantizers and store them in fused_accumulator. 
if ( hasattr(qkeras_weight_quantizer, "__str__") and "quantized_bits" in qkeras_weight_quantizer.__str__() and qkeras_weight_quantizer.alpha == "auto_po2"): fused_accumulator = qtools_util.adjust_accumulator_for_auto_po2( layer, multiplier, qkeras_weight_quantizer, bias_quantizer) else: fused_accumulator = accumulator layer_quantizer = accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "multiplier": multiplier, "accumulator": accumulator, "weight_quantizer": weight_quantizer, "w_shapes": w_shapes, "bias_quantizer": bias_quantizer, "b_shapes": b_shapes, "fused_accumulator": fused_accumulator, "output_quantizer": output_quantizer, "output_shapes": output_shapes, "operation_count": operation_count } elif node_type in ["QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]: # Datatype for Folded Conv/DepthwiseConv layer # TODO(lishanok): Add additional support for Folded Dense layer (input_quantizer, _) = input_qe_list[0] if for_reference or not hasattr(layer, "get_quantizers"): # For_reference: force all quantizers to keras_quantizer. 
weight_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_quantizer: weight_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) else: # QKeras layer qkeras_weight_quantizer = layer.get_quantizers()[0] qkeras_bias_quantizer = layer.get_quantizers()[1] if not quantizer_factory.is_quantizer_supported( qkeras_weight_quantizer): raise TagMissingError( "Unsupported weight quantizer {} on this layer: {}".format( qkeras_weight_quantizer, layer)) if not quantizer_factory.is_quantizer_supported( qkeras_bias_quantizer): raise TagMissingError( "Unsupported bias quantizer {} on this layer: {}".format( qkeras_bias_quantizer, layer)) weight_quantizer = quantizer_factory.make_quantizer( qkeras_weight_quantizer) if qkeras_bias_quantizer: bias_quantizer = quantizer_factory.make_quantizer( qkeras_bias_quantizer) else: bias_quantizer = None # TODO(lishanok): During inference, if weight and bias is po2, # need to update corresponding quantizer type with min and max # of the constant values if is_inference: weights = qtools_util.get_weights( layer, model_weights_already_quantized) if weight_quantizer.is_po2: weight_quantizer.update_inference_values(weights[0]) if bias_quantizer and bias_quantizer.is_po2: bias_quantizer.update_inference_values(weights[1]) multiplier_factory = quantized_operators.MultiplierFactory() multiplier = multiplier_factory.make_multiplier( weight_quantizer, input_quantizer) if qkeras_weight_quantizer: qtools_util.adjust_multiplier_for_auto_po2( multiplier, qkeras_weight_quantizer) weights = layer.get_weights() kernel = weights[0] accumulator_factory = quantized_operators.AccumulatorFactory() accumulator = accumulator_factory.make_accumulator( kernel.shape, multiplier, use_bias=True if bias_quantizer else False) if not 
bias_quantizer: # Set bias the same as accumulator type. bias_quantizer = copy.deepcopy(accumulator.output) if not accumulator.output.is_floating_point: # For fixed point accumulator, needs to add 1 to its bits to avoid # possible satuation. accumulator.output.bits += 1 accumulator.output.int_bits += 1 if for_reference or not hasattr(layer, "get_quantizers"): accumulator.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) multiplier.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_accumulator: accumulator.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) multiplier.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) layer_quantizer = accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, multiplier, accumulator, weight_quantizer, w_shapes, bias_quantizer, b_shapes, output_quantizer, output_shapes, operation_count ) elif node_type: # Any other unsupported layer types -> pass the input quantizer # type to output in qraph print(f"[WARNING] QTools cannot parse {node_type}. 
The input quatnizer" " of this layer is directly passed through to the output!", file=sys.stderr) (input_quantizer, _) = input_qe_list[0] if for_reference and keras_accumulator and not is_input_layer: input_quantizer = quantizer_factory.make_default_quantizer( mode=keras_accumulator) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, input_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType(input_quantizer_list, None, None, None, None, None, None, output_quantizer, output_shapes, operation_count) result = { "source_quantizer_list": source_quantizer_list, "output_layers": output_layers, "input_layers": input_layers, "layer_data_type_map": layer_data_type_map } return result ================================================ FILE: qkeras/qtools/interface.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """I/O implementation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections from qkeras.qtools import generate_layer_data_type_map from qkeras.qtools import qtools_util def print_qstats(graph): """Prints quantization statistics for the model.""" layer_data_type_map = generate_layer_data_type_map(graph) multipliers_counter = collections.Counter() print("") print("Number of operations in model:") for layer, data_type in layer_data_type_map.items(): multiplier = data_type.multiplier multiplier_detail_str = "{}_{}, total_bits:{}, int_bits:{}".format( "signed" if multiplier.output.is_signed == 1 else "unsigned", multiplier.implemented_as(), multiplier.output.bits, multiplier.output.int_bits, ) print("{}: {} x {}".format( layer.name, data_type.operation_count, multiplier_detail_str, )) multipliers_counter[ multiplier_detail_str] += data_type.operation_count print("") print("Number of operation types in model:") for (multiplier_detail_str, total_multiplier_operation_count) in multipliers_counter.items(): print("{}, x {}".format(multiplier_detail_str, total_multiplier_operation_count)) def populate_quantizer(quantizer, shape=None, implemented_as=None): """write all the needed fields in the quantizer to dictionary.""" mydict = collections.OrderedDict() if quantizer is not None: mydict["quantizer_type"] = quantizer.name # floats if quantizer.is_floating_point: mydict["bits"] = quantizer.bits # po2 elif quantizer.is_po2: mydict["bits"] = quantizer.bits mydict["is_signed"] = quantizer.is_signed mydict["max_value"] = quantizer.max_val_po2 # binary elif quantizer.mode in [3, 4]: mydict["bits"] = quantizer.bits mydict["int_bits"] = quantizer.int_bits mydict["is_signed"] = quantizer.is_signed if quantizer.mode == 4: mydict["values"] = [0, 1] else: mydict["values"] = [-1, 1] # ternary(-1, 0, 1) elif quantizer.mode == 2: 
mydict["bits"] = 2 mydict["int_bits"] = 2 mydict["is_signed"] = 1 mydict["values"] = [0, -1, 1] # quantized_bits elif quantizer.mode == 0: mydict["bits"] = quantizer.bits mydict["int_bits"] = quantizer.int_bits + quantizer.is_signed mydict["is_signed"] = quantizer.is_signed if shape is not None: if isinstance(shape, tuple) and shape[0] is None: shape = list(shape) shape[0] = -1 mydict["shape"] = tuple(shape) else: mydict["shape"] = shape if implemented_as is not None: mydict["op_type"] = implemented_as return mydict def map_to_json(mydict): """write the dictionary to json format.""" source_quantizer_list = mydict["source_quantizer_list"] layer_data_type_map = mydict["layer_data_type_map"] output_dict = collections.OrderedDict() q_list = [] for source_quantizer in source_quantizer_list: tmp = populate_quantizer(source_quantizer) q_list.append(tmp) if bool(q_list): output_dict["source_quantizers"] = q_list def set_layer_item(layer_item, key, feature, shape=None, is_compound_datatype=False, output_key_name=None): """Generates the quantizer entry to a given layer_item. This function extracts relevanant quantizer fields using the key ( quantizer name) from a given feature (layer entry from layer_data_type_map). Args: layer_item: Layer entry in the output dictionary. It includes the info such as quantizers, output shape, etc. of each layer key: Quantizer, such as kernel/bias quantizer, etc. If feature feature: layer_data_type_map entry of each layer. This feature will be parsed and converted to layer_item for the output dictionary. shape: quantizer input shape is_compound_datatype: Bool. Wether the quantizer is a compound or unitary quantizer type. For example, kernel quantizer and bias quantizer are unitary quantizer types, multiplier and accumulator are compound quantizer types. output_key_name: str. Change key to output_key_name in layer_item. If None, will use the existing key. 
Return: None """ val = qtools_util.get_val(feature, key) if val is not None: quantizer = val implemented_as = None if is_compound_datatype: quantizer = val.output implemented_as = val.implemented_as() if output_key_name is None: key_name = key else: key_name = output_key_name tmp = populate_quantizer( quantizer, shape=shape, implemented_as=implemented_as) if bool(tmp): layer_item[key_name] = tmp for layer, feature in layer_data_type_map.items(): layer_item = collections.OrderedDict() layer_item["layer_type"] = layer.__class__.__name__ layer_item["input_quantizer_list"] = [ populate_quantizer(q) for q in qtools_util.get_val( feature, "input_quantizer_list")] set_layer_item(layer_item, key="output_quantizer", feature=feature, shape=qtools_util.get_val(feature, "output_shapes")) if layer_item["layer_type"] in [ "QBatchNormalization", "BatchNormalization"]: for key in ["gamma_quantizer", "beta_quantizer", "mean_quantizer", "variance_quantizer", "variance_quantizer"]: set_layer_item(layer_item, key=key, feature=feature) for key in ["internal_divide_quantizer", "internal_multiplier", "internal_accumulator"]: set_layer_item(layer_item, key=key, feature=feature, is_compound_datatype=True) elif layer_item["layer_type"] in [ "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D", "GlobalAveragePooling2D", "QAveragePooling2D", "QGlobalAveragePooling2D"]: set_layer_item(layer_item, key="average_quantizer", feature=feature) for key in ["pool_sum_accumulator", "pool_avg_multiplier"]: set_layer_item(layer_item, key=key, feature=feature, is_compound_datatype=True) else: # populate the feature to dictionary set_layer_item(layer_item, key="weight_quantizer", feature=feature, shape=qtools_util.get_val(feature, "w_shapes")) set_layer_item(layer_item, key="bias_quantizer", feature=feature, shape=qtools_util.get_val(feature, "b_shapes")) output_key_name = None if qtools_util.is_merge_layers(layer): output_key_name = layer.__class__.__name__ + "_quantizer" set_layer_item(layer_item, 
key="multiplier", feature=feature, is_compound_datatype=True, output_key_name=output_key_name) set_layer_item(layer_item, key="accumulator", feature=feature, is_compound_datatype=True) if qtools_util.get_val(feature, "fused_accumulator"): # Add fused weights to the dictionary for key in ["bn_beta_quantizer", "bn_mean_quantizer", "bn_inverse_quantizer"]: set_layer_item(layer_item, key=key, feature=feature) set_layer_item(layer_item, key="fused_accumulator", feature=feature, is_compound_datatype=True) layer_item["operation_count"] = qtools_util.get_val( feature, "operation_count") output_dict[layer.name] = layer_item return output_dict ================================================ FILE: qkeras/qtools/qenergy/__init__.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Export qenergy package.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from .qenergy import energy_estimate ================================================ FILE: qkeras/qtools/qenergy/qenergy.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculate energy consumption of a given quantized model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from qkeras.qtools.generate_layer_data_type_map import KERAS_LAYERS
from qkeras.qtools.generate_layer_data_type_map import QKERAS_LAYERS
from qkeras.qtools.quantized_operators.quantizer_impl import IQuantizer
from qkeras.qtools.settings import cfg
from qkeras.qtools import qtools_util

# Model based on:
# Mark Horowitz, Computing's Energy Problem (and what we can
# do about it). IEEE ISSCC, pp. 10-14, 2014
# www.youtube.com/watch?v=eZdOkDtYMoo&feature=youtu.be&t=497

# all metrics converted to pJ/bit
#
# Per-operation energy cost functions, keyed by datatype family
# ("fp32"/"fp16" floating point, "fpm" fixed point) and operation name.
# Each entry maps a bit count x -> energy, clamped at zero. The cost
# curves themselves come from cfg (settings.py).
# NOTE(review): the "wr" entries for sram/dram reuse the *_rd cost
# functions, and most "fpm" ops reuse fpm_add — presumably an intentional
# modeling simplification; confirm against settings.py.
OP = {
    "fp32": {
        "add": lambda x: max(cfg.fp32_add(x), 0),
        "mul": lambda x: max(cfg.fp32_mul(x), 0)
    },
    "fp16": {
        "add": lambda x: max(cfg.fp16_add(x), 0),
        "mul": lambda x: max(cfg.fp16_mul(x), 0)
    },
    "fpm": {
        "add": lambda x: max(cfg.fpm_add(x), 0),
        "mux": lambda x: max(cfg.fpm_add(x), 0),
        "xor": lambda x: max(cfg.fpm_add(x), 0),
        "and": lambda x: max(cfg.fpm_add(x), 0),
        "or": lambda x: max(cfg.fpm_add(x), 0),
        "shifter": lambda x: max(cfg.fpm_add(x), 0),
        "mul": lambda x: max(cfg.fpm_mul(x), 0)
    },
    "sram": {"rd": lambda x: max(cfg.sram_rd(x), 0),
             "wr": lambda x: max(cfg.sram_rd(x), 0),
             "mul_factor": cfg.sram_mul_factor},
    "dram": {"rd": lambda x: max(cfg.dram_rd(x), 0),
             "wr": lambda x: max(cfg.dram_rd(x), 0),
             "mul_factor": cfg.dram_mul_factor}
}


def get_op_type(quantizer):
  # Maps a quantizer to its OP-table key: "fp16"/"fp32" for floating
  # point, otherwise fixed-point ("fpm").
  assert isinstance(quantizer, IQuantizer)
  if quantizer.is_floating_point:
    return "fp" + str(quantizer.bits)
  else:
    return "fpm"


def memory_read_energy(is_input_layer, tensor_shape, mode, min_sram_size,
                       rd_wr_on_io, quantizer_bits, is_tensor=True):
  """compute energy to bring tensors from DRAM to SRAM.

  Args:
    is_input_layer: bool. Model input layers force the mode below.
    tensor_shape: tensor shape (with batch dim if is_tensor) or a scalar
      element count when is_tensor=False.
    mode: "dram" or "sram" — where the tensor currently resides.
    min_sram_size: lower bound (in bits) used when sizing the SRAM cost.
    rd_wr_on_io: bool; if True, DRAM traffic also pays an SRAM staging cost.
    quantizer_bits: bits per element.
    is_tensor: if True, the leading batch dimension is stripped.

  Returns:
    Energy estimate (pJ) for the read.
  """
  if is_input_layer:
    # Model inputs arrive from DRAM when I/O goes through memory,
    # otherwise they are assumed already resident in SRAM.
    if rd_wr_on_io:
      mode = "dram"
    else:
      mode = "sram"

  energy_mem = 0
  if is_tensor:
    # Drop the batch dimension; energy is per-sample.
    tensor_shape = tensor_shape[1:]

  total_bits = np.prod(tensor_shape) * quantizer_bits
  # SRAM cost scales with log2 of the (clamped) array size.
  total_bits_log2 = np.log2(max(total_bits, min_sram_size))

  if mode == "dram":
    # load input from dram; wx_sizes[1]-> input x quantizer bits
    # total_bits * 20
    energy_mem += OP["dram"]["rd"](total_bits)
    if rd_wr_on_io:
      # write input to sram
      # total_bits * sqrt(data_size/2^18)*0.3125
      # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
      # energy_mem += OP["sram"]["wr"](bits1)
      energy_mem += (
          np.ceil(total_bits * OP["sram"]["mul_factor"]) *
          OP["sram"]["wr"](total_bits_log2)
      )
  elif mode == "sram":
    # read input from sram
    # total_bits * sqrt(data_size/2^18)*0.3125
    # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
    # energy_mem += OP["sram"]["rd"](bits1)
    energy_mem += (
        np.ceil(total_bits * OP["sram"]["mul_factor"]) *
        OP["sram"]["rd"](total_bits_log2)
    )
  return energy_mem


def parameter_read_energy(
    layer, layer_item, weights_on_memory, min_sram_size, rd_wr_on_io):
  """read weights/bias from memory.

  Sums memory_read_energy over the layer's parameters: the four BN vectors
  for (Q)BatchNormalization layers, or kernel + optional bias for
  conv/dense-style layers. Returns 0 for other layer types.
  """
  node_type = layer.__class__.__name__
  rd_energy = 0
  if node_type in ["QBatchNormalization", "BatchNormalization"]:
    gamma_quantizer = layer_item["gamma_quantizer"]
    beta_quantizer = layer_item["beta_quantizer"]
    mean_quantizer = layer_item["mean_quantizer"]
    variance_quantizer = layer_item["variance_quantizer"]

    # gamma, beta, mean, stddev
    weights = layer.get_weights()
    # All four BN parameter vectors share the same length.
    s = len(weights[0])
    for q in [gamma_quantizer, beta_quantizer, mean_quantizer,
              variance_quantizer]:
      if q:
        rd_energy += memory_read_energy(
            False, (s), weights_on_memory, min_sram_size, rd_wr_on_io,
            q.bits, is_tensor=False)
  elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:
    weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
    w_shapes = qtools_util.get_val(layer_item, "w_shapes")
    bias_quantizer = qtools_util.get_val(layer_item, "bias_quantizer")
    b_shapes = qtools_util.get_val(layer_item, "b_shapes")

    rd_energy += memory_read_energy(
        False, w_shapes, weights_on_memory, min_sram_size, rd_wr_on_io,
        weight_quantizer.bits, is_tensor=False
    )
    if bias_quantizer:
      # if use_bias=0, no bias
      bias_shapes = (b_shapes)
      rd_energy += memory_read_energy(
          False, bias_shapes, weights_on_memory, min_sram_size, rd_wr_on_io,
          bias_quantizer.bits, is_tensor=False
      )
  return rd_energy


def memory_write_energy(is_output_layer, tensor_shape, mode, min_sram_size,
                        rd_wr_on_io, quantizer_bits):
  """compute energy to bring tensors from SRAM to DRAM.

  Mirror image of memory_read_energy: model outputs go to DRAM when
  rd_wr_on_io is set, otherwise stay in SRAM.
  """
  if is_output_layer:
    if rd_wr_on_io:
      mode = "dram"
    else:
      mode = "sram"

  energy_mem = 0
  # Drop the batch dimension; energy is per-sample.
  tensor_shape = tensor_shape[1:]

  total_bits = np.prod(tensor_shape) * quantizer_bits
  total_bits_log2 = np.log2(max(total_bits, min_sram_size))

  if mode == "dram":
    # load input from dram; wx_sizes[1]-> input x quantizer bits
    if rd_wr_on_io:
      # read input from sram
      # total_bits * sqrt(data_size/2^18)*0.3125
      # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
      # energy_mem += OP["sram"]["rd"](bits1)
      energy_mem += (
          np.ceil(total_bits * OP["sram"]["mul_factor"]) *
          OP["sram"]["rd"](total_bits_log2)
      )
    # write output to dram
    energy_mem += OP["dram"]["wr"](total_bits)
  elif mode == "sram":
    # write to sram
    # total_bits * sqrt(data_size/2^18)*0.3125
    # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
    # energy_mem += OP["sram"]["wr"](bits1)
    energy_mem += (
        np.ceil(total_bits * OP["sram"]["mul_factor"]) *
        OP["sram"]["wr"](total_bits_log2)
    )
  return energy_mem


def energy_estimate(model, layer_map, weights_on_memory,
                    activations_on_memory, min_sram_size, rd_wr_on_io):
  """estimate energy.

  For each mapped layer, sums four components: input reads, parameter
  reads, output writes, and arithmetic op cost (per layer family below).

  Args:
    model: Keras model.
    layer_map: dict from qtools with "output_layers", "input_layers" and
      "layer_data_type_map".
    weights_on_memory: "dram" or "sram" — where parameters live.
    activations_on_memory: "dram" or "sram" — where activations live.
    min_sram_size: minimum SRAM size (bits) used in cost sizing.
    rd_wr_on_io: bool; whether model I/O goes through DRAM.

  Returns:
    dict: layer name -> {"class_name", "energy": {...}}, plus "total_cost".
  """
  output_layers = layer_map["output_layers"]
  input_layers = layer_map["input_layers"]
  layer_data_type_map = layer_map["layer_data_type_map"]

  result = {}
  total_energy = 0

  # compute MAC and memory access energy for intermediate layers
  for layer in model.layers:
    if layer not in layer_data_type_map.keys():
      continue

    layer_item = layer_data_type_map[layer]
    input_quantizer_list = qtools_util.get_val(
        layer_item, "input_quantizer_list")
    operation_count = qtools_util.get_val(layer_item, "operation_count")
    output_shapes = qtools_util.get_val(layer_item, "output_shapes")
    output_quantizer = qtools_util.get_val(layer_item, "output_quantizer")

    is_input_layer = layer in input_layers
    is_output_layer = layer in output_layers

    input_rd_energy = 0
    energy_op = 0
    input_shape = layer.input_shape
    if not isinstance(input_shape, list):
      input_shape = [input_shape]
    # NOTE(review): the loop variable deliberately shadows the
    # input_shape list; each iteration pairs one input shape with its
    # quantizer.
    for (input_shape, input_quantizer) in zip(
        input_shape, input_quantizer_list):
      input_rd_energy += memory_read_energy(
          is_input_layer, input_shape,
          activations_on_memory, min_sram_size, rd_wr_on_io,
          input_quantizer.bits)

    parameter_rd_energy = parameter_read_energy(
        layer, layer_item, weights_on_memory, min_sram_size,
        rd_wr_on_io)

    output_wr_energy = memory_write_energy(
        is_output_layer, output_shapes,
        activations_on_memory, min_sram_size, rd_wr_on_io,
        output_quantizer.bits)

    # QActivation Layer
    if layer.__class__.__name__ in ["QActivation", "QAdaptiveActivation",
                                    "Activation"]:
      # Activations are assumed free of arithmetic op cost.
      pass
    # QBN Layer
    elif layer.__class__.__name__ in [
        "QBatchNormalization", "BatchNormalization"]:
      # assume QBN is embedded with conv/dense layers
      # -> no memory read/write cost
      divider = layer_item["internal_divide_quantizer"]
      if divider:
        gate_factor = divider.gate_factor
        mode = divider.implemented_as()
        energy_op += gate_factor * OP[
            get_op_type(divider.output)][mode](divider.gate_bits)

      multiplier = layer_item["internal_multiplier"]
      if multiplier:
        gate_factor = multiplier.gate_factor
        mode = multiplier.implemented_as()
        energy_op += gate_factor * OP[
            get_op_type(multiplier.output)][mode](multiplier.gate_bits)
      energy_op *= operation_count
    # Merge layer
    elif layer.__class__.__name__ in ["Add", "Multiply", "Subtract"]:
      # multiply or add operation energy
      # TODO(lishanok): check energy for concatenate
      merge_quantizer = qtools_util.get_val(layer_item, "multiplier")
      mode = merge_quantizer.implemented_as()
      number_of_inputs = len(qtools_util.get_val(
          layer_item, "input_quantizer_list"))
      gate_factor = merge_quantizer.gate_factor
      q = get_op_type(merge_quantizer.output)
      b = merge_quantizer.gate_bits
      # Merging k inputs requires k-1 pairwise ops.
      energy_op = (number_of_inputs - 1) * operation_count * gate_factor * OP[
          q][mode](b)
    # AveragePooling and GlobalAveragePooling
    elif layer.__class__.__name__ in [
        "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D",
        "GlobalAveragePooling2D"]:
      # accumulation operation energy
      accumulator = qtools_util.get_val(layer_item, "accumulator")
      add_energy = OP[get_op_type(accumulator.output)]["add"](
          accumulator.output.bits)
      energy_op = operation_count * add_energy
    # MAC energy calculation
    elif layer.__class__.__name__ in ["QConv2D", "QConv1D",
                                      "QDepthwiseConv2D", "QDense", "Conv2D",
                                      "Conv1D", "DepthwiseConv2D", "Dense"]:
      multiplier = qtools_util.get_val(layer_item, "multiplier")
      accumulator = qtools_util.get_val(layer_item, "accumulator")

      # implementation mode: xor/andgate/shift etc.
      mode = multiplier.implemented_as()
      gate_factor = multiplier.gate_factor

      op = get_op_type(multiplier.output)
      bits = multiplier.gate_bits
      # One MAC = one multiply (c1) + one accumulate (c2).
      c1 = gate_factor * OP[op][mode](bits)
      c2 = OP[get_op_type(accumulator.output)]["add"](accumulator.output.bits)
      energy_op = operation_count * (c1 + c2)
    else:
      # Unmodeled layer types contribute no arithmetic energy.
      pass

    result[layer.name] = {
        "class_name": layer.__class__.__name__,
        "energy": {
            "inputs": float("{0:.2f}".format(input_rd_energy)),
            "outputs": float("{0:.2f}".format(output_wr_energy)),
            "parameters": float("{0:.2f}".format(parameter_rd_energy)),
            "op_cost": float("{0:.2f}".format(energy_op))
        }
    }
    total_energy += (input_rd_energy + output_wr_energy +
                     parameter_rd_energy + energy_op)

  result["total_cost"] = int(total_energy)
  return result



================================================
FILE: qkeras/qtools/qgraph.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Creates networkx graph from a model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import networkx as nx
import tensorflow.keras.backend as K
from tensorflow.keras.layers import InputLayer

from qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module
from qkeras.qtools.settings import cfg

# Sentinel node ids for the synthetic single source / single sink.
SOURCE = -1
SINK = -2


class WrongInputQuantizerError(ValueError):
  # Raised when the number of user-supplied input quantizers does not
  # match the number of model inputs.
  pass


def GraphRemoveNode(graph, v):
  """Removes node "v" from u -> v -> w, connecting u -> w."""
  incoming = [u for u in graph.predecessors(v) if u != v]
  outgoing = [w for w in graph.successors(v) if w != v]

  # add incoming edges
  for u in incoming:
    for w in outgoing:
      in_attr = graph[u][v]
      out_attr = graph[v][w]
      # Bypassing v must not change the tensor shape flowing through.
      assert list(in_attr["shape"]) == list(out_attr["shape"])
      graph.add_edges_from([(u, w, out_attr)])
  graph.remove_node(v)


def GraphRemoveNodeWithNodeType(graph, node_type):
  """Removes node with attribute node_type, reconnecting network."""
  # Collect first, then remove: removal mutates the graph.
  nodes_to_remove = [v for v in graph.nodes
                     if graph.nodes[v]["type"][-1] == node_type]
  for v in nodes_to_remove:
    GraphRemoveNode(graph, v)


def GraphAddHiddenInputLayer(model, graph, input_quantizer_map):
  """For Keras Sequential model api, input layer is hidden. Need to add it.

  Args:
    model: Keras (Sequential) model.
    graph: networkx graph of the model.
    input_quantizer_map: dict mapping input tensor refs to quantizers.
  """
  node_id = -1
  # Scan existing nodes: track the max node id and bail out if an
  # InputLayer already exists.
  for (u, _) in graph.nodes.items():
    if u >= node_id:
      node_id = u
    if u == SOURCE or u == SINK:
      continue
    if graph.nodes[u]["type"][-1] == "InputLayer":
      return

  # determine a node id for the newly added input layer
  node_id += 1

  # find the first layer of the sequential model
  first_layer_nodes = []
  for u in graph.nodes:
    if u == SOURCE or u == SINK:
      continue
    predecessors = list(graph.predecessors(u))
    # find the first layer which doesn't have a parent
    if not predecessors:
      first_layer_nodes.append(u)

  assert len(first_layer_nodes) == 1
  # since it is a sequential model, there is only one first layer
  v_id = first_layer_nodes[0]

  # create a input layer node
  node_type = "InputLayer"
  input_shape = model.layers[0].input_shape
  layer = InputLayer(input_shape=input_shape[1:])
  o_shape = input_shape
  node = (node_id, {"layer": [layer], "type": [node_type],
                    "out_quantizer": None})
  graph.add_nodes_from([node])

  # insert input_quantizers on the edge between input layer and its next layer
  for (a, _) in input_quantizer_map.items():
    edge = (node_id, v_id, {
        "shape": [o_shape], "tensor": a,
        "quantizer": input_quantizer_map[a]})
    graph.add_edges_from([edge])


def GraphAddSingleSourceSingleSink(graph):
  """Connects graph to source and sink nodes."""
  edge_list = []
  for u in graph.nodes:
    if u == SOURCE or u == SINK:
      continue
    if graph.nodes[u]["type"][-1] == "InputLayer":
      # If the layer has multiple nodes, you can use get_output_at(node_index)
      tensor = graph.nodes[u]["layer"][-1].get_output_at(0)
      # if tf 1.0+, we can do tensor.shape with the same effect
      shape = tuple(tensor.get_shape().as_list())
      shape = [shape]
      edge_list.append((SOURCE, u, {
          "shape": shape, "tensor": tensor, "quantizer": None}))
    # Nodes with no outgoing edges are model outputs -> connect to SINK.
    if graph.out_degree(u) == 0:
      tensor = graph.nodes[u]["layer"][-1].get_output_at(0)
      shape = tensor.shape
      edge_list.append((u, SINK, {
          "shape": shape, "tensor": tensor, "quantizer": None}))
  graph.add_edges_from(edge_list)


def GenerateInputQuantizerList(input_quantizers, inputs_length,
                               default_source_quantizer):
  """Generates the list of input quantizers.

  Accepts None (use defaults), a list matching the number of inputs, or a
  single quantizer replicated for every input; otherwise raises
  WrongInputQuantizerError.
  """
  # generate a list of input quantizers
  input_quantizer_list = []
  quantizer_factory = quantizer_factory_module.QuantizerFactory()
  if input_quantizers is None:
    logging.warning(
        "************ SOURCE has no quantizer type."
        " Use default quantizer instead")
    for _ in range(inputs_length):
      input_quantizer_list.append(
          quantizer_factory.make_default_quantizer(
              mode=default_source_quantizer))
  else:
    if inputs_length == len(input_quantizers):
      for quantizer in input_quantizers:
        input_quantizer_list.append(quantizer_factory.make_quantizer(
            quantizer))
    # pass a single quantizer which will be used for all q list.
    elif not isinstance(input_quantizers, list):
      for _ in range(inputs_length):
        input_quantizer_list.append(quantizer_factory.make_quantizer(
            input_quantizers))
    else:
      raise WrongInputQuantizerError(
          "ERROR: Numer of input (%d) must be the same as number of source"
          " quantizers (%d)"%(inputs_length, len(input_quantizers)))
  return input_quantizer_list


def AddToNodeDict(layer_items, layer, nodes_dict):
  """Adds layer to a node_dict, indexed by layer.(input or output).ref"""
  i_list = layer_items
  # Normalize to a list of tensor .ref() keys (hashable tensor handles).
  if not isinstance(layer_items, list):
    i_list = [i_list.ref()]
  else:
    i_list = [tmp.ref() for tmp in i_list]
  for i in i_list:
    # dict: tensor -> layers have this tensor as input
    if i not in nodes_dict.keys():
      nodes_dict[i] = [layer]
    else:
      nodes_dict[i].append(layer)


def GenerateGraphFromModel(model, input_quantizers, default_source_quantizer):
  """Generates single source, single sink graph from model."""

  # node represents layers with attributes [layer, type(class_name)]
  # edge represents the tensor flowing between two layers,
  # attributes is [tensor, output_shape, QA(activation quantizer]
  # input_quantizers are tagged on the edge between input
  # layer and the following layer

  # generate a list of input quantizers
  input_quantizer_list = GenerateInputQuantizerList(input_quantizers,
                                                   len(model.inputs),
                                                   default_source_quantizer)

  # dict that map input_tensor to its quantizer
  input_quantizer_map = {}
  for (idx, tensor) in enumerate(model.inputs):
    input_quantizer_map[tensor.ref()] = input_quantizer_list[idx]

  graph = nx.DiGraph()

  source = SOURCE
  sink = SINK
  node_list = [
      (source, {"layer": [None], "type": [None], "out_quantizer": None}),
      (sink, {"layer": [None], "type": [None], "out_quantizer": None})
  ]
  for i, layer in enumerate(model.layers):
    node_type = layer.__class__.__name__
    node = (i, {"layer": [layer], "type": [node_type],
                "out_quantizer": None})
    node_list.append(node)
  # layer object -> node id, used when wiring edges below.
  node_dict = {layer: i for i, layer in enumerate(model.layers)}

  graph.add_nodes_from(node_list)

  # nodes = tensors
  in_nodes = {}
  out_nodes = {}
  for layer in model.layers:
    AddToNodeDict(layer.input, layer, in_nodes)
    AddToNodeDict(layer.output, layer, out_nodes)

  # union of all tensors; non-redundant
  attr_set = set(in_nodes.keys()) | set(out_nodes.keys())

  # add edges. we want edges annotated with tensors and shapes
  edge_list = []
  for a in attr_set:
    # for a given tensor a, find the layer u that outputs this tensor
    # and the layer v that has this tensor as input
    u_list = out_nodes.get(a, [None])
    v_list = in_nodes.get(a, [None])
    for u in u_list:
      for v in v_list:
        # Skip dangling tensors (model inputs/outputs handled elsewhere).
        if not u or not v:
          continue
        o_shape = u.output_shape
        # layer -> layer_id
        u_id = node_dict[u]
        v_id = node_dict[v]
        # insert input_quantizers on the edge between
        # input layer and its next layer
        if a in input_quantizer_map.keys():
          edge_list.append((u_id, v_id, {
              "shape": o_shape, "tensor": a,
              "quantizer": input_quantizer_map[a]}))
        else:
          edge_list.append((u_id, v_id, {
              "shape": o_shape, "tensor": a, "quantizer": None}))
  graph.add_edges_from(edge_list)

  GraphAddHiddenInputLayer(model, graph, input_quantizer_map)

  return (graph, input_quantizer_list)


def GraphGetInputs(graph):
  """Returns edges SOURCE->u that are inputs."""
  successors = list(graph.successors(SOURCE))
  input_tensors = []
  for u in successors:
    if u == SOURCE or u == SINK:
      continue
input_tensors.append(graph[SOURCE][u]) return input_tensors def GraphGetOutputs(graph): """Returns edges u->SINK that are outputs.""" predecessors = list(graph.predecessors(SINK)) output_tensors = [] for u in predecessors: if u == SOURCE or u == SINK: continue output_tensors.append(graph[u][SINK]) return output_tensors def GraphPropagateActivationsToEdges(graph, debug=False): """Traverses graph and move activations to edges. 1.If current dense/conv layer is specified with QA: outgoing edge (output data type) will be QA type 2.If current dense/conv layer has no QA: default type (float32) is used as output 3.If current layer is QA layer: float32 is used by default as output type on the edge Args: graph: graph to inject activations to. debug: debug mode Returns: None """ scheduler = list(nx.topological_sort(graph)) for vertex in scheduler[1:-1]: # get rid of source and sink vertex if debug: print("########### GraphPropagateActivationsToEdges ############") print("vertex:", vertex) for u, v in graph.edges(vertex): # u=vertex, v: outgoing edge vertex if debug: print(" outgoing ->", v, graph.nodes[v]["layer"][0].name) layer = graph.nodes[u]["layer"][0] result = None # if current layer has no QA specified if not hasattr(layer, "activation"): result = None else: activation_name = layer.activation.__name__ if hasattr( layer.activation, "__name__") else None q_activation_class_name = layer.activation.__class__.__name__ if hasattr( layer.activation, "__class__") else None if debug: print(" layer type:", layer.__class__.__name__) print(" activation object:", layer.activation) print(" activation_name:", activation_name) print(" q_activation_class_name:", q_activation_class_name) # if current layer is QA if (graph.nodes[u]["type"][0] in ["QActivation"] or graph.nodes[u]["type"][0] in ["QAdaptiveActivation"]): result = layer.quantizer # if current layer is not QA layer but has QA specified within elif hasattr(layer, "activation"): if activation_name == "linear": result = None 
else: result = layer.activation if debug: print(" {}->{}: {}".format(u, v, result)) graph[u][v]["quantizer"] = result # all edge_quantizer is the same for all edges starting # from current vertex to different nodes graph.nodes[vertex]["out_quantizer"] = result def PrintGraph(graph, msg=""): """Print graph structure.""" print() print(msg) print() print("nodes:", [(u, graph.nodes[u]["layer"][ 0].name if graph.nodes[u]["layer"][0] is not None else "", graph.nodes[u]["type"]) for u in graph.nodes]) print() print("edges:", [(u, v, graph[u][v]["shape"], graph[u][v]["quantizer"]) for u, v in graph.edges]) def CreateGraph(model, input_quantizers=None, default_source_quantizer=cfg.default_source_quantizer, debug=False): """create graph.""" K.set_image_data_format("channels_last") (graph, source_quantizer_list) = GenerateGraphFromModel( model, input_quantizers, default_source_quantizer) GraphAddSingleSourceSingleSink(graph) GraphRemoveNodeWithNodeType(graph, "Dropout") GraphRemoveNodeWithNodeType(graph, "InputLayer") scheduler = list(nx.topological_sort(graph)) if debug: for vertex in scheduler[1:-1]: for _, v in graph.edges(vertex): if v == SINK: continue print("... calling", graph.nodes[v][ "layer"][0].name, graph.nodes[v]["type"]) return (graph, source_quantizer_list) def GraphUpdateEdge(graph, node_id, quantizer_on_edge): """update the graph edges outgoing from node_id with new quantizer.""" for u, v in graph.edges(node_id): graph[u][v]["quantizer"] = quantizer_on_edge ================================================ FILE: qkeras/qtools/qtools_util.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""utility functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import sys

import numpy as np
import tensorflow.keras.backend as K
import tensorflow as tf

from qkeras.qtools import quantized_operators


def get_val(feature, key, default_val=None):
  """Returns feature[key] for a dict, else getattr(feature, key)."""
  # Return feature[key] or feature.key
  if isinstance(feature, dict):
    val = feature.get(key, default_val)
  else:
    val = getattr(feature, key, default_val)
  return val


def is_shape_alternation_layers(layer):
  """Returns True for layers that only reshape data (MaxPool/Reshape/Flatten)."""
  lname = layer.__class__.__name__
  if lname:
    return "MaxPool" in lname or "Reshape" in lname or "Flatten" in lname
  return False


def is_merge_layers(layer):
  """Returns True for keras merge layers (Add, Multiply, Concatenate, ...)."""
  if layer.__class__.__name__ in [
      "Add", "Multiply", "Subtract", "Average", "Maximum",
      "Minimum", "Concatenate", "Dot"]:
    return True
  else:
    return False


def get_input_quantizers(graph, node_id, quantizer_factory, debug=False):
  """get the current layer's input quantizer."""
  # in merge layers, there are more than 1 input
  output = []
  for parent_node_id in graph.predecessors(node_id):
    # the quantizer for this input lives on the incoming edge
    edge = graph.edges[(parent_node_id, node_id)]
    if debug:
      print("parent_node_id:", parent_node_id)
      print(edge)
    quantizer_on_edge = edge["quantizer"]
    input_quantizer = quantizer_factory.make_quantizer(quantizer_on_edge)
    output.append((input_quantizer, edge))
  return output


def get_input_quantizers_advanced(graph, node_id, is_input_layer,
                                  quantizer_factory, cfg, debug=False):
  """get input quantizer, deal with keras layer or lack of input
  quantizer in qkeras layer."""
  # in merge layers, there are more than 1 input
  default_source_quantizer = cfg.default_source_quantizer
  default_interm_quantizer = cfg.default_interm_quantizer
  output = []
  for parent_node_id in graph.predecessors(node_id):
    edge = graph.edges[(parent_node_id, node_id)]
    if debug:
      print("parent_node_id:", parent_node_id)
      print(edge)
    quantizer_on_edge = edge["quantizer"]
    input_quantizer = quantizer_factory.make_quantizer(quantizer_on_edge)
    if is_input_layer and not input_quantizer:
      # input layer without input_quantizer specified
      # ->use default_source_quantizer
      input_quantizer = quantizer_factory.make_default_quantizer(
          mode=default_source_quantizer)
    elif not input_quantizer:
      # if no input quantizer is available
      # -> use default quantizer from config.json
      input_quantizer = quantizer_factory.make_default_quantizer(
          mode=default_interm_quantizer)
    output.append((input_quantizer, edge))
  return output


def get_operation_count(layer, input_shape):
  """Determines number of multiplier operations in a qkeras layer."""

  # Check if the inputs are a list of Dimensions
  if isinstance(input_shape, list):
    input_shape = input_shape[0]

  operation_count = 0

  if is_merge_layers(layer) or is_shape_alternation_layers(layer):
    # one op per output element (batch dim excluded)
    operation_count = np.prod(input_shape[1:])

  elif layer.__class__.__name__ in [
      "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D",
      "GlobalAveragePooling2D", "QGlobalAveragePooling2D"
  ]:
    # global pooling layers have no pool_size; pool over the full spatial dims
    if hasattr(layer, "pool_size"):
      pool_size = layer.pool_size
    else:
      pool_size = input_shape[1:-1]
    add_ops = np.prod(pool_size)
    output_shape = layer.compute_output_shape(input_shape)
    channels_o = output_shape[-1]
    # total number of add ops
    operation_count = channels_o * add_ops

  elif "UpSampling" in layer.__class__.__name__:
    # UpSampling1D/2D/3D
    output_shape = layer.compute_output_shape(input_shape)
    operation_count = np.prod(output_shape[1:])

  elif ("Activation" in layer.__class__.__name__ or
        "BatchNormalization" in layer.__class__.__name__):
    # elementwise: one op per input element
    operation_count = np.prod(input_shape[1:])

  elif layer.__class__.__name__ in [
      "QConv2D", "Conv2D", "QConv2DBatchnorm", "QConv2DTranspose",
      "Conv2DTranspose"]:
    # multiplies = output pixels * kernel volume (channels_last assumed)
    output_shape = layer.compute_output_shape(input_shape)
    _, _, _, channels_i = input_shape
    _, height_o, width_o, channels_o = output_shape
    weight = layer.get_weights()[0]
    kernel_h, kernel_w, _, _ = weight.shape
    operation_count = (
        height_o * width_o * channels_o * kernel_h * kernel_w * channels_i)

  elif layer.__class__.__name__ in ["QConv1D", "Conv1D"]:
    output_shape = layer.compute_output_shape(input_shape)
    _, _, channels_i = input_shape
    _, time_o, channels_o = output_shape
    weight = layer.get_weights()[0]
    kernel_length, _, _ = weight.shape
    operation_count = (
        time_o * channels_o * kernel_length * channels_i)

  elif layer.__class__.__name__ in ["QDepthwiseConv2D", "DepthwiseConv2D"]:
    # depthwise: each input channel convolved independently
    output_shape = layer.compute_output_shape(input_shape)
    _, _, _, channels_i = input_shape
    _, height_o, width_o, channels_o = output_shape
    weight_1 = layer.get_weights()[0]
    kernel_h, kernel_w, _, _ = weight_1.shape
    operation_count = (
        kernel_h * kernel_w * height_o * width_o * channels_i)

  elif layer.__class__.__name__ in ["QDense", "Dense"]:
    output_shape = layer.compute_output_shape(input_shape)
    # Find the input and output shapes out of all possible dimensions.
    # Usually, the first shape dimension will be the batch size, and the second
    # shape dimension will be the number of channels. However, if the
    # Dense layer is in Squeeze-and-Excite, the first shape dimension
    # will be the batch size, the second and third shape dimension will be the
    # spatial sizes (should both be 1), and the fourth shape dimensions will
    # be the number of channels
    #
    # Note: asserts have been changed to sum(*shape > 1) <= 1 to avoid the case
    # when the dense layer has an output with shape (None, 1), which results in
    # sum(oshape > 1) = 0.
    ishape = np.array([i for i in input_shape if i is not None])
    assert sum(ishape > 1) <= 1, ("Input Tensor shape in %s has "
                                  "multiple >1 size dims") % layer.name
    size_i = np.max(ishape)
    oshape = np.array([i for i in output_shape if i is not None])
    assert sum(oshape > 1) <= 1, ("Output Tensor shape in %s has " +
                                  "multiple >1 size dims") % layer.name
    size_o = np.max(oshape)
    operation_count = (size_i * size_o)

  else:
    print("operation count for {} is defaulted to 0".format(
        layer))

  return int(operation_count)


def get_weights(layer, model_weights_already_quantized=True):
  """Get layer weights.

  Args:
    layer: given qkeras/keras layer
    model_weights_already_quantized: bool. whether the given layer's
      weights are already quantized. This is necessary because with certain
      quantizers, eg., quantized_bits(alpha="auto_po2"), we cannot quantize
      the same weights more than once, as it will lead to different results.

  Returns:
    Quantized layer weights.
  """
  weights = layer.get_weights()
  out = copy.deepcopy(weights)
  if not model_weights_already_quantized:
    for j, weight in enumerate(weights):
      if hasattr(layer, "get_quantizers") and layer.get_quantizers()[j]:
        out[j] = K.eval(
            layer.get_quantizers()[j](K.constant(weight)))
  return out


def get_scale_from_quantized_bits_with_auto_po2(quantizer):
  """Get scale from quantized_bits with alpha=auto_po2."""
  # scale may be a tf tensor (after the quantizer has been called),
  # a numpy array, or unset -> return None in that case
  if hasattr(quantizer.scale, "numpy"):
    return quantizer.scale.numpy()
  elif isinstance(quantizer.scale, np.ndarray):
    return quantizer.scale
  else:
    return None


def adjust_multiplier_for_auto_po2(multiplier, qkeras_weight_quantizer):
  """Adjust multiplier when weight quantizer is auto_po2 type.

  Multiplier_bits = bits_x + bits_w
  Multiplier_intbits = log2(scale) + intbits_x + intbits_w
  Because we might have different scale for auto_po2 quantizer at different
  output channels, multiplier will have different integer bits at different
  output channel accordingly, which is not desirable in hardware
  implementation.
  Therefore we set a general multiplier quantizers so that it provides
  enough fractional bits and integer bits for all output channels.
  """
  print("adjust multiplier for auto_po2 ...")
  output_quantizer = multiplier.output
  if (hasattr(qkeras_weight_quantizer, "__str__") and
      "quantized_bits" in qkeras_weight_quantizer.__str__() and
      qkeras_weight_quantizer.alpha == "auto_po2"):
    bits = output_quantizer.bits
    int_bits = output_quantizer.int_bits
    scale = get_scale_from_quantized_bits_with_auto_po2(
        qkeras_weight_quantizer)
    if scale is not None:
      if isinstance(scale, np.ndarray):
        # per-channel scales: take the widest shift in either direction
        scale = np.squeeze(scale)
        max_shift = int(np.log2(np.max(scale)))
        min_shift = int(np.log2(np.min(scale)))
      elif isinstance(scale, float):
        max_shift = int(np.log2(scale))
        min_shift = max_shift
      else:
        raise ValueError(f"Scale should be either numpy array or float,"
                         f"{type(scale)} is found instead!")
      # In order to set a general quantizer for different output channels,
      # we need to set both fractional bits and integer bits as the max
      # required bits for different output channels
      max_fractional_bits = bits - int_bits - min_shift
      max_int_bits = int_bits + max_shift
      total_bits = max_int_bits + max_fractional_bits
      output_quantizer.bits = total_bits
      output_quantizer.int_bits = max_int_bits
    else:
      # If scale is None, it means the quantizer has
      # never being called. Therfore we skip the bitwidth adjustment steps
      print("[WARNING] The weight quantizer is never called even though it has "
            "alpha=auto_po2. In this case we do not adjust the multiplier and "
            "accumulator bit width since we don't know the exact values of "
            "scale", file=sys.stderr)
  elif hasattr(qkeras_weight_quantizer, "alpha") and (
      qkeras_weight_quantizer.alpha == "auto_po2"):
    # auto_po2 on other quantizer types is not handled; warn and leave as-is
    print("[WARNING] auto_po2 is detected on a non-quantized_bits quantizer."
          "Currently in QTools we do not yet support the auto_po2 with the "
          f" given quantizer type: {type(qkeras_weight_quantizer)}."
          "Therefore we do not adjust the multiplier and accumulator bit width")


def adjust_accumulator_for_auto_po2(
    layer, multiplier, qkeras_weight_quantizer, bias_quantizer):
  """Adjust accumulator when weight quantizer is auto_po2 type."""
  # Work on a copy so the caller's multiplier is left untouched.
  fused_multiplier = copy.deepcopy(multiplier)
  adjust_multiplier_for_auto_po2(fused_multiplier, qkeras_weight_quantizer)
  weights = layer.get_weights()
  kernel = weights[0]
  kernel_shape = kernel.shape
  # depthwise_kernel_shape = kernel_size + (input_dim, depth_multiplier)
  # When computing accumulator bitwidth for dw conv2d layer, we do not
  # need to count the last two dimensions
  if layer.__class__.__name__ in ["QDepthwiseConv2D", "DepthwiseConv2D"]:
    assert kernel_shape[-1] == 1, ("depth_multiplier must be 1, "
                                   f"{kernel_shape[-1]} found instead!")
    kernel_shape = kernel.shape[:-2] + (1, 1)
  kernel_accumulator_factory = quantized_operators.AccumulatorFactory()
  # Sets use_bias=False so that the accumulator doesn't account for bias
  # bitwdith.
  fused_kernel_accumulator = kernel_accumulator_factory.make_accumulator(
      kernel_shape, fused_multiplier, use_bias=False)
  if not layer.use_bias:
    bias_quantizer = None
    fused_accumulator = fused_kernel_accumulator
  else:
    # Add bias quantizer bitwidth to the overall accumulator
    bias_accumulator_instance = quantized_operators.adder_factory.IAdder()
    fused_accumulator = bias_accumulator_instance.make_quantizer(
        fused_kernel_accumulator.output, bias_quantizer)
  return fused_accumulator


def find_divisors(num):
  """Returns all positive divisors of num, ascending."""
  return [i for i in range(1, num + 1) if num % i == 0]


def get_layer_info(layer: tf.keras.layers.Layer, attr_name: str):
  """Returns one attribute (channels, kernel size, type) of a supported layer."""
  layer_type = layer.__class__.__name__
  supported_layer_types = [
      "QDense", "QConv2D", "QDepthwiseConv2D", "MaxPooling2D",
      "GlobalMaxPooling2D", "QAveragePooling2D", "QGlobalAveragePooling2D",
      "UpSampling2D", "Concatenate", "QBatchNormalization", "QActivation",
      "Activation", "Dropout", "Reshape", "ZeroPadding2D"]
  assert layer_type in supported_layer_types, (
      f"For now only {supported_layer_types} layers are supported. "
      f"Found {layer_type} instead.")

  # Get layer info such as input/output channels, kernel size and quantizers.
  input_channel = layer.input_shape[-1]
  output_channel = layer.output_shape[-1]
  # Change default kernel_size to 1 to represent Dense Layer with Conv Layers.
  kernel_height, kernel_width = layer.kernel_size if hasattr(
      layer, "kernel_size") else (1, 1)
  layer_dict = {
      "layer_type": layer_type,
      "input_channel": input_channel,
      "output_channel": output_channel,
      "kernel_height": kernel_height,
      "kernel_width": kernel_width
  }
  # unknown attr_name -> None
  return layer_dict.get(attr_name, None)


def is_upsampled(layer: tf.keras.layers.Layer):
  # Evaluate if a given layer is doing upsampling.
  return "UpSampling" in layer.__class__.__name__



================================================
FILE: qkeras/qtools/quantized_operators/__init__.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Export quantizer package.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from .accumulator_factory import AccumulatorFactory from .multiplier_factory import MultiplierFactory from .multiplier_impl import IMultiplier, FloatingPointMultiplier, FixedPointMultiplier, Mux, AndGate, Adder, XorGate, Shifter from .accumulator_impl import IAccumulator, FloatingPointAccumulator, FixedPointAccumulator from .quantizer_impl import IQuantizer, QuantizedBits, Binary, QuantizedRelu, Ternary, FloatingPoint, PowerOfTwo, ReluPowerOfTwo from .quantizer_factory import QuantizerFactory from .qbn_factory import QBNFactory from .fused_bn_factory import FusedBNFactory from .merge_factory import MergeFactory from .divider_factory import IDivider from .subtractor_factory import ISubtractor ================================================ FILE: qkeras/qtools/quantized_operators/accumulator_factory.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Create accumulator quantizers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy from qkeras.qtools.quantized_operators import accumulator_impl from qkeras.qtools.quantized_operators import multiplier_impl class AccumulatorFactory: """interface for accumulator type.""" def make_accumulator( self, kernel_shape, multiplier: multiplier_impl.IMultiplier, use_bias=True ) -> accumulator_impl.IAccumulator: """Create an accumulator instance.""" # Creates a local deep copy so that any changes we made to the multiplier # will not impact the input multiplier type. This is necessary in case # we call this function multiple times to get different multipliers. local_multiplier = copy.deepcopy(multiplier) # The type and bit width of the accumulator is deteremined from the # multiplier implementation, and the shape of both kernel and bias if local_multiplier.output.is_floating_point: accumulator = accumulator_impl.FloatingPointAccumulator( local_multiplier) # po2*po2 is implemented as Adder; output type is po2 # in multiplier, po2 needs to be converted to FixedPoint elif local_multiplier.output.is_po2: accumulator = accumulator_impl.Po2Accumulator( kernel_shape, local_multiplier, use_bias) # fixed point else: accumulator = accumulator_impl.FixedPointAccumulator( kernel_shape, local_multiplier, use_bias) return accumulator ================================================ FILE: qkeras/qtools/quantized_operators/accumulator_impl.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Accumulator operation implementation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc

from absl import logging
import numpy as np

from qkeras.qtools.quantized_operators import multiplier_impl
from qkeras.qtools.quantized_operators import quantizer_impl


def po2_to_qbits(quantizer: quantizer_impl.IQuantizer):
  """convert po2 type to qbits type."""
  (min_exp, max_exp) = quantizer.get_min_max_exp()
  # min_exp is number of bits needed on the right in qbits
  # max_exp is number of bits needed on the left in qbits
  unsigned_bits = min_exp + max_exp
  int_bits = max_exp
  # sign bit adds one extra bit on top of the magnitude bits
  sign_bit = quantizer.is_signed
  bits = sign_bit + unsigned_bits
  return (int(bits), int(int_bits))


class IAccumulator(abc.ABC):
  """abstract class for accumulator."""

  @staticmethod
  @abc.abstractmethod
  def implemented_as():
    # Returns the hardware primitive this accumulator maps to (e.g. "add").
    pass


class FloatingPointAccumulator(IAccumulator):
  """class for floating point accumulator."""

  def __init__(
      self,
      multiplier: multiplier_impl.IMultiplier
  ):
    super().__init__()
    self.multiplier = multiplier
    # Accumulating floats keeps the multiplier's float width; no bit growth
    # is modeled for floating-point accumulation.
    self.output = quantizer_impl.FloatingPoint(
        bits=self.multiplier.output.bits)
    self.output.bits = self.multiplier.output.bits
    self.output.int_bits = -1
    self.output.is_signed = self.multiplier.output.is_signed
    self.output.is_floating_point = True
    self.output.op_type = "accumulator"

  @staticmethod
  def implemented_as():
    return "add"


class FixedPointAccumulator(IAccumulator):
  """class for fixed point accumulator."""

  def __init__(
      self,
      kernel_shape,
      multiplier: multiplier_impl.IMultiplier,
      use_bias=True
  ):
    super().__init__()
    if len(kernel_shape) not in (
        2,
        4,
    ):
      logging.fatal(
          "unsupported kernel shape, "
          "it is neither a dense kernel of length 2,"
          " nor a convolution kernel of length 4")
    # number of products summed per output = all kernel dims except the
    # output-channel (last) dimension
    kernel_shape_excluding_output_dim = kernel_shape[:-1]
    kernel_add_ops = np.prod(kernel_shape_excluding_output_dim)
    # bias are associate with filters; each filter adds 1 bias
    bias_add = 1 if use_bias else 0
    add_ops = kernel_add_ops + bias_add
    # summing N values can grow the integer part by ceil(log2(N)) bits
    self.log_add_ops = int(np.ceil(np.log2(add_ops)))
    self.multiplier = multiplier
    self.output = quantizer_impl.QuantizedBits()
    self.output.bits = self.log_add_ops + self.multiplier.output.bits
    self.output.int_bits = self.log_add_ops + self.multiplier.output.int_bits
    self.output.is_signed = self.multiplier.output.is_signed
    self.output.op_type = "accumulator"
    assert not self.multiplier.output.is_floating_point
    self.output.is_floating_point = False

  @staticmethod
  def implemented_as():
    return "add"


class Po2Accumulator(FixedPointAccumulator):
  """accumulator for po2."""

  # multiplier is po2. multiplier output needs to convert
  # to Fixedpoint before Accumulator.
  def __init__(
      self,
      kernel_shape,
      multiplier: multiplier_impl.IMultiplier,
      use_bias=True
  ):
    super().__init__(kernel_shape, multiplier, use_bias)
    assert multiplier.output.is_po2
    # convert multiplier output from po2 to quantized_bits, then apply the
    # same log2(add_ops) growth as the fixed-point base class
    (bits_from_po2multiplier, int_bits_from_po2multiplier) = po2_to_qbits(
        multiplier.output)
    self.output.bits = self.log_add_ops + int(bits_from_po2multiplier)
    self.output.int_bits = self.log_add_ops + int(int_bits_from_po2multiplier)
    self.output.op_type = "accumulator"

  @staticmethod
  def implemented_as():
    return "add"


================================================
FILE: qkeras/qtools/quantized_operators/adder_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""implement adder quantizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import copy

from absl import logging

from qkeras.qtools.quantized_operators import adder_impl
from qkeras.qtools.quantized_operators import quantizer_impl


class IAdder(abc.ABC):
  """abstract class for adder."""

  def __init__(self):
    # 6x6 dispatch table indexed by [quantizer_1.mode][quantizer_2.mode].
    # NOTE(review): mode values appear to enumerate quantizer families
    # (mode 1 looks like po2 and mode 5 floating point, judging by the
    # Po2Adder and FloatingPointAdder rows/columns) — confirm against
    # quantizer_impl's mode definitions.
    self.adder_impl_table = [
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.Po2FixedPointAdder,
            adder_impl.Po2Adder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder
        ]
    ]

  def make_quantizer(self, quantizer_1: quantizer_impl.IQuantizer,
                     quantizer_2: quantizer_impl.IQuantizer):
    """make adder quantizer."""
    # deep-copy so the adder implementation can mutate its inputs freely
    local_quantizer_1 = copy.deepcopy(quantizer_1)
    local_quantizer_2 = copy.deepcopy(quantizer_2)
    mode1 = local_quantizer_1.mode
    mode2 = local_quantizer_2.mode
    adder_impl_class = self.adder_impl_table[mode1][mode2]
    logging.debug(
        "qbn adder implemented as class %s",
        adder_impl_class.implemented_as())
    return adder_impl_class(
        local_quantizer_1,
        local_quantizer_2
    )


================================================
FILE: qkeras/qtools/quantized_operators/adder_impl.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""adder operation implementation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc

from qkeras.qtools.quantized_operators import accumulator_impl
from qkeras.qtools.quantized_operators import quantizer_impl


def po2_qbits_converter(po2_quantizer: quantizer_impl.IQuantizer):
  """convert a po2 quantizer to fixedpoint quantizer."""
  (bits_from_po2, int_bits_from_po2) = accumulator_impl.po2_to_qbits(
      po2_quantizer)
  qbits_quantizer = quantizer_impl.QuantizedBits()
  qbits_quantizer.bits = bits_from_po2
  qbits_quantizer.int_bits = int_bits_from_po2
  qbits_quantizer.is_signed = po2_quantizer.is_signed
  return qbits_quantizer


class IAdderImpl(abc.ABC):
  """abstract class for adder."""

  @staticmethod
  @abc.abstractmethod
  def implemented_as():
    # Returns the hardware primitive this adder maps to.
    pass


class FixedPointAdder(IAdderImpl):
  """adder for fixed point."""

  def __init__(self, quantizer_1, quantizer_2):
    self.output = quantizer_impl.QuantizedBits()
    # adding two values can grow the integer part by 1 bit (carry)
    self.output.int_bits = max(quantizer_1.int_bits,
                               quantizer_2.int_bits) + 1
    # fractional bits of each operand = total - sign - integer bits
    fractional_bits1 = (quantizer_1.bits -
                        int(quantizer_1.is_signed) -
                        quantizer_1.int_bits)
    fractional_bits2 = (quantizer_2.bits -
                        int(quantizer_2.is_signed) -
                        quantizer_2.int_bits)
    # keep the finer of the two fractional precisions
    fractional_bits = max(fractional_bits1, fractional_bits2)
    self.output.is_signed = quantizer_1.is_signed | quantizer_2.is_signed
    self.output.bits = (self.output.int_bits +
                        int(self.output.is_signed) +
                        fractional_bits)
    self.output.mode = 0
    self.output.is_floating_point = False
    self.output.is_po2 = 0

  @staticmethod
  def implemented_as():
    return "add"


class FloatingPointAdder(IAdderImpl):
  """floating point adder."""

  def __init__(self, quantizer_1, quantizer_2):
    # result keeps the wider of the two float widths
    bits = max(quantizer_1.bits, quantizer_2.bits)
    self.output = quantizer_impl.FloatingPoint(
        bits=bits)

  @staticmethod
  def implemented_as():
    return "add"


class Po2FixedPointAdder(IAdderImpl):
  """adder between po2 and fixed point."""

  def __init__(self, quantizer_1, quantizer_2):
    # identify which operand is the po2 one; order is not fixed by the caller
    if quantizer_1.is_po2:
      po2_quantizer = quantizer_1
      fixedpoint_quantizer = quantizer_2
    else:
      po2_quantizer = quantizer_2
      fixedpoint_quantizer = quantizer_1

    # convert po2 to qbits first
    po2_qbits_quantizer = po2_qbits_converter(po2_quantizer)

    # qbits + qbits -> FixedPointAdder
    self.output = FixedPointAdder(po2_qbits_quantizer,
                                  fixedpoint_quantizer).output

  @staticmethod
  def implemented_as():
    return "add"


class Po2Adder(IAdderImpl):
  """adder for po2 type."""

  def __init__(self, quantizer_1, quantizer_2):
    # po2 + po2: convert both to fixed point, then add as fixed point
    qbits_quantizer_1 = po2_qbits_converter(quantizer_1)
    qbits_quantizer_2 = po2_qbits_converter(quantizer_2)
    self.output = FixedPointAdder(qbits_quantizer_1,
                                  qbits_quantizer_2).output

  @staticmethod
  def implemented_as():
    return "add"


================================================
FILE: qkeras/qtools/quantized_operators/divider_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
class UnacceptedQuantizerError(ValueError):
  """Raised when no divider implementation exists for a quantizer pair."""
  pass


class IDivider(abc.ABC):
  """abstract class for divider.

  Maps (numerator mode, denominator mode) to a divider implementation
  class plus an output-quantizer template via divider_impl_table.
  """

  def __init__(self):
    # divider_impl_table[numerator_mode][denominator_mode] ->
    # (implementation class, output datatype template).
    # (None, None) entries mark denominator types that cannot be
    # divided by (presumably ternary/binary modes -- TODO confirm the
    # mode ordering against quantizer_impl).
    self.divider_impl_table = [
        [
            # when qbits is denominator, use default bits for float result
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.QuantizedBits()),
            (None, None),
            (None, None),
            (None, None),
            # when bits sets to None, will decide f16/f32 according
            # to input quantizer
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            # po2 / po2 subtracts exponents.
            (divider_impl.Subtractor, quantizer_impl.PowerOfTwo()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.QuantizedBits()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.PowerOfTwo()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.PowerOfTwo()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None)),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None)),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ]
    ]

  def make_quantizer(self, numerator_quantizer: quantizer_impl.IQuantizer,
                     denominator_quantizer: quantizer_impl.IQuantizer):
    """Makes the divider quantizer.

    Args:
      numerator_quantizer: IQuantizer. Numerator of the division.
      denominator_quantizer: IQuantizer. Denominator of the division.

    Returns:
      An IDividerImpl instance computing the output datatype.

    Raises:
      UnacceptedQuantizerError: if the denominator type cannot be
        divided by.
    """
    # Create a local copy so that the changes made here won't change the input
    local_numerator_quantizer = copy.deepcopy(numerator_quantizer)
    local_denominator_quantizer = copy.deepcopy(denominator_quantizer)

    mode1 = local_numerator_quantizer.mode
    mode2 = local_denominator_quantizer.mode
    (divider_impl_class, output_quantizer) = self.divider_impl_table[
        mode1][mode2]
    local_output_quantizer = copy.deepcopy(output_quantizer)

    if divider_impl_class is None:
      raise UnacceptedQuantizerError(
          "denominator quantizer {} not accepted!".format(
              denominator_quantizer.name))

    # Bug fix: this message previously read "qbn adder implemented as
    # class", copy-pasted from the qbn factory; this is the divider.
    logging.debug(
        "divider implemented as class %s",
        divider_impl_class.implemented_as())

    return divider_impl_class(
        local_numerator_quantizer,
        local_denominator_quantizer,
        local_output_quantizer
    )
class IDividerImpl(abc.ABC):
  """Base class for divider implementations.

  Stores the operand and output quantizers; subclasses derive the
  output datatype and the gate cost.
  """

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    # NOTE(review): attribute keeps the historical misspelling
    # ("quantizier") because external code may read it by that name.
    self.numerator_quantizier = numerator_quantizer
    self.denominator_quantizer = denominator_quantizer
    self.output = output_quantizer

  @staticmethod
  @abc.abstractmethod
  def implemented_as():
    pass


class FloatingPointDivider(IDividerImpl):
  """Divider producing a floating-point result."""

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    super().__init__(numerator_quantizer, denominator_quantizer,
                     output_quantizer)

    if self.output.bits is None:
      # Output width (f16 vs f32) follows the widest floating-point
      # operand.
      candidates = [0]
      for operand in (numerator_quantizer, denominator_quantizer):
        if operand.is_floating_point:
          candidates.append(operand.bits)
      self.output.bits = max(candidates)

    self.gate_bits = self.output.bits
    self.gate_factor = 1

  @staticmethod
  def implemented_as():
    # TODO(lishanok): change cost from "mul" to "divide"
    return "mul"


class Shifter(IDividerImpl):
  """Divides a fixed-point numerator by a po2 denominator via shifting."""

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    super().__init__(numerator_quantizer, denominator_quantizer,
                     output_quantizer)

    qbit_quantizer = numerator_quantizer
    po2_quantizer = denominator_quantizer

    # Division swaps the exponent roles: the denominator's largest
    # exponent produces the largest right shift, and vice versa.
    max_exp, min_exp = po2_quantizer.get_min_max_exp()

    qbits_bits = qbit_quantizer.bits
    qbits_int_bits = qbit_quantizer.int_bits

    self.output.bits = int(qbits_bits + max_exp + min_exp)
    if po2_quantizer.is_signed and not qbit_quantizer.is_signed:
      # Signed qbits already carry their sign bit; an unsigned qbits
      # divided by a signed po2 needs one extra bit because
      # min_exp/max_exp do not include the po2 sign bit.
      self.output.bits += 1
    self.output.int_bits = int(qbits_int_bits + max_exp)
    self.output.is_signed = (qbit_quantizer.is_signed |
                             po2_quantizer.is_signed)
    self.output.is_floating_point = False

    if po2_quantizer.inference_value_counts > 0:
      # During qbn inference, energy scales with the number of unique
      # shift values.
      self.gate_factor = po2_quantizer.inference_value_counts * 0.3
      self.gate_bits = qbits_bits
    else:
      # Programmable shifter, cost model similar to a sum gate.
      self.gate_factor = 1
      width = np.sqrt(2 ** po2_quantizer.bits * qbits_bits)
      self.gate_bits = width * np.log10(width)

  @staticmethod
  def implemented_as():
    return "shifter"


class Subtractor(IDividerImpl):
  """Divides two po2 operands by subtracting their exponents."""

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    super().__init__(numerator_quantizer, denominator_quantizer,
                     output_quantizer)

    self.output.bits = 1 + max(numerator_quantizer.bits,
                               denominator_quantizer.bits)
    self.output.int_bits = 1 + max(numerator_quantizer.int_bits,
                                   denominator_quantizer.int_bits)
    # The quotient is always treated as signed.
    self.output.is_signed = 1
    self.output.is_floating_point = False
    self.output.is_po2 = 1

    unknown_max = (numerator_quantizer.max_val_po2 == -1 or
                   denominator_quantizer.max_val_po2 == -1)
    if unknown_max:
      # -1 is the "unknown" sentinel and propagates.
      self.output.max_val_po2 = -1
    else:
      # Largest representable quotient is the ratio of the two maxima.
      self.output.max_val_po2 = (numerator_quantizer.max_val_po2 /
                                 denominator_quantizer.max_val_po2)

    if "po2" in output_quantizer.name:
      output_quantizer.name = ("quantized_po2" if self.output.is_signed
                               else "quantized_relu_po2")

    self.gate_bits = self.output.bits
    self.gate_factor = 1

  @staticmethod
  def implemented_as():
    return "add"
class FusedBNFactory:
  """Determines which quantizer implementation to use for fused bn.

  Creates a fused bn instance. The type and bit width of the
  output_quantizer is determined from both the previous layer and the
  batchnorm weight types:

    z = bn(y) = bn_inv * x - fused_bias

  is the output of the previous layer followed by the bn layer, with:

    bn_inv = gamma * rsqrt(variance^2 + epsilon)

  computed from the bn layer weights with the inverse_quantizer
  datatype, x the previous layer's output, and

    fused_bias = bn_inv * bias + beta - bn_inv * mean

  where bias is the bias term from the previous layer and beta and
  mean are bn layer weights.
  """

  def make_quantizer(
      self,
      prev_output_quantizer: quantizer_impl.IQuantizer,
      beta_quantizer: quantizer_impl.IQuantizer,
      mean_quantizer: quantizer_impl.IQuantizer,
      inverse_quantizer: quantizer_impl.IQuantizer,
      prev_bias_quantizer: quantizer_impl.IQuantizer,
      use_beta: bool,
      use_bias: bool,
      qkeras_inverse_quantizer: base_quantizer.BaseQuantizer,
  ):
    """Makes a fused_bn quantizer.

    Args:
      prev_output_quantizer: IQuantizer. Previous layer output quantizer.
      beta_quantizer: IQuantizer. bn layer beta quantizer.
      mean_quantizer: IQuantizer. bn layer mean quantizer.
      inverse_quantizer: IQuantizer. bn layer inverse quantizer.
      prev_bias_quantizer: IQuantizer. conv layer bias quantizer.
      use_beta: Bool. Whether beta is enabled in the bn layer.
      use_bias: Bool. Whether bias is used in the conv layer.
      qkeras_inverse_quantizer: QKeras quantizer type. bn layer inverse
        quantizer with QKeras quantizer type.

    Returns:
      None. Results are stored in internal_accumulator/internal_output.
    """
    assert not isinstance(inverse_quantizer, quantizer_impl.FloatingPoint), (
        "inverse_quantizer in batchnorm layer has to be set for "
        "fused bn inference in hardware!")

    mult_factory = multiplier_factory.MultiplierFactory()

    def _inverse_times(quantizer):
      # Datatype of bn_inv * quantizer, with the auto-po2 adjustment
      # applied to the multiplier.
      multiplier = mult_factory.make_multiplier(inverse_quantizer, quantizer)
      qtools_util.adjust_multiplier_for_auto_po2(
          multiplier, qkeras_inverse_quantizer)
      return multiplier

    # bn_inv * x
    multiplier_x = _inverse_times(prev_output_quantizer)
    # Datatype of bn_inv * mean, one piece of the fused bias.
    multiplier_mean = _inverse_times(mean_quantizer)

    add_factory = adder_factory.IAdder()

    if use_bias:
      # Datatype of bn_inv * bias, then of bn_inv*bias - bn_inv*mean.
      multiplier_bias = _inverse_times(prev_bias_quantizer)
      bias_minus_mean = add_factory.make_quantizer(
          multiplier_bias.output, multiplier_mean.output)
    else:
      # No bias in the previous layer: the datatype of
      # bn_inv*bias - bn_inv*mean collapses to that of bn_inv*mean.
      bias_minus_mean = multiplier_mean

    if use_beta:
      # fused_bias = bn_inv*bias + beta - bn_inv*mean
      fused_bias = add_factory.make_quantizer(
          bias_minus_mean.output, beta_quantizer)
    else:
      # Without beta, fused_bias = bn_inv*bias - bn_inv*mean.
      fused_bias = bias_minus_mean

    # bn_inv * x - fused_bias
    accumulator = add_factory.make_quantizer(
        multiplier_x.output, fused_bias.output)

    self.internal_accumulator = accumulator
    self.internal_output = accumulator
class MergeFactory:
  """determine which merge implementation to use."""

  def make_quantizer(self, input_qe_list, layer_type):
    """Makes a merge quantizer for a Keras merge layer.

    Args:
      input_qe_list: list of (quantizer, edge) pairs, one per merge input.
      layer_type: class name of the Keras merge layer.

    Returns:
      An IMerger subclass instance for the given layer type.

    Raises:
      ValueError: if layer_type is not a supported merge layer
        (previously this case silently returned None).
    """
    mergers = {
        "Add": Add,
        "Multiply": Multiply,
        "Maximum": Maximum,
        "Minimum": Minimum,
        "Average": Average,
        "Concatenate": Concatenate,
        "Dot": Dot,
    }
    if layer_type not in mergers:
      raise ValueError(
          "Unsupported merge layer type: {}".format(layer_type))
    return mergers[layer_type](input_qe_list)


class IMerger(abc.ABC):
  """abstract class for merge quantizer."""

  def __init__(self, input_qe_list):
    # Each node is a (quantizer, edge) pair; keep them as parallel lists.
    self.input_quantizers = [node[0] for node in input_qe_list]
    self.edges = [node[1] for node in input_qe_list]


def _reduce_fixed_point(quantizers):
  """Folds a list of quantizers into common datatype bounds.

  Po2 quantizers are converted to their fixed-point equivalents first.

  Args:
    quantizers: list of IQuantizer instances.

  Returns:
    Tuple (max_bits, max_int_bits, is_signed, is_floating_point,
    float_bits) aggregated over all inputs; float_bits is the widest
    floating-point operand (0 if none).
  """
  max_bits = -1
  max_int_bits = -1
  is_signed = False
  is_floating_point = False
  float_bits = 0
  for quantizer in quantizers:
    if quantizer.is_floating_point:
      is_floating_point = True
      float_bits = max(float_bits, quantizer.bits)
    else:
      if quantizer.is_po2:
        qbits_quantizer = adder_impl.po2_qbits_converter(quantizer)
      else:
        qbits_quantizer = quantizer
      max_bits = max(max_bits, qbits_quantizer.bits)
      max_int_bits = max(max_int_bits, qbits_quantizer.int_bits)
    is_signed |= quantizer.is_signed
  return max_bits, max_int_bits, is_signed, is_floating_point, float_bits


class Add(IMerger):
  """add a list of inputs."""

  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    (max_bits, max_int_bits, is_signed, is_floating_point,
     float_bits) = _reduce_fixed_point(self.input_quantizers)

    if is_floating_point:
      self.output = quantizer_impl.FloatingPoint(bits=float_bits)
    else:
      self.output = quantizer_impl.QuantizedBits()
      # One extra bit holds the carry of the addition.
      self.output.bits = max_bits + 1
      self.output.int_bits = max_int_bits + 1
      self.output.is_signed = is_signed
      self.output.mode = 0
      self.output.is_floating_point = False
      self.output.is_po2 = 0

    self.gate_factor = 1
    self.gate_bits = self.output.bits

  def implemented_as(self):
    return "add"


class Multiply(IMerger):
  """multiplies (element-wise) a list of inputs."""

  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    # Chain pairwise multipliers; requires at least two inputs.
    multiplier_instance = multiplier_factory.MultiplierFactory()
    quantizer = self.input_quantizers[0]
    for cur in self.input_quantizers[1:]:
      multiplier = multiplier_instance.make_multiplier(quantizer, cur)
      quantizer = multiplier.output
    self.output = quantizer
    # TODO(lishanok): only use the last multiplier here
    self.impl_class = multiplier
    self.gate_factor = multiplier.gate_factor
    self.gate_bits = multiplier.gate_bits

  def implemented_as(self):
    return self.impl_class.implemented_as()


class Maximum(IMerger):
  """maximum of a list of inputs."""

  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    first = self.input_quantizers[0]
    # If every input quantizer matches the first one, reuse it directly.
    is_same = all(
        first.name == cur.name and first.bits == cur.bits and
        first.int_bits == cur.int_bits and first.is_signed == cur.is_signed
        for cur in self.input_quantizers[1:])

    if is_same:
      self.output = first
    else:
      (max_bits, max_int_bits, is_signed, is_floating_point,
       float_bits) = _reduce_fixed_point(self.input_quantizers)
      if is_floating_point:
        self.output = quantizer_impl.FloatingPoint(bits=float_bits)
      else:
        # Unlike Add, no carry bit is needed: the result is one of the
        # inputs.
        self.output = quantizer_impl.QuantizedBits()
        self.output.bits = max_bits
        self.output.int_bits = max_int_bits
        self.output.is_signed = is_signed
        self.output.mode = 0
        self.output.is_floating_point = False
        self.output.is_po2 = 0

    self.gate_factor = 0.2
    self.gate_bits = self.output.bits

  @staticmethod
  def implemented_as():
    return "add"


class Minimum(Maximum):
  """minimum (element-wise) a list of inputs."""

  # Same datatype and cost rules as Maximum.
  pass


class Average(Maximum):
  """average (element-wise) a list of inputs."""

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    # Cost overrides relative to Maximum.
    self.gate_factor = 1
    self.gate_bits = self.output.bits


class Concatenate(Maximum):
  """Layer that concatenates a list of inputs."""

  # It takes as input a list of tensors, all of the same shape except
  # for the concatenation axis, and returns a single tensor, the
  # concatenation of all inputs.

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    # Concatenation is wiring only, so its gate cost is zero.
    self.gate_factor = 0
    self.gate_bits = self.output.bits


# TODO(lishanok): finish DOT ndimension tensor logic
class Dot(IMerger):
  """dot product between samples in two tensors."""

  # E.g. if applied to a list of two tensors a and b of shape
  # (batch_size, n), the output will be a tensor of shape
  # (batch_size, 1) where each entry i will be the dot product
  # between a[i] and b[i].
  pass
class MultiplierFactory:
  """determine which multiplier implementation to use."""

  def __init__(self):
    # the table below is found in this slides:
    # https://docs.google.com/presentation/d/1pcmoB6ZpX0IqjhSwgzO-oQwpMRYwIcDe/edit#slide=id.p40
    # Indexed as [weight_mode][input_mode]; each entry pairs the
    # implementation class with a fresh output-quantizer template.

    def fp():
      # FloatingPoint(bits=None) defers the f16/f32 choice to the
      # actual input quantizers.
      return (multiplier_impl.FloatingPointMultiplier,
              quantizer_impl.FloatingPoint(bits=None))

    def qbits(impl):
      return (impl, quantizer_impl.QuantizedBits())

    def po2(impl):
      return (impl, quantizer_impl.PowerOfTwo())

    def ternary(impl):
      return (impl, quantizer_impl.Ternary())

    mux = multiplier_impl.Mux
    and_gate = multiplier_impl.AndGate
    shifter = multiplier_impl.Shifter

    self.multiplier_impl_table = [
        # weight mode 0: quantized_bits
        [qbits(multiplier_impl.FixedPointMultiplier), qbits(shifter),
         qbits(mux), qbits(mux), qbits(and_gate), fp()],
        # weight mode 1: po2
        [qbits(shifter), po2(multiplier_impl.Adder), po2(mux), po2(mux),
         po2(and_gate), fp()],
        # weight mode 2: ternary
        [qbits(mux), po2(mux), ternary(mux), ternary(mux),
         ternary(and_gate), fp()],
        # weight mode 3: binary (-1, +1)
        [qbits(mux), po2(mux), ternary(mux),
         (multiplier_impl.XorGate, quantizer_impl.Binary(use_01=False)),
         ternary(and_gate), fp()],
        # weight mode 4: binary (0, 1)
        [qbits(and_gate), po2(and_gate), ternary(and_gate),
         ternary(and_gate),
         (and_gate, quantizer_impl.Binary(use_01=True)), fp()],
        # weight mode 5: float
        [fp() for _ in range(6)],
    ]

  def make_multiplier(
      self,
      weight_quantizer: quantizer_impl.IQuantizer,
      input_quantizer: quantizer_impl.IQuantizer
  ) -> multiplier_impl.IMultiplier:
    """Create a multiplier instance.

    The type and bit width of the multiplier is determined from the
    quantizer type of both the kernel (weight) and input tensor, via
    multiplier_impl_table indexed by the two quantizer modes
    (qbits, po2, ternary, binary(-1,+1), binary(0,1), float).

    Args:
      weight_quantizer: weight quantizer type
      input_quantizer: input quantizer type

    Returns:
      An IMultiplier instance.
    """
    assert weight_quantizer is not None
    assert input_quantizer is not None

    multiplier_impl_class, output_quantizer = self.multiplier_impl_table[
        weight_quantizer.mode][input_quantizer.mode]

    # Work on local copies: multiplier instances created here may
    # mutate the quantizers they are given.
    local_weight_quantizer = copy.deepcopy(weight_quantizer)
    local_input_quantizer = copy.deepcopy(input_quantizer)
    local_output_quantizer = copy.deepcopy(output_quantizer)

    logging.debug(
        "multiplier implemented as class %s",
        multiplier_impl_class.implemented_as())
    assert issubclass(multiplier_impl_class, multiplier_impl.IMultiplier)

    return multiplier_impl_class(
        local_weight_quantizer,
        local_input_quantizer,
        local_output_quantizer
    )
""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): self.input = input_quantizer self.weights = weight_quantizer self.output = output_quantizer self.output.op_type = "multiplier" @staticmethod @abc.abstractmethod def implemented_as(): pass def name(self) -> str: return self.output.name def output_quantizer(self): return self.output def assert_neither_input_and_weights_is_floating_point( multiplier: IMultiplier): """assert non float type.""" assert not multiplier.input.is_floating_point assert not multiplier.weights.is_floating_point class Mux(IMultiplier): """Use mux for the hardware implementation of multiplier.""" # binary(1,-1)/ternary * other_datatype def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) self.output.is_signed = self.input.is_signed | self.weights.is_signed if any(s in weight_quantizer.name for s in ["binary", "ternary"]): self.output.bits = input_quantizer.bits self.output.int_bits = input_quantizer.int_bits if not input_quantizer.is_signed and weight_quantizer.is_signed: self.output.bits += 1 # multiplier factor for gate counts # gate_factor is the relative energy of given gate comparing # to an Add gate, giving that Add gate is 1 if "binary" in weight_quantizer.name: self.gate_factor = 0.3 else: self.gate_factor = 2 * 0.3 self.gate_bits = input_quantizer.bits else: self.output.bits = weight_quantizer.bits self.output.int_bits = weight_quantizer.int_bits if not weight_quantizer.is_signed and input_quantizer.is_signed: self.output.bits += 1 # multiplier factor for gate counts if input_quantizer.name == "binary": self.gate_factor = 0.3 else: self.gate_factor = 2 * 0.3 self.gate_bits = weight_quantizer.bits if "po2" in output_quantizer.name: if self.output.is_signed: 
output_quantizer.name = "quantized_po2" else: output_quantizer.name = "quantized_relu_po2" if "po2" in weight_quantizer.name: self.output.max_val_po2 = weight_quantizer.max_val_po2 else: self.output.max_val_po2 = input_quantizer.max_val_po2 self.output.int_bits = self.output.bits @staticmethod def implemented_as(): return "mux" class XorGate(IMultiplier): """Use XorGate for hardware implementation of a multiplier.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) if output_quantizer.name != "ternary": self.output.bits = max(self.input.bits, self.weights.bits) self.output.int_bits = max(self.input.int_bits, self.weights.int_bits) self.output.is_signed = self.input.is_signed | self.weights.is_signed assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False self.gate_factor = 0.3 self.gate_bits = 1 @staticmethod def implemented_as(): return "xor" class Shifter(IMultiplier): """shifter gate. po2*qbit is implemented as a shifter. output is qbits type. determin number of bits in the output qbits type: 1. min_exp in po2: number of bits to be expanded on the right (decimal bits) in qbits for example, min_exp = -2 -> po2 =2^min_exp = 2^(-2) : this means, po2*qbit -> qbit value right shifted for 2 bits 2. max_exp in po2: number of bits to be expanded on the left (int_bits) in qbits How to calculate min_exp and max_exp: 1.if po2 is_signed (quantized_po2) *one bit for sign for the entire po2 value; *exp has non_sign_bits = bits - 1 number of bits, *furthermore, 1 bit from non_sign_bits is used as sign bit in exp; *value range for exp is [-2 ** (non_sign_bits - 1), 2 ** (non_sign_bits - 1) - 1] 2.if not_signed (quantized_relu_po2) * 0 bit for the entire po2 value * exp has non_sign_bits = bits * rest is the same as above determine sign bit in the output qbits: 1. 
qbits no_sign and po2 is_sign: since max_exp and min_exp are computed without sign bit we need to add 1 sign bit to the final result; 2. qbits is_sign: since qbits already has a sign bit, no extra sign bit needed 3. qbits no_sign and po2 no_sign: no extra sign bit needed Attributes: input: input_quantizer weight: weight_quantizer output: output_quantizer gate_factor: relative energy comparing to an Adder gate_bits: number of bits for energy calculation. """ def __init__( self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer ): super().__init__(weight_quantizer, input_quantizer, output_quantizer) # locate the po2 quantizer mode_w = weight_quantizer.mode if mode_w == 1: po2_quantizer = weight_quantizer qbit_quantizer = input_quantizer else: po2_quantizer = input_quantizer qbit_quantizer = weight_quantizer # find min_exp and max_exp of po2 quantizer (min_exp, max_exp) = po2_quantizer.get_min_max_exp() qbits_bits = qbit_quantizer.bits qbits_int_bits = qbit_quantizer.int_bits self.output.bits = int(qbits_bits + max_exp + min_exp) if (not qbit_quantizer.is_signed) and po2_quantizer.is_signed: # if qbit is signed, qbits_bits already has the sign_bit, no need to +1 # if qbit is un_signed, po2 is unsigned, no need to +1 # if qbit is un_signed, po2 is signed, min_exp and max_exp # didnot include sign_bit, # therefore need to +1 self.output.bits += 1 self.output.int_bits = int(qbits_int_bits + max_exp) self.output.is_signed = qbit_quantizer.is_signed | po2_quantizer.is_signed assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False if po2_quantizer.inference_value_counts > 0: self.gate_factor = po2_quantizer.inference_value_counts * 0.3 self.gate_bits = qbits_bits else: self.gate_factor = 1 b = np.sqrt(2 ** po2_quantizer.bits * qbits_bits) self.gate_bits = b * np.log10(b) @staticmethod def implemented_as(): return "shifter" class AndGate(IMultiplier): 
"""and gate implementation.""" # binary(0,1) * any_datatype def __init__( self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer ): super().__init__(weight_quantizer, input_quantizer, output_quantizer) # if output is ternary, no need for further computation if self.output.name != "ternary": self.output.bits = max(self.input.bits, self.weights.bits) self.output.is_signed = self.input.is_signed | self.weights.is_signed self.output.is_floating_point = self.input.is_floating_point |\ self.weights.is_floating_point if weight_quantizer.name == "binary" and weight_quantizer.use_01: # binary(0,1) * datatype -> int_bits = datatype.int_bits self.output.int_bits = input_quantizer.int_bits else: self.output.int_bits = weight_quantizer.int_bits if "po2" in output_quantizer.name: # binary * po2 if self.output.is_signed: output_quantizer.name = "quantized_po2" else: output_quantizer.name = "quantized_relu_po2" if "po2" in weight_quantizer.name: self.output.max_val_po2 = weight_quantizer.max_val_po2 else: self.output.max_val_po2 = input_quantizer.max_val_po2 self.gate_bits = self.output.bits self.gate_factor = 0.1 @staticmethod def implemented_as(): return "and" class Adder(IMultiplier): """adder implementation.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) self.output.bits = max(self.input.bits, self.weights.bits) + 1 self.output.int_bits = max(self.input.int_bits, self.weights.int_bits) + 1 self.output.is_signed = self.input.is_signed | self.weights.is_signed assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False self.output.is_po2 = 1 if self.input.max_val_po2 == -1 or self.weights.max_val_po2 == -1: self.output.max_val_po2 = -1 else: # Adder is two po2_value multiply with each 
other self.output.max_val_po2 = self.input.max_val_po2 * self.weights.max_val_po2 if "po2" in output_quantizer.name: # po2 * po2 if self.output.is_signed: output_quantizer.name = "quantized_po2" else: output_quantizer.name = "quantized_relu_po2" self.gate_bits = self.output.bits self.gate_factor = 1 @staticmethod def implemented_as(): return "add" class FloatingPointMultiplier(IMultiplier): """multiplier for floating point.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) self.output.bits = max( self.input.bits * self.input.is_floating_point, self.weights.bits * self.weights.is_floating_point, ) self.output.int_bits = -1 self.output.is_signed = 1 assert self.input.is_floating_point | self.weights.is_floating_point self.output.is_floating_point = True self.gate_factor = 1 self.gate_bits = self.output.bits @staticmethod def implemented_as(): return "mul" class FixedPointMultiplier(IMultiplier): """multiplier for fixed point.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) # Total int bits is the sum of individual int bits. 
self.output.int_bits = self.input.int_bits + self.weights.int_bits # Total fractional bits is the sum of individual fractional bits fractional_bits1 = (self.input.bits - int(self.input.is_signed) - self.input.int_bits) fractional_bits2 = (self.weights.bits - int(self.weights.is_signed) - self.weights.int_bits) fractional_bits = fractional_bits1 + fractional_bits2 self.output.is_signed = self.input.is_signed | self.weights.is_signed # Total bits is the sum of int bits, fractional bits and sign bit self.output.bits = self.output.int_bits + fractional_bits + int( self.output.is_signed) assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False self.gate_factor = 1 self.gate_bits = np.sqrt(self.input.bits * self.weights.bits) @staticmethod def implemented_as(): return "mul" ================================================ FILE: qkeras/qtools/quantized_operators/qbn_factory.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""quantized batch normalization quantizer implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import math
import numpy as np

from qkeras.qtools.quantized_operators import adder_factory
from qkeras.qtools.quantized_operators import divider_factory
from qkeras.qtools.quantized_operators import multiplier_factory
from qkeras.qtools.quantized_operators import quantizer_impl


class QBNFactory:
  """determine which quantizer implementation to use.

  Create an qbn instance. The type and bit width of the output_quantizer
  is determined from gamma, beta, mean and variance quantizer

  y = gamma * (x - mean)/stddev + beta
  """

  def make_quantizer(
      self,
      input_quantizer: quantizer_impl.IQuantizer,
      gamma_quantizer: quantizer_impl.IQuantizer,
      beta_quantizer: quantizer_impl.IQuantizer,
      mean_quantizer: quantizer_impl.IQuantizer,
      variance_quantizer: quantizer_impl.IQuantizer,
      use_scale,
      use_center
  ):
    """make a qbn quantizer.

    Builds the chain of internal operators (divider, optional multiplier,
    optional accumulator) and stores them on self; the final operator's
    output quantizer is exposed as self.internal_output.
    """

    self.input_quantizer = input_quantizer
    self.gamma_quantizer = gamma_quantizer
    self.beta_quantizer = beta_quantizer
    self.mean_quantizer = mean_quantizer
    self.variance_quantizer = variance_quantizer
    self.use_scale = use_scale
    self.use_center = use_center

    multiplier = None
    accumulator = None

    # convert variance po2 quantizer to stddev po2 quantizer:
    # stddev = sqrt(variance), so the max value is rounded sqrt and
    # one fewer exponent bit suffices.
    stddev_quantizer = copy.deepcopy(variance_quantizer)

    if stddev_quantizer.is_po2:
      if variance_quantizer.max_val_po2 >= 0:
        stddev_quantizer.max_val_po2 = np.round(math.sqrt(
            variance_quantizer.max_val_po2))
      else:
        # -1 sentinel (no max value) passes through unchanged.
        stddev_quantizer.max_val_po2 = variance_quantizer.max_val_po2
      stddev_quantizer.bits = variance_quantizer.bits - 1
      stddev_quantizer.int_bits = stddev_quantizer.bits

    divider_instance = divider_factory.IDivider()

    if use_scale:
      # gamma/var
      divider = divider_instance.make_quantizer(
          gamma_quantizer, stddev_quantizer)

      # update the actual number of values in divider quantizer
      # during inference
      count = -1
      if gamma_quantizer.is_po2 and gamma_quantizer.inference_value_counts > 0:
        count = gamma_quantizer.inference_value_counts
      # NOTE(review): when stddev is not a po2 quantizer (or has no count),
      # the count is reset to -1 even if gamma had a valid count — confirm
      # this all-or-nothing behavior is intended.
      if stddev_quantizer.is_po2 and stddev_quantizer.inference_value_counts > 0:
        count *= stddev_quantizer.inference_value_counts
      else:
        count = -1
      if count > 0:
        divider.output.inference_value_counts = count

      # gamma/var * x
      multiplier_instance = multiplier_factory.MultiplierFactory()
      multiplier = multiplier_instance.make_multiplier(
          divider.output, input_quantizer)
      accumulator_input = multiplier
    else:
      # x/var
      divider = divider_instance.make_quantizer(
          input_quantizer, stddev_quantizer)
      accumulator_input = divider

    if use_center:
      # y = gamma/var * x + beta
      accumulator_instance = adder_factory.IAdder()
      accumulator = accumulator_instance.make_quantizer(
          accumulator_input.output, beta_quantizer)
      output_q = accumulator
    else:
      output_q = accumulator_input

    self.internal_divide_quantizer = divider
    self.internal_multiplier = multiplier
    self.internal_accumulator = accumulator
    self.internal_output = output_q


================================================
FILE: qkeras/qtools/quantized_operators/quantizer_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """decides which quantizer implementation to use.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy from qkeras import quantizers # from qkeras.google_internals import experimental_quantizers # from qkeras.google_internals import experimental_quantizer_impl from qkeras.qtools.quantized_operators import quantizer_impl from qkeras.qtools.settings import cfg class QuantizerFactory: """Convert qkeras quantizer to qtools quantizer type.""" def __init__(self): self.quantizer_lookup = { quantizers.quantized_bits: quantizer_impl.QuantizedBits, quantizers.binary: quantizer_impl.Binary, quantizers.quantized_relu: quantizer_impl.QuantizedRelu, quantizers.ternary: quantizer_impl.Ternary, quantizers.quantized_relu_po2: quantizer_impl.ReluPowerOfTwo, quantizers.quantized_po2: quantizer_impl.PowerOfTwo, quantizers.stochastic_ternary: quantizer_impl.StochasticTernary, quantizers.stochastic_binary: quantizer_impl.StochasticBinary, quantizers.bernoulli: quantizer_impl.Bernoulli, quantizers.quantized_tanh: quantizer_impl.QuantizedTanh, quantizers.quantized_ulaw: quantizer_impl.QuantizedUlaw, # experimental_quantizers.quantized_bits_learnable_scale: # experimental_quantizer_impl.QuantizedBitsLearnableScale, # experimental_quantizers.parametric_quantizer_d_xmax: # experimental_quantizer_impl.ParametricQuantizer, # add following quantizer types for the use in GraphUpdateEdge quantizer_impl.QuantizedBits: quantizer_impl.QuantizedBits, quantizer_impl.Binary: quantizer_impl.Binary, quantizer_impl.QuantizedRelu: quantizer_impl.QuantizedRelu, quantizer_impl.Ternary: quantizer_impl.Ternary, quantizer_impl.ReluPowerOfTwo: quantizer_impl.ReluPowerOfTwo, quantizer_impl.PowerOfTwo: quantizer_impl.PowerOfTwo, quantizer_impl.FloatingPoint: quantizer_impl.FloatingPoint, quantizer_impl.StochasticTernary: quantizer_impl.StochasticTernary, 
quantizer_impl.StochasticBinary: quantizer_impl.StochasticTernary, quantizer_impl.Bernoulli: quantizer_impl.StochasticTernary, quantizer_impl.QuantizedTanh: quantizer_impl.StochasticTernary, quantizer_impl.QuantizedUlaw: quantizer_impl.StochasticTernary, # experimental_quantizer_impl.QuantizedBitsLearnableScale: # experimental_quantizer_impl.QuantizedBitsLearnableScale, #experimental_quantizer_impl.ParametricQuantizer: # experimental_quantizer_impl.ParametricQuantizer, } self._default_interm_quantizer = cfg.default_interm_quantizer def _make_quantizer_util(self, quantizer) -> quantizer_impl.IQuantizer: """make quantizer util function.""" if quantizer in ["int8", "int16", "int32", "fp16", "fp32"]: return self.make_default_quantizer(mode=quantizer) elif isinstance(quantizer, tuple(self.quantizer_lookup.keys())): quantizer_class = self.quantizer_lookup[type(quantizer)] if quantizer_class == type(quantizer): return self.clone_quantizer(quantizer) else: q = quantizer_class() q.convert_qkeras_quantizer(quantizer) return q return None def make_quantizer(self, quantizer) -> quantizer_impl.IQuantizer: """create quantizer according to input qkeras quantizer.""" q = None if quantizer is not None: q = self._make_quantizer_util(quantizer) if q is None: return self.make_default_quantizer( mode=self._default_interm_quantizer) return q def is_quantizer_supported(self, quantizer) -> bool: if quantizer is None: # if None, will use default quantizer defined in config.json return True return isinstance(quantizer, tuple(self.quantizer_lookup.keys())) def make_default_quantizer(self, mode) -> quantizer_impl.IQuantizer: """make quantizer given qkeras quantizer type.""" if mode == "fp32": return quantizer_impl.FloatingPoint( bits=32) elif mode == "fp16": return quantizer_impl.FloatingPoint( bits=16) elif mode == "int8": qbits = quantizer_impl.QuantizedBits() qbits.convert_qkeras_quantizer( quantizers.quantized_bits(8, 0, 1)) return qbits elif mode == "int16": qbits = 
quantizer_impl.QuantizedBits() qbits.convert_qkeras_quantizer( quantizers.quantized_bits(16, 7, 1)) return qbits elif mode == "int32": qbits = quantizer_impl.QuantizedBits() qbits.convert_qkeras_quantizer( quantizers.quantized_bits(32, 10, 1)) return qbits else: try: # string to quantizer object q_name = "quantizers." + mode qkeras_object = eval(q_name) # pylint: disable=eval-used return self._make_quantizer_util(qkeras_object) except: # pylint: disable=bare-except raise ValueError("unaccepted quantizer {}!".format(mode)) def clone_quantizer( self, quantizer: quantizer_impl.IQuantizer) -> quantizer_impl.IQuantizer: """clone the given quantizer.""" return copy.deepcopy(quantizer) ================================================ FILE: qkeras/qtools/quantized_operators/quantizer_impl.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""atomic quantizer implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import math
import numpy as np
from qkeras import quantizers

# Bit width assumed for floating-point values throughout qtools.
FLOATINGPOINT_BITS = 32


def get_np_value(val):
  # Unwrap a tf tensor (has .numpy()) and a 1-element ndarray to a scalar;
  # anything else is returned unchanged.
  if hasattr(val, "numpy"):
    val = val.numpy()
    if isinstance(val, np.ndarray) and len(val) == 1:
      return val[0]
    else:
      return val
  else:
    return val


def get_exp(quantizer):
  """get max/min exp value for relu_po2 or quantized_po2."""
  if quantizer.is_signed:
    non_sign_bits = quantizer.bits - 1
  else:
    non_sign_bits = quantizer.bits

  min_exp = -2 ** (non_sign_bits - 1)
  max_exp_orig = 2 ** (non_sign_bits - 1) - 1
  max_exp = max_exp_orig

  # max_value caps how many int_bits actually allowed
  # (max_val_po2 == -1 means no max value was configured).
  if quantizer.max_val_po2 != -1:
    if quantizer.max_val_po2 <= 0:
      max_exp = 0
    else:
      max_exp = math.ceil(np.log2(quantizer.max_val_po2))
    max_exp = min(max_exp, max_exp_orig)
    # if max_exp<0. no need to expand int_bits
    max_exp = max(0, max_exp)

  return (-min_exp, max_exp)


class IQuantizer(abc.ABC):
  """abstract class for quantizer."""

  def __init__(self):
    self.mode = -1                  # index into the multiplier impl table
    self.bits = -1                  # total bit width
    self.int_bits = -1              # integer bits
    self.is_signed = 0              # 1 if a sign bit is present
    self.is_floating_point = False
    self.max_val_po2 = -1           # -1 is the "no max value" sentinel
    self.is_po2 = 0                 # 1 for power-of-two quantizers
    self.name = None
    self.op_type = "quantizer"


class QuantizedBits(IQuantizer):
  """quantized bits.

  Attributes:
    mode: index of the current quantizer in
      MultiplierFactory.multiplier_impl_table
    bits: total bits
    int_bits: integer bits
    is_signed: if a signed number
    name: quantizer name
  """

  def __init__(self):
    super().__init__()
    self.mode = 0
    self.is_signed = 1
    self.name = "quantized_bits"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_bits):
    self.mode = 0
    self.bits = quantizer.bits
    self.int_bits = get_np_value(quantizer.integer)
    self.is_signed = quantizer.keep_negative

  def convert_to_qkeras_quantizer(
      self, symmetric=1, alpha=None, use_stochastic_rounding=False,
      scale_axis=None, qnoise_factor=1.0, elements_per_scale=None,
      min_po2_exponent=None, max_po2_exponent=None):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_bits(
        bits=self.bits, integer=self.int_bits, keep_negative=self.is_signed,
        symmetric=symmetric, alpha=alpha,
        use_stochastic_rounding=use_stochastic_rounding,
        scale_axis=scale_axis, qnoise_factor=qnoise_factor,
        elements_per_scale=elements_per_scale,
        min_po2_exponent=min_po2_exponent,
        max_po2_exponent=max_po2_exponent)


class QuantizedTanh(QuantizedBits):
  """same as quantized bits."""

  def __init__(self):
    super().__init__()
    self.name = "quantized_tanh"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_tanh):
    # tanh output is always signed; int_bits keeps the base-class default.
    self.mode = 0
    self.bits = quantizer.bits
    self.is_signed = 1

  def convert_to_qkeras_quantizer(
      self, symmetric=False, use_stochastic_rounding=False):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_tanh(
        bits=self.bits, use_stochastic_rounding=use_stochastic_rounding,
        symmetric=symmetric)


class QuantizedUlaw(QuantizedBits):
  """quantized ulaw type."""
  # same as quantized bits

  def __init__(self):
    super().__init__()
    self.name = "quantized_ulaw"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_ulaw):
    self.mode = 0
    self.bits = quantizer.bits
    self.int_bits = get_np_value(quantizer.integer)
    self.is_signed = 1

  def convert_to_qkeras_quantizer(self, symmetric=0, u=255.0):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_ulaw(
        bits=self.bits, integer=self.int_bits, symmetric=symmetric, u=u)


class Binary(IQuantizer):
  """binary quantizer.

  use_01=True quantizes to {0, 1} (unsigned, mode 4);
  use_01=False quantizes to {-1, 1} (signed, mode 3).
  """

  def __init__(self, use_01=False):
    super().__init__()
    if use_01:
      self.mode = 4
      self.is_signed = 0
    else:
      self.mode = 3
      self.is_signed = 1
    self.bits = 1
    self.int_bits = 1
    self.use_01 = use_01
    self.name = "binary"

  def convert_qkeras_quantizer(self, quantizer: quantizers.binary):
    if quantizer.use_01:
      self.mode = 4
      self.is_signed = 0
    else:
      self.mode = 3
      self.is_signed = 1
    self.use_01 = quantizer.use_01

  def convert_to_qkeras_quantizer(self, alpha=None,
                                  use_stochastic_rounding=False):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.binary(use_01=self.use_01, alpha=alpha,
                             use_stochastic_rounding=use_stochastic_rounding)


class StochasticBinary(Binary):
  """stochastic binary quantizer."""
  # same as binary(-1, 1)

  def __init__(self):
    super().__init__(use_01=False)
    self.name = "stochastic_binary"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.stochastic_binary):
    """convert qkeras quantizer to qtools quantizer."""
    # Nothing to copy: the qtools representation is fixed binary(-1, 1).
    pass

  def convert_to_qkeras_quantizer(self, alpha=None, temperature=6.0,
                                  use_real_sigmoid=True):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.stochastic_binary(alpha=alpha, temperature=temperature,
                                        use_real_sigmoid=use_real_sigmoid)


class Bernoulli(Binary):
  """bernoulli quantizer.

  same as binary(0, 1)."""

  def __init__(self):
    super().__init__(use_01=True)
    self.name = "bernoulli"

  def convert_qkeras_quantizer(self, quantizer: quantizers.bernoulli):
    # Nothing to copy: the qtools representation is fixed binary(0, 1).
    pass

  def convert_to_qkeras_quantizer(self, alpha=None, temperature=6.0,
                                  use_real_sigmoid=True):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.bernoulli(alpha=alpha, temperature=temperature,
                                use_real_sigmoid=use_real_sigmoid)


class QuantizedRelu(IQuantizer):
  """quantized relu quantizer."""

  def __init__(self):
    super().__init__()
    self.is_signed = 0
    self.name = "quantized_relu"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_relu):
    """convert from qkeras quantizer."""
    bits = quantizer.bits
    int_bits = get_np_value(quantizer.integer)
    if bits == 1 and int_bits == 1:
      # Degenerates to binary(0, 1).
      mode = 4
    else:
      mode = 0
    self.mode = mode
    self.bits = bits
    self.int_bits = int_bits
    # A leaky relu (nonzero negative_slope) produces negative values.
    if hasattr(quantizer, "negative_slope") and quantizer.negative_slope != 0:
      self.is_signed = 1

  def convert_to_qkeras_quantizer(
      self, use_sigmoid=0, negative_slope=0.0, use_stochastic_rounding=False,
      relu_upper_bound=None, is_quantized_clip=True, qnoise_factor=1.0):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_relu(
        bits=self.bits, integer=self.int_bits, use_sigmoid=use_sigmoid,
        negative_slope=negative_slope,
        use_stochastic_rounding=use_stochastic_rounding,
        relu_upper_bound=relu_upper_bound,
        is_quantized_clip=is_quantized_clip,
        qnoise_factor=qnoise_factor)


class Ternary(IQuantizer):
  """ternary(0, 1, -1)."""

  def __init__(self):
    super().__init__()
    self.mode = 2
    self.bits = 2
    self.int_bits = 2
    self.is_signed = 1
    self.name = "ternary"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.ternary):
    # Nothing to copy: the qtools representation is fixed.
    pass

  def convert_to_qkeras_quantizer(
      self, alpha=None, threshold=None, use_stochastic_rounding=False,
      number_of_unrolls=5):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.ternary(
        alpha=alpha, threshold=threshold,
        use_stochastic_rounding=use_stochastic_rounding,
        number_of_unrolls=number_of_unrolls)


class StochasticTernary(Ternary):
  """stochastic ternary."""

  def __init__(self):
    super().__init__()
    self.name = "stochastic_ternary"

  # same as ternary
  def convert_qkeras_quantizer(
      self, quantizer: quantizers.stochastic_ternary):
    pass

  def convert_to_qkeras_quantizer(
      self, alpha=None, threshold=None, temperature=8.0,
      use_real_sigmoid=True, number_of_unrolls=5):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.stochastic_ternary(
        alpha=alpha, threshold=threshold, temperature=temperature,
        use_real_sigmoid=use_real_sigmoid,
        number_of_unrolls=number_of_unrolls)


class FloatingPoint(IQuantizer):
  """float32."""

  def __init__(self, bits):
    super().__init__()
    self.mode = 5
    self.bits = bits
    self.int_bits = -1
    self.is_signed = 1
    self.is_floating_point = True
    self.name = "floating_point"

  def convert_qkeras_quantizer(self, bits):
    # No qkeras counterpart; floating point is a qtools-only concept.
    pass

  def convert_to_qkeras_quantizer(self, bits):
    pass


class PowerOfTwo(IQuantizer):
  """po2."""

  def __init__(self, is_signed=True):
    super().__init__()
    self.mode = 1
    self.is_po2 = 1
    self.is_signed = is_signed
    # Number of distinct values seen at inference; -1 means unknown.
    self.inference_value_counts = -1
    if is_signed:
      self.name = "quantized_po2"
    else:
      self.name = "quantized_relu_po2"

  def convert_qkeras_quantizer(self, quantizer):
    """convert qkeras quantizer to qtools quantizer."""
    assert "po2" in quantizer.__class__.__name__
    if quantizer.__class__.__name__ == "quantized_po2":
      self.is_signed = 1
      self.name = "quantized_po2"
    elif quantizer.__class__.__name__ == "quantized_relu_po2":
      # Re-initialize to defaults before switching to unsigned.
      super().__init__()
      self.is_signed = 0
      self.name = "quantized_relu_po2"

    bits = quantizer.bits
    max_val_po2 = quantizer.max_value
    if not max_val_po2:
      self.max_val_po2 = -1
    else:
      self.max_val_po2 = max_val_po2

    self.bits = bits
    self.int_bits = bits

  def convert_to_qkeras_quantizer(
      self, negative_slope=0, use_stochastic_rounding=False,
      quadratic_approximation=False):
    """convert qtools quantizer to qkeras quantizer."""
    if self.is_signed:
      # quantized_po2
      return quantizers.quantized_po2(
          bits=self.bits,
          max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,
          use_stochastic_rounding=use_stochastic_rounding,
          quadratic_approximation=quadratic_approximation)
    else:
      # quantized_relu_po2
      return quantizers.quantized_relu_po2(
          bits=self.bits,
          max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,
          negative_slope=negative_slope,
          use_stochastic_rounding=use_stochastic_rounding,
          quadratic_approximation=quadratic_approximation)

  def get_min_max_exp(self):
    return get_exp(self)

  def quantizer_bits_calculator(self, val):
    """calculate how many bits needed.

    calculate how many bits are required to represent a po2 value.
    val can be +/- values, can be integer or fractional number.
    needs to be dealt separately.

    Returns:
      (sign_bit, non_sign_bits) tuple.

    Raises:
      ValueError: if val is not a power of two.
    """
    sign_bit = val < 0
    # get rid of sign
    val = abs(val)

    if val == 0:
      # val of 0 is special case; qkeras uses minimum
      # number to represent 0
      non_sign_bits = self.bits - sign_bit
    else:
      exp_value = np.log2(val)
      # exp_value should be integer
      if abs(np.round(exp_value) - exp_value) > 0:
        raise ValueError("ERROR: {} is not a po2 value!".format(val))
      exp_value = int(exp_value)
      # for n bits, the range of values it can represent is:
      # min_val = -2 ** (n - 1)
      # max_val = 2 ** (n - 1) - 1
      if exp_value == 0:
        non_sign_bits = 1
      elif exp_value > 0:
        # e.g., 16 needs 5 bits + 1 exp sign bit,
        # 15 needs 4 bits + 1 exp sign bit
        non_sign_bits = math.floor(np.log2(exp_value)) + 1 + 1
      else:
        # e.g., -16 needs 4 bits + 1 exp sign bit
        non_sign_bits = math.ceil(np.log2(abs(exp_value))) + 1

    return (sign_bit, non_sign_bits)

  def update_quantizer(self, val, reset=False):
    """update quantizer bits according to the input value.

    Args:
      val: input value
      reset: True->disregard current quantizer bits and reset it
        according to the given value; False-> update the quantizer
        bits with given value.
        quantizer.bits = min(existing_bits, bits required by val)

    Returns:
      Update existing po2 quantizer bits by val.
      quantizer.bits = min(existing_bits, bits required by val)
    """
    (sign_bit, non_sign_bits) = self.quantizer_bits_calculator(val)

    if reset:
      self.bits = sign_bit + non_sign_bits
    else:
      # avoid input value exceeding quantizer limit
      self.bits = min(self.bits, sign_bit + non_sign_bits)

    self.int_bits = self.bits
    # NOTE(review): when max_val_po2 is still the -1 sentinel, min() keeps
    # -1 (or a smaller negative val) — confirm this interaction is intended.
    self.max_val_po2 = min(val, self.max_val_po2)
    self.is_signed = sign_bit
    if sign_bit:
      self.name = "quantized_po2"
    else:
      self.name = "quantized_relu_po2"

  def update_inference_values(self, weights):
    """find how many different values in weights in the po2 quantizer."""
    inference_value_counts = len(set(weights.flatten()))
    self.inference_value_counts = inference_value_counts


class ReluPowerOfTwo(PowerOfTwo):
  """relu po2."""

  def __init__(self):
    super().__init__()
    self.mode = 1
    self.is_po2 = 1
    self.is_signed = 0
    self.name = "quantized_relu_po2"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_relu_po2):
    self.bits = quantizer.bits
    self.int_bits = quantizer.bits
    if not quantizer.max_value:
      self.max_val_po2 = -1
    else:
      self.max_val_po2 = quantizer.max_value

  def convert_to_qkeras_quantizer(
      self, negative_slope=0, use_stochastic_rounding=False,
      quadratic_approximation=False):
    """convert qtools quantizer to qkeras quantizer."""
    # quantized_relu_po2
    return quantizers.quantized_relu_po2(
        bits=self.bits,
        max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,
        negative_slope=negative_slope,
        use_stochastic_rounding=use_stochastic_rounding,
        quadratic_approximation=quadratic_approximation)


================================================
FILE: qkeras/qtools/quantized_operators/subtractor_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """"create subtractor quantizer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from qkeras.qtools.quantized_operators import adder_factory from qkeras.qtools.quantized_operators import adder_impl from qkeras.qtools.quantized_operators import quantizer_impl class ISubtractor(adder_factory.IAdder): """Create a subtractor instance. The methods in subtractor is mostly inherited from adder with a few exceptions. """ def make_quantizer(self, quantizer_1: quantizer_impl.IQuantizer, quantizer_2: quantizer_impl.IQuantizer): """make an ISubtractor instance. if quantizer1 and quantizer2 are both non-signed, result should change to signed; else since a sign bit is already present, no need to add extra sign bit Args: quantizer_1: first operand quantizer_2: second operand Returns: An ISubtractor instance """ quantizer = super().make_quantizer(quantizer_1, quantizer_2) if not isinstance(quantizer, adder_impl.FloatingPoint_Adder): if not quantizer_1.is_signed and not quantizer_2.is_signed: quantizer.output.is_signed = 1 quantizer.output.bits += 1 return quantizer ================================================ FILE: qkeras/qtools/run_qtools.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for running qtools and qenergy."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import numpy as np

from qkeras.qtools import generate_layer_data_type_map
from qkeras.qtools import interface
from qkeras.qtools import qgraph
from qkeras.qtools import qtools_util
from qkeras.qtools.config_public import config_settings
from qkeras.qtools.qenergy import qenergy
from qkeras.qtools.settings import cfg


class QTools:
  """integration of different qtools functions."""

  def __init__(self, model, process, source_quantizers=None,
               is_inference=False, weights_path=None,
               keras_quantizer=None, keras_accumulator=None,
               for_reference=False, model_weights_already_quantized=True,
               hw_weight_dict=None):
    if model is not None:
      self._model = model
      if weights_path is not None:
        self._model.load_weights(weights_path)

    # Apply process-specific (e.g. technology node) config overrides.
    cfg.update(process, config_settings)

    # if source_quantizers is None, CreateGraph will use
    # default_source_quantizers defined in cfg
    (graph, source_quantizer_list) = qgraph.CreateGraph(
        model, source_quantizers, cfg.default_source_quantizer)
    # qgraph.PrintGraph(graph)
    qgraph.GraphPropagateActivationsToEdges(graph)

    self._layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
        graph, source_quantizer_list, is_inference,
        keras_quantizer, keras_accumulator, for_reference,
        model_weights_already_quantized=model_weights_already_quantized,
        hw_weight_dict=hw_weight_dict)

    self._output_dict = interface.map_to_json(self._layer_map)
    self.source_quantizer_list = source_quantizer_list

  def qtools_stats_to_json(self, json_name):
    """dump the layer stats to a json file."""
    with open(json_name, "w") as outfile:
      json.dump(self._output_dict, outfile, indent=4)

  def qtools_stats_print(self):
    """print out the layer stats."""
    dict_to_json = json.dumps(self._output_dict, indent=4)
    print(dict_to_json)

  def pe(self, weights_on_memory="dram",
         activations_on_memory="dram",
         min_sram_size=0,
         rd_wr_on_io=True,
         verbose=False):
    """energy consumption calculation."""
    # NOTE(review): only weights_on_memory is validated here;
    # activations_on_memory is not — confirm whether that is intended.
    assert weights_on_memory in ["dram", "sram", "fixed"]
    energy_dict = qenergy.energy_estimate(
        self._model, self._layer_map, weights_on_memory,
        activations_on_memory, min_sram_size,
        rd_wr_on_io)

    if verbose:
      print("COST:")
      dict_to_json = json.dumps(energy_dict, indent=4)
      print(dict_to_json)

    return energy_dict

  def extract_energy_sum(self, cfg_setting, energy_dict):
    """extracted energy needed in calculating sum."""
    value = 0
    for layer in energy_dict.keys():
      if layer == "total_cost":
        continue
      class_name = energy_dict[layer]["class_name"]
      # Per-class key list, falling back to the "default" entry.
      keys = cfg_setting.get(class_name, cfg_setting.get("default", []))
      value += sum([energy_dict[layer]["energy"][key] for key in keys])
    return int(value)

  def extract_energy_profile(self, cfg_setting, energy_dict):
    """extract energy consumption in each layer."""
    energy = {}
    for layer in energy_dict.keys():
      if layer == "total_cost":
        continue
      class_name = energy_dict[layer]["class_name"]
      keys = cfg_setting.get(class_name, cfg_setting.get("default", []))
      energy[layer] = {}
      energy[layer]["energy"] = energy_dict[layer]["energy"]
      energy[layer]["total"] = sum(
          [energy_dict[layer]["energy"][key] for key in keys])
    return energy

  def calculate_ace(self, default_float_bits):
    """Computes ACE numbers from conv/dense layers.

    Returns a (quantized_ace, float_ace) pair summed over all layers
    that have a weight quantizer.
    """

    def _get_ace(layer):
      # Returns (ace, ace_float) for a single layer; (0, 0) for layers
      # without weight quantizers.
      ace = 0
      ace_float = 0
      if layer.name in self._output_dict:
        layer_item = self._output_dict[layer.name]
        # Here we only consider the number of multiplication as the
        # operation count. To include the number of
        # accumulators, we should multiply the value by 2, assuming
        # accumulation count ~= multiplication count.
        operation_count = layer_item["operation_count"]

        # Input bitwidth.
        input_quantizer_list = layer_item["input_quantizer_list"]
        input_bits = input_quantizer_list[0]["bits"]

        # Weight bitwidth
        weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
        if weight_quantizer:
          # Only layers such as Conv/Dense have weight_quantizers.
          w_bits = weight_quantizer["bits"]
          ace = operation_count * input_bits * w_bits
          ace_float = operation_count * default_float_bits * default_float_bits
      return (ace, ace_float)

    print("WARNING: ACE are computed from conv/dense layers only!")
    return (sum([_get_ace(l)[0] for l in self._model.layers]),
            sum([_get_ace(l)[1] for l in self._model.layers]))

  def calculate_output_bytes(self, include_model_input_size,
                             default_float_bits):
    """Computes activation layers' output size in bytes."""

    def _get_activation_size(layer):
      # Since in hardware previous conv/dense layers will be fused with
      # the following activation layers, we only consider the output of
      # Activation layers when calculating output sizes.
      if layer.__class__.__name__ in ["QActivation"]:
        layer_item = self._output_dict[layer.name]
        output_quantizer = layer_item["output_quantizer"]
        output_shape = output_quantizer["shape"]
        o_bits = output_quantizer["bits"]
        # Skip the batch dimension (output_shape[0]).
        return (int(np.prod(output_shape[1:]) * o_bits / 8.0),
                int(np.prod(output_shape[1:]) * default_float_bits / 8.0))
      else:
        return (0, 0)

    output_bytes = sum([_get_activation_size(l)[0]
                        for l in self._model.layers])
    output_bytes_float = sum([_get_activation_size(l)[1]
                              for l in self._model.layers])
    if include_model_input_size:
      # Include model input size.
      output_bytes += (np.prod(self._model.input_shape[1:]) *
                       self.source_quantizer_list[0].bits / 8.0)
      output_bytes_float += (np.prod(self._model.input_shape[1:]) *
                             default_float_bits / 8.0)
    return (output_bytes, output_bytes_float)

  def calculate_weight_bytes(self, default_float_bits):
    """Computes weight size in bytes from conv/dense layers."""

    def _get_weight_size(layer):
      # Returns (weight_bytes, weight_bytes_float) for one layer,
      # covering kernel and (when present) bias.
      weight_bytes = 0
      weight_bytes_float = 0
      if layer.name in self._output_dict:
        layer_item = self._output_dict[layer.name]
        weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
        if weight_quantizer:
          # Calculates kernel bytes.
          w_bits = weight_quantizer["bits"]
          weight_bytes += int(np.prod(layer.weights[0].shape) * w_bits / 8.0)
          weight_bytes_float += int(np.prod(layer.weights[0].shape) *
                                    default_float_bits / 8.0)

          # Calculates bias bytes.
          if hasattr(layer, "use_bias") and layer.use_bias:
            bias_quantizer = qtools_util.get_val(layer_item, "bias_quantizer")
            assert bias_quantizer is not None, (
                f"{layer.name} has no bias_quantizer!")
            b_bits = bias_quantizer["bits"]
            weight_bytes += int(np.prod(layer.weights[1].shape) * b_bits / 8.0)
            weight_bytes_float += int(np.prod(layer.weights[1].shape) *
                                      default_float_bits / 8.0)
      return (weight_bytes, weight_bytes_float)

    return (sum([_get_weight_size(l)[0] for l in self._model.layers]),
            sum([_get_weight_size(l)[1] for l in self._model.layers]))

  def get_roofline_numbers(self, include_model_input_size=True,
                           default_float_bits=32):
    """Extracts model numbers for roofline model analysis."""
    return {"ACE": self.calculate_ace(default_float_bits)[0],
            "weight_in_bytes": self.calculate_weight_bytes(
                default_float_bits)[0],
            "activation_in_bytes": self.calculate_output_bytes(
                include_model_input_size, default_float_bits)[0],
            "ACE_float": self.calculate_ace(
                default_float_bits)[1],
            "weight_in_bytes_float": self.calculate_weight_bytes(
                default_float_bits)[1],
            "activation_in_bytes_float": self.calculate_output_bytes(
                include_model_input_size, default_float_bits)[1]}
================================================ FILE: qkeras/qtools/settings.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """configurations.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np class ConfigClass: """configuration class.""" def __init__(self): self.default_source_quantizer = "quantized_bits(8, 0, 1)" self.default_interm_quantizer = "fp32" # Horowitz estimates from ISSCC 2014 self.fpm_add = np.poly1d([0.003125, 0]) self.fpm_mul = np.poly1d([0.002994791667, 0.001041666667, 0]) self.fp16_add = np.poly1d([0.4]) self.fp16_mul = np.poly1d([1.1]) self.fp32_add = np.poly1d([0.9]) self.fp32_mul = np.poly1d([3.7]) self.sram_rd = np.poly1d([0.02455, -0.2656, 0.8661]) self.dram_rd = np.poly1d([20.3125, 0]) self.sram_mul_factor = 1/64. 
self.dram_mul_factor = 1.0 self.include_energy = {} self.include_energy["default"] = ["inputs", "parameters", "op_cost"] self.include_energy["QActivation"] = ["outputs"] self.include_energy["QAdaptiveActivation"] = ["outputs"] self.include_energy["Activation"] = ["outputs"] self.include_energy["QBatchNormalization"] = ["parameters"] self.include_energy["BatchNormalization"] = ["parameters"] self.include_energy["Add"] = ["op_cost"] self.include_energy["Subtract"] = ["op_cost"] self.include_energy["MaxPooling2D"] = ["op_cost"] self.include_energy["default"] = ["inputs", "parameters", "op_cost"] def update(self, process, cfg_setting): """update config.""" # pylint: disable=bare-except try: self.default_source_quantizer = cfg_setting[ "default_source_quantizer"] except: pass try: self.default_interm_quantizer = cfg_setting[ "default_interm_quantizer"] except: pass try: self.fpm_add = np.poly1d(cfg_setting[process]["fpm_add"]) except: pass try: self.fpm_mul = np.poly1d(cfg_setting[process]["fpm_mul"]) except: pass try: self.fp16_add = np.poly1d(cfg_setting[process]["fp16_add"]) except: pass try: self.fp16_mul = np.poly1d(cfg_setting[process]["fp16_mul"]) except: pass try: self.fp32_add = np.poly1d(cfg_setting[process]["fp32_add"]) except: pass try: self.fp32_mul = np.poly1d(cfg_setting[process]["fp32_mul"]) except: pass try: self.sram_rd = np.poly1d(cfg_setting[process]["sram_rd"]) except: pass try: self.dram_rd = np.poly1d(cfg_setting[process]["dram_rd"]) except: # pylint: disable=broad-except pass try: for key in cfg_setting["include_energy"]: self.include_energy[key] = cfg_setting["include_energy"][key] if "Q" == key[0]: # use the same rule for keras layer and qkeras layer self.include_energy[key[1:]] = cfg_setting["include_energy"][key] except: pass cfg = ConfigClass() ================================================ FILE: qkeras/quantizer_imports.py ================================================ # Copyright 2025 Google LLC # # # Licensed under the Apache License, 
Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Imports for QKeras quantizers.""" from .quantizers import bernoulli from .quantizers import binary from .quantizers import quantized_bits from .quantizers import quantized_hswish from .quantizers import quantized_linear from .quantizers import quantized_po2 from .quantizers import quantized_relu from .quantizers import quantized_relu_po2 from .quantizers import quantized_sigmoid from .quantizers import quantized_tanh from .quantizers import quantized_ulaw from .quantizers import stochastic_binary from .quantizers import stochastic_ternary from .quantizers import ternary ================================================ FILE: qkeras/quantizer_registry.py ================================================ # Copyright 2024 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Registry for QKeras quantizers.""" from . 
import registry # Global registry for all QKeras quantizers. _QUANTIZERS_REGISTRY = registry.Registry() def register_quantizer(quantizer): """Decorator for registering a quantizer.""" _QUANTIZERS_REGISTRY.register(quantizer) # Return the quantizer after registering. This ensures any registered # quantizer class is properly defined. return quantizer def lookup_quantizer(name): """Retrieves a quantizer from the quantizers registry.""" return _QUANTIZERS_REGISTRY.lookup(name) ================================================ FILE: qkeras/quantizers.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import re from typing import Any, List, Tuple, cast import numpy as np import six from six.moves import range import tensorflow.compat.v2 as tf from tensorflow.keras import initializers import tensorflow.keras.backend as K from tensorflow.keras.utils import deserialize_keras_object from . import base_quantizer from . 
import quantizer_registry
# from .google_internals.experimental_quantizers import parametric_quantizer_d_xmax
# from .google_internals.experimental_quantizers import quantized_bits_learnable_scale
from .safe_eval import safe_eval
from tensorflow.python.framework import smart_cond as tf_utils

#
# Library of auxiliary functions
#


def get_weight_scale(quantizer, x=None):
  """Gets the scales of weights for (stochastic_)binary and ternary quantizers.

  Arguments:
    quantizer: A binary or ternary quantizer class.
    x: A weight tensor. We keep it here for now for backward compatibility.

  Returns:
    Weight scale per channel for binary and ternary quantizers with auto or
    auto_po2 alpha/threshold; 1.0 when the quantizer carries no scale.
  """
  if hasattr(quantizer, "scale") and quantizer.scale is not None:
    return K.eval(quantizer.scale)
  return 1.0


def _get_integer_bits(min_value, max_value, bits=8, symmetric=False,
                      keep_negative=False, is_clipping=True):
  """Estimates the integer bit(number of bits to the left of the binary point)
  satisfying the input argument constraints.

  Args:
    min_value: A tensor object. Its elements are in float representing the
      minimum values of ranges.
    max_value: A tensor object. Its elements are in float representing the
      maximum values of ranges.
    bits: number of bits to perform quantization.
    symmetric: boolean type. if true, it enforces negative and positive
      ranges to be symmetric.
    keep_negative: boolean type. if true, we do not clip negative numbers.
    is_clipping: boolean type. if true, the min_value and max_value are
      clipped to nearest powers-of-2.

  Returns:
    integer_bits : number of bits to the left of the binary point.
  """
  # Make the min and max values positive if only using positive values.
  if not keep_negative:
    min_value = K.maximum(min_value, 0)
    max_value = K.maximum(max_value, 0)
  # The number of bits excluding the sign bit
  # (bool keep_negative is implicitly cast to 0/1 here).
  unsigned_bits = bits - keep_negative
  # log2 of absolute min_value and max_value
  min_value_log2 = K.log(K.abs(min_value)) / np.log(2.0)
  max_value_log2 = K.log(K.abs(max_value)) / np.log(2.0)
  # Estimate integer_bits
  if is_clipping:
    min_int_bits = tf.math.round(
        tf.where(min_value_log2 > 0, min_value_log2, 0))
    max_int_bits = tf.math.round(
        tf.where(max_value_log2 > 0, max_value_log2, 0))
  else:
    # Ceil instead of round so that the range is never shrunk.
    min_int_bits = tf.math.ceil(tf.where(min_value_log2 > 0, min_value_log2,
                                         0))
    max_int_bits = tf.math.ceil(tf.where(max_value_log2 > 0, max_value_log2,
                                         0))
  # Checks max_value is bounded by the maximum positive value of
  # pow(2,integer_bits) - pow(2,-fractional_bits).
  max_value_po2 = pow(2.0, max_int_bits) - pow(
      2.0, K.minimum(max_int_bits - unsigned_bits, 0))
  max_int_bits = tf.where(max_value <= max_value_po2, max_int_bits,
                          max_int_bits + 1)
  if symmetric:
    # Checks min_value is bounded by the minimum negative value of
    # - pow(2,integer_bits) + pow(2,-fractional_bits).
    min_value_po2 = -pow(2.0, min_int_bits) + pow(
        2.0, K.minimum(min_int_bits - unsigned_bits, 0))
    min_int_bits = tf.where(min_value_po2 <= min_value, min_int_bits,
                            min_int_bits + 1)
  # To cover both negative and positive ranges with integer_bits.
  # (For keep_negative=False, min_int_bits is 0.)
  integer_bits = tf.cast(K.maximum(min_int_bits, max_int_bits),
                         dtype=tf.int32)
  # It assumes that integer_bits cannot be greater than unsigned_bits
  integer_bits = K.minimum(unsigned_bits, integer_bits)
  return integer_bits


def _get_scaling_axis(scale_axis: Any, len_axis: int) -> List[int]:
  """Get the axis/axes to perform auto scaling at.

  Args:
    scale_axis: int or List[int] representing which axis/axes to calculate
      scale at.
    len_axis: int representing the rank (number of axes) of the tensor on
      which scaling is performed.

  Returns:
    List[int] representing the scaling axes.
  """
  if scale_axis is not None:
    # if scale_axis is set, scale over all axis except the scale_axis.
    if isinstance(scale_axis, list):
      axis = [i for i in range(len_axis) if i not in scale_axis]
    else:
      # Concatenate axes [0, scale_axis) and (scale_axis, len_axis).
      axis = tf.range(scale_axis)
      axis = tf.concat([axis, tf.range(scale_axis + 1, len_axis)], axis=0)
  else:
    # if scale_axis is not set, scale over all axis except the channel axis.
    if K.image_data_format() == "channels_last":
      axis = tf.range(tf.math.maximum(len_axis - 1, 0))
    else:
      axis = tf.range(1, len_axis)
  return axis


def _get_unrolled_shape(input_shape: List[int], unroll_factor: Any,
                        unroll_axis: Any) -> Tuple[List[int], Any]:
  """Gets the shape of the unrolled tensor given unroll_factor and unroll_axis.

  Both unroll_factor and unroll_axis can either be ints or List[int]. If they
  are List[int], their lengths must match, and their values represent every
  unroll axis and its corresponding unroll factor.

  Examples:
  1. If input_shape = [16, 32], the unroll_factor = 4, and unroll_axis = 1.
     This means that axis 1 of the input should be unrolled by a factor of 4.
     This function would return a tuple; the first element represents the
     unrolled shape [16, 8, 4], and the second element represents the updated
     unroll axis in the unrolled shape which, in this case, is still 1.
  2. If input_shape = [16, 32], the unroll_factor = [2, 4], and
     unroll_axis = [0, 1]. This means that axis 0 of the input should be
     unrolled by a factor of 2, while axis 1 of the input should be unrolled
     by a factor of 4. This function would return a tuple; the first element
     represents the unrolled shape [4, 2, 8, 4], and the second element
     represents the updated unroll axis in the unrolled shape which, in this
     case, will be [0, 2].

  Args:
    input_shape: List[int]. The shape of the input tensor to be unrolled.
    unroll_factor: int or List[int] representing the unrolling factor(s)
      across various dimensions of the input tensors. If a list is used, its
      length has to match unroll_axis.
    unroll_axis: int or List[int] representing which axis/axes to unroll. If
      a list is used, its length has to match unroll_factor.

  Returns:
    Tuple of (List of ints representing the shape of the unrolled tensor,
    Int or List[int] representing updated scale_axis after unrolling.
  """

  def _unroll_one_axis(shape, factor, axis):
    # Splits shape[axis] into (shape[axis] // factor, factor), in place.
    shape[axis] = shape[axis] // factor
    shape.insert(axis + 1, factor)

  unrolled_shape = input_shape.copy()
  if isinstance(unroll_factor, int) and isinstance(unroll_axis, int):
    unrolled_scale_axis = unroll_axis
    _unroll_one_axis(unrolled_shape, unroll_factor, unroll_axis)
  elif isinstance(unroll_factor, list) and isinstance(unroll_axis, list):
    # axis_shift shifts the pre-defined axis every time we add a new
    # unrolled axis
    assert len(unroll_axis) == len(unroll_factor), (
        "unroll_axis and unroll_factor must have the same length")
    unrolled_scale_axis = unroll_axis.copy()
    axis_shift = 0
    for idx, (axis, factor) in enumerate(zip(unroll_axis, unroll_factor)):
      unrolled_scale_axis[idx] += axis_shift
      _unroll_one_axis(unrolled_shape, factor, axis+axis_shift)
      axis_shift += 1
  else:
    raise ValueError(
        "Both unroll_factor and unroll_axis has to be either ints or lists"
    )
  return unrolled_shape, unrolled_scale_axis


def _get_rolled_back_shape(input_shape: List[int], roll_axis: Any) -> List[int]:
  """Gets the shape of the rolled back tensor given roll_axis.

  If roll_axis is an int, the input shape will be rolled back once along the
  roll_axis. If roll_axis is List[int], the input shape will be rolled back
  len(roll_axis) times.

  Examples:
  1. If input_shape = [4, 2, 8, 4] and roll_axis = 1. This means that the
     axis following axis 1 will be rolled back to axis 1. This function would
     return a the rolled back shape which is [4, 16, 4] in this case.
  2. If input_shape = [4, 2, 8, 4] and roll_axis = [0, 2].
     This means that the axis following axis 0 will be rolled back to axis 0,
     and the axis following axis 2 will be rolled back to axis 2. This
     function would return the rolled back shape which is [16, 32] in this
     case.

  Args:
    input_shape: List[int]. The shape of the input tensor to be rolled back.
    roll_axis: int or List[int] representing which axis/axes of the tensor to
      roll back.

  Returns:
    List of ints representing the shape of the rolled back tensor.
  """

  def _roll_back_one_axis(shape, axis):
    # Merges shape[axis] and shape[axis+1] into one axis, in place.
    shape[axis] *= shape[axis+1]
    shape.pop(axis + 1)

  rolled_shape = input_shape.copy()
  if isinstance(roll_axis, int):
    _roll_back_one_axis(rolled_shape, roll_axis)
  elif isinstance(roll_axis, list):
    # axis_shift shifts the pre-defined axis every time we roll back an axis.
    axis_shift = 0
    for axis in roll_axis:
      _roll_back_one_axis(rolled_shape, axis+axis_shift)
      axis_shift -= 1
  return rolled_shape


def _validate_axis_and_eps(x_shape: List[int], scale_axis: Any,
                           elements_per_scale: Any) -> Tuple[Any, Any]:
  """Validates scale_axis and elements_per_scale.

  This function verifies that the values for scale_axis and
  elements_per_scale are valid and perform any required transformations
  returning a Tuple of verified (scale_axis, elements_per_scale)

  This function accepts scale_axis and elements_per_scale to be either ints
  or list of ints, so it verifies 4 different scenarios:

  1. If both scale_axis and elements_per_scale are ints. The function
     verifies that the x_shape is divisible by elements_per_scale at the
     scale_axis.
  2. If scale_axis is an int while elements_per_scale is a list. The
     function raises an error since this is an ambiguous state.
  3. If scale_axis is a list and elements_per_scale is an int. The function
     modifies elements_per_scale to a list of length scale_axis, and it
     verifies that the x_shape is divisible by the elements_per_scale at the
     corresponding scale_axis.
  4. If scale_axis is a list and elements_per_scale is a list.
The function verifies that the length of the two lists match, and that the x_shape is divisible by the corresponding elements_per_scale at the corresponding scale_axis. Examples: - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=4 --> Valid - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=3 --> Invalid - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=[2, 4] --> Invalid - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=2 --> Valid - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=[2, 4] --> Valid - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=[1, 2, 4] --> Invalid Args: x_shape: List[int] representing the shape of the input tensor. scale_axis: Int or List[int] representing the axis/axes to perform auto scaling at. elements_per_scale: Int or List[int] representing the number of elements/values associated with every scale along the corresponding scale_axis. Returns: A Tuple of verified (scale_axis, elements_per_scale). """ assert ( scale_axis is not None ), "scale_axis must be set if elements_per_scale is used." # if both are ints if isinstance(scale_axis, int) and isinstance(elements_per_scale, int): assert x_shape[scale_axis] % elements_per_scale == 0, ( f"scaling axis of dimension {x_shape[scale_axis]} has to be divisible " f"by thenumber of elements per scale, given {elements_per_scale}." ) # if scale_axis is int and elements_per_scale is a list of ints elif isinstance(scale_axis, int) and isinstance(elements_per_scale, list): raise ValueError( f"scale_axis is an integer {scale_axis}, " f"while {elements_per_scale} is a list of values which is ambigious." 
) # if scale_axis is list of ints and elements_per_scale is an int elif isinstance(scale_axis, list) and isinstance(elements_per_scale, int): for axis in scale_axis: assert x_shape[axis] % elements_per_scale == 0, ( f"scaling axis of dimension {x_shape[axis]} has to be divisible by " f"number of elements per scale, given {elements_per_scale}." ) # duplicate the elements_per_scale to match length of scale_axis elements_per_scale = [elements_per_scale] * len(scale_axis) # if both scale_axis and elements_per_scale are lists else: assert len(scale_axis) == len( elements_per_scale ), (f"both scale_axis and elements_per_scale lists must match in length; " f"Got {len(scale_axis)} and {len(elements_per_scale)}") for axis, eps in zip(scale_axis, elements_per_scale): assert x_shape[axis] % eps == 0, ( f"scaling axis of dimension {x_shape[axis]} has to be divisible by" f" the corresponding number of elements per scale, given {eps}." ) assert ( isinstance(scale_axis, int) and isinstance(elements_per_scale, int) ) or (isinstance(scale_axis, list) and isinstance(elements_per_scale, list)) return scale_axis, elements_per_scale def _repeat_along_axis(x: tf.Tensor, axis: int, repeats: int) -> tf.Tensor: """Repeats the elements in a tensor along the specified axis.""" return tf.repeat(x, repeats=repeats, axis=axis) def _repeat_along_axes(x: tf.Tensor, axis: Any, repeats: Any) -> tf.Tensor: """Repeats the elements in a tensor along the specified axes.""" if isinstance(axis, int) and isinstance(repeats, int): x = _repeat_along_axis(x, axis, repeats) elif isinstance(axis, list) and isinstance(repeats, list): for a, r in zip(axis, repeats): x = _repeat_along_axis(x, axis=a, repeats=r) return x def _get_scale_mean( scale_axis: Any, x: tf.Tensor, q: tf.Tensor, elements_per_scale: Any ): """Gets the mean of the tensor along the specified scaling axis/axes. Args: scale_axis: int or List[int] representing which axis/axes to calculate scale at. x: A tensor object. Its elements are in float. 
q: A tensor object. Its elements are in quantized format of x. elements_per_scale: if set to an int or List[int], we create multiple scales per axis across scale_axis, where 'elements_per_scale' represents the number of elements/values associated with every separate scale value. Returns: A tuple of two tensors representing the mean of x and its quantized format along the specified scaling axis/axes. """ if elements_per_scale is not None: # Get the input shape x_shape = x.shape.as_list() scale_axis, elements_per_scale = _validate_axis_and_eps( x_shape, scale_axis, elements_per_scale) # get the shape of unrolled tensors x and q unrolled_shape, unrolled_scale_axis = _get_unrolled_shape( x_shape, elements_per_scale, scale_axis) # Unroll x and q x1 = tf.reshape(x, unrolled_shape) q1 = tf.reshape(q, unrolled_shape) # Get the mean along the unroll axis/axes axes_of_mean = _get_scaling_axis(unrolled_scale_axis, len(unrolled_shape)) qx = K.mean(tf.math.multiply(x1, q1), axis=axes_of_mean, keepdims=True) qq = K.mean(tf.math.multiply(q1, q1), axis=axes_of_mean, keepdims=True) # Reshape qx and qq to be divisible by the input shape. # To achieve this, qx and qq are first rolled back along unroll axis. # Then, the values along the scale_axis are repeated "elements_per_scale" # times to match the original shape. 
rolled_back_shape = _get_rolled_back_shape(qx.shape.as_list(), roll_axis=unrolled_scale_axis) qx = tf.reshape(qx, rolled_back_shape) qx = _repeat_along_axes(qx, repeats=elements_per_scale, axis=scale_axis) qq = tf.reshape(qq, rolled_back_shape) qq = _repeat_along_axes(qq, repeats=elements_per_scale, axis=scale_axis) else: len_axis = len(x.shape) axis = _get_scaling_axis(scale_axis, len_axis) qx = K.mean(tf.math.multiply(x, q), axis=axis, keepdims=True) qq = K.mean(tf.math.multiply(q, q), axis=axis, keepdims=True) return qx, qq def _clip_po2_scale(scale: tf.Tensor, min_po2_exponent: Any, max_po2_exponent: Any): """Clip power-of-two scales given minimum and maximum po2 exponenets.""" min_po2 = None if min_po2_exponent is None else 2**min_po2_exponent max_po2 = None if max_po2_exponent is None else 2**max_po2_exponent scale = K.clip(scale, min_value=min_po2, max_value=max_po2) return scale def _get_least_squares_scale( alpha: Any, x: tf.Tensor, q: tf.Tensor, scale_axis: Any = None, per_channel_scale: bool = True, elements_per_scale: Any = None, min_po2_exponent: Any = None, max_po2_exponent: Any = None): """Gets scaling factor for scaling the tensor per channel. It uses the least squares method to find the scaling factor. (https://en.wikipedia.org/wiki/Linear_least_squares) Arguments: alpha: A float or string. When it is string, it should be either "auto" or "auto_po2", and scale = sum(x * q, axis=all but last) / sum(q * q, axis=all but last) x: A tensor object. Its elements are in float. q: A tensor object. Its elements are in quantized format of x. scale_axis: int or List[int] representing which axis/axes to calculate scale from. per_channel_scale: A bool. Whether to perform per-channel scaling or not. elements_per_scale: if set to an int or List[int], we create multiple scales per axis across scale_axis, where 'elements_per_scale' represents the number of elements/values associated with every separate scale value. 
    min_po2_exponent: if set while using "auto_po2", it represents the
      minimum allowed power of two exponent.
    max_po2_exponent: if set while using "auto_po2", it represents the
      maximum allowed power of two exponent.

  Returns:
    A scaling factor tensor or scalar for scaling tensor per channel.
  """
  if isinstance(alpha, six.string_types) and "auto" in alpha:
    assert alpha in ["auto", "auto_po2"]
    # in different tensorflow version (e.g., 2.4)
    # x.shape is a tuple which doesn't have as_list() method
    try:
      x_shape = x.shape.as_list()
    except AttributeError:
      x_shape = list(x.shape)
    len_axis = len(x_shape)
    if not per_channel_scale:
      # Single scalar scale over the whole tensor.
      qx = K.mean(x * q, keepdims=True)
      qq = K.mean(q * q, keepdims=True)
    else:
      if len_axis > 1:
        qx, qq = _get_scale_mean(scale_axis, x, q, elements_per_scale)
      else:
        # No summing (averaging) along the channel axis to get per-channel
        # scales.
        qx = x * q
        qq = q * q
    # Least-squares solution; epsilon avoids division by zero when q == 0.
    scale = qx / (qq + K.epsilon())
    if alpha == "auto_po2":
      # Round the scale to the nearest power of two.
      scale = K.pow(2.0,
                    tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0)))
      if min_po2_exponent is not None or max_po2_exponent is not None:
        scale = _clip_po2_scale(scale, min_po2_exponent, max_po2_exponent)
  elif alpha is None:
    scale = 1.0
  elif isinstance(alpha, np.ndarray):
    scale = alpha
  else:
    scale = float(alpha)
  return scale


def _get_scale(*args, **kwargs):
  """Old name for _get_least_squares_scale. Kept for backwards
  compatibility."""
  return _get_least_squares_scale(*args, **kwargs)


def smooth_sigmoid(x):
  """Implements a linear approximation of a sigmoid function."""
  # if we use 2.65 as the clipping point, MSE w.r.t. original sigmoid is
  # smaller than hard_sigmoid but the arithmetic for it is (x >> 3) +
  # (x >> 4) + 0.5, which is also not bad.
  return tf.keras.backend.clip(0.1875 * x + 0.5, 0.0, 1.0)


def hard_sigmoid(x):
  """Computes hard_sigmoid function that saturates between 0 and 1."""
  return tf.keras.backend.clip(0.5 * x + 0.5, 0.0, 1.0)


def binary_sigmoid(x):
  """Computes binary_sigmoid."""
  return _round_through(hard_sigmoid(x))


# we use a version of approximated sigmoid everywhere in this code.
# we can set it to hard_sigmoid(x) or smooth_sigmoid(x).
_default_sigmoid_type = "hard"
_sigmoid = None


def set_internal_sigmoid(mode):
  """Sets _sigmoid to either real, hard or smooth.

  Args:
    mode: one of "real", "hard" or "smooth".

  Raises:
    ValueError: if mode is not one of the supported strings.
  """
  global _sigmoid

  if mode not in ["real", "hard", "smooth"]:
    raise ValueError("mode has to be 'real', 'hard' or 'smooth'.")

  if mode == "hard":
    _sigmoid = hard_sigmoid
  elif mode == "smooth":
    _sigmoid = smooth_sigmoid
  elif mode == "real":
    _sigmoid = tf.keras.backend.sigmoid


# Initialize the module-level sigmoid to the default approximation.
set_internal_sigmoid(_default_sigmoid_type)


def binary_tanh(x):
  """Computes binary_tanh function that outputs -1 and 1."""
  return 2.0 * binary_sigmoid(x) - 1.0


def hard_tanh(x):
  """Computes hard_tanh function that saturates between -1 and 1."""
  return 2.0 * hard_sigmoid(x) - 1.0


def smooth_tanh(x):
  """Computes smooth_tanh function that saturates between -1 and 1."""
  return 2.0 * smooth_sigmoid(x) - 1.0


def stochastic_round(x, precision=0.5):
  """Performs stochastic rounding to the first decimal point."""
  scale = 1.0 / precision
  scale_x = x * scale
  fraction = scale_x - tf.floor(scale_x)

  # Round up with probability equal to the fractional part, so the
  # expected value of the result equals scale_x.
  result = tf.where(fraction < tf.random.uniform(tf.shape(x)),
                    tf.math.floor(scale_x), tf.math.ceil(scale_x))
  return result / scale


def stochastic_round_po2(x):
  """Performs stochastic rounding for the power of two."""
  # TODO(b/237832905): test stochastic_round_po2 and constraint.
  # because quantizer is applied after constraint.
  y = tf.abs(x)
  eps = tf.keras.backend.epsilon()
  log2 = tf.keras.backend.log(2.0)

  x_log2 = tf.round(tf.keras.backend.log(y + eps) / log2)
  po2 = tf.cast(pow(2.0, tf.cast(x_log2, dtype="float32")), dtype="float32")
  left_val = tf.where(po2 > y, x_log2 - 1, x_log2)
  right_val = tf.where(po2 > y, x_log2, x_log2 + 1)
  # sampling in [2**left_val, 2**right_val].
  minval = 2 ** left_val
  maxval = 2 ** right_val
  val = tf.random.uniform(tf.shape(y), minval=minval, maxval=maxval)
  # use y as a threshold to keep the probability [2**left_val, y,
  # 2**right_val] so that the mean value of the sample should be y
  x_po2 = tf.where(y < val, left_val, right_val)
  """
  x_log2 = stochastic_round(tf.keras.backend.log(y + eps) / log2)
  sign = tf.sign(x)
  po2 = (
      tf.sign(x) *
      tf.cast(pow(2.0, tf.cast(x_log2, dtype="float32")), dtype="float32")
  )
  """
  return x_po2


def _round_through(x, use_stochastic_rounding=False, precision=0.5):
  """Rounds x but using straight through estimator.

  We use the trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182).

  Straight through estimator is a biased estimator for the rounding
  operation defined by Hinton"s Coursera Lecture 9c where dL/dx is made
  equal to dL/dy for y = f(x) during gradient computation, where f(x) is
  a non-derivable function. In that case, we assume df/dx = 1 in:

  dL   dL df   dL
  -- = -- -- = --
  dx   df dx   dy

  (https://www.youtube.com/watch?v=LN0xtUuJsEI&list=PLoRl3Ht4JOcdU872GhiYWf6jwrk_SNhz9&index=41)

  Arguments:
    x: tensor to perform round operation with straight through gradient.
    use_stochastic_rounding: if true, we perform stochastic rounding.
    precision: by default we will use 0.5 as precision, but that can be
      overriden by the user.

  Returns:
    Rounded tensor.
  """
  if use_stochastic_rounding:
    # Stochastic rounding only during training; deterministic at inference.
    output = tf_utils.smart_cond(
        K.learning_phase(),
        lambda: x + tf.stop_gradient(-x + stochastic_round(x, precision)),
        lambda: x + tf.stop_gradient(-x + tf.round(x)))
  else:
    output = x + tf.stop_gradient(-x + tf.round(x))
  return output


def _sign_through(x):
  """Computes the sign operation using the straight through estimator."""

  # tf.sign generates -1, 0 or +1, so it should not be used when we attempt
  # to generate -1 and +1.

  k_sign = tf.sign(x)

  return x + tf.stop_gradient(-x + k_sign)


def _ceil_through(x):
  """Computes the ceiling operation using straight through estimator."""

  return x + tf.stop_gradient(-x + tf.ceil(x))


def _floor_through(x):
  """Computes the floor operation using straight through estimator."""

  return x + tf.stop_gradient(-x + tf.floor(x))


#
# Activation functions for quantized networks.
#
# Please note some of these functions can be used as well
# as quantizer functions for weights of dense and convolutional
# layers.
#


@quantizer_registry.register_quantizer
class quantized_linear(base_quantizer.BaseQuantizer):
  """Linear quantization with fixed number of bits.

  This quantizer maps inputs to the nearest value of a fixed number of
  outputs that are evenly spaced, with possible scaling and stochastic
  rounding. This is an updated version of the legacy quantized_bits.

  The core computation is:
    1. Divide the tensor by a quantization scale
    2. Clip the tensor to a specified range
    3. Round to the nearest integer
    4. Multiply the rounded result by the quantization scale

  This clip range is determined by
    - The number of bits we have to represent the number
    - Whether we want to have a symmetric range or not
    - Whether we want to keep negative numbers or not

  The quantization scale is defined by either the quantizer parameters or
  the data passed to the __call__ method. See documentation for the `alpha`
  parameter to find out more.
@quantizer_registry.register_quantizer
class quantized_linear(base_quantizer.BaseQuantizer):
  """Linear quantization with fixed number of bits.

  This quantizer maps inputs to the nearest value of a fixed number of
  outputs that are evenly spaced, with possible scaling and stochastic
  rounding. This is an updated version of the legacy quantized_bits.

  The core computation is:
    1. Divide the tensor by a quantization scale
    2. Clip the tensor to a specified range
    3. Round to the nearest integer
    4. Multiply the rounded result by the quantization scale

  This clip range is determined by
    - The number of bits we have to represent the number
    - Whether we want to have a symmetric range or not
    - Whether we want to keep negative numbers or not

  The quantization scale is defined by either the quantizer parameters or the
  data passed to the __call__ method. See documentation for the `alpha`
  parameter to find out more.

  For backprop purposes, the quantizer uses the straight-through estimator
  for the rounding step (https://arxiv.org/pdf/1903.05662.pdf). Thus the
  gradient of the __call__ method is 1 on the interval
  [quantization_scale * clip_min, quantization_scale * clip_max] and 0
  elsewhere.

  The quantizer also supports a number of other optional features:
  - Stochastic rounding (see the `stochastic_rounding` parameter)
  - Quantization noise (see the `qnoise_factor` parameter)

  Notes on the various "scales" in quantized_linear:

  - The quantization scale is the scale used in the core computation (see
    above). You can access it via the `quantization_scale` attribute.
  - The data type scale is the scale is determined by the type of data stored
    on hardware on a small device running a true quantized model. It is the
    quantization scale needed to represent `bits` bits, `integer` of which
    are integer bits, and one bit is reserved for the sign if `keep_negative`
    is True. It can be calculated as 2 ** (integer - bits + keep_negative).
    You can access it via the `data_type_scale` attribute.
  - The `scale` attribute stores the quotient of the quantization scale and
    the data type scale. This is also the scale that can be directly
    specified by the user, via the `alpha` parameter.

  These three quantities are related by the equation
  scale = quantization_scale / data_type_scale.

  See the diagram below of scale usage in a quantized conv layer.

  +---------------------------------------------------------------------+
  | data_type_scale ----------------> stored_weights                    |
  | (determines decimal point)              |                           |
  |                                         V                           |
  |                                      conv op                        |
  |                                         |                           |
  |                                         V                           |
  |                                    accumulator                      |
  |                                         |                           |
  | determines quantization                 V                           |
  | range and precision ------------> quantization_scale               |
  | (per channel)                           |                           |
  |                                         V                           |
  |                                     activation                      |
  +---------------------------------------------------------------------+

  # TODO: The only fundamentally necessary scale is the quantization scale.
  # We should consider removing the data type scale and scale attributes,
  # but know that this will require rewriting much of how qtools and HLS4ML
  # use these scale attributes.

  Note on binary quantization (bits=1):
    The core computation is modified here when `keep_negative` is True to
    perform a scaled sign function. This is needed because the core
    computation as defined above requires that 0 be mapped to 0, which does
    not allow us to keep both positive and negative outputs for binary
    quantization. Special shifting operations are used to achieve this.

  Example usage:

  # 8-bit quantization with 3 integer bits
  >>> q = quantized_linear(8, 3)
  >>> x = tf.constant([0.0, 0.5, 1.0, 1.5, 2.0])
  >>> q(x).numpy()
  array([0., 0., 1., 2., 2.], dtype=float32)

  # 2-bit quantization with "auto" and tensor alphas
  >>> q_auto = quantized_linear(2, alpha="auto")
  >>> x = tf.constant([0.0, 0.5, 1.0, 1.5, 2.0])
  >>> q_auto(x).numpy()
  array([0., 0., 0., 2., 2.], dtype=float32)
  >>> q_auto.scale.numpy()
  array([4.], dtype=float32)
  >>> q_auto.quantization_scale.numpy()
  array([2.], dtype=float32)
  >>> q_fixed = quantized_linear(2, alpha=q_auto.scale)
  >>> q_fixed(x).numpy()
  array([0., 0., 0., 2., 2.], dtype=float32)

  Args:
    bits (int): Number of bits to represent the number. Defaults to 8.
    integer (int): Number of bits to the left of the decimal point, used for
      data_type_scale. Defaults to 0.
    symmetric (bool): If true, we will have the same number of values for
      positive and negative numbers. Defaults to True.
    alpha (str, Tensor, None): Instructions for determining the quantization
      scale. Defaults to None.
      - If None: the quantization scale is the data type scale, determined
        by `integer`, `bits`, and `keep_negative`.
      - If "auto", the quantization scale is calculated as the minimum
        floating point scale per-channel that does not clip the max of x.
      - If "auto_po2", the quantization scale is chosen as the power of two
        per-channel that minimizes squared error between the quantized x
        and the original x.
      - If Tensor: The quantization scale is the Tensor passed in
        multiplied by the data type scale.
    keep_negative (bool): If false, we clip negative numbers. Defaults to
      True.
    use_stochastic_rounding (bool): If true, we perform stochastic rounding
      (https://arxiv.org/pdf/1502.02551.pdf).
    scale_axis (int, None): Which axis to calculate scale from. If None, we
      perform per-channel scaling based off of the image data format. Note
      that each entry of a rank-1 tensor is considered its own channel by
      default. See `_get_scaling_axis` for more details. Defaults to None.
    qnoise_factor (float): A scalar from 0 to 1 that represents the level of
      quantization noise to add. This controls the amount of the
      quantization noise to add to the outputs by changing the weighted sum
      of (1 - qnoise_factor) * unquantized_x + qnoise_factor * quantized_x.
      Defaults to 1.0, which means that the result is fully quantized.
    use_variables (bool): If true, we use tf.Variables to store certain
      parameters. See the BaseQuantizer implementation for more details.
      Defaults to False. If set to True, be sure to use the special attribute
      update methods detailed in the BaseQuantizer.
    var_name (str or None): A variable name shared between the tf.Variables
      created in on initialization, if use_variables is true. If None, the
      variable names are generated automatically based on the parameter names
      along with a uid. Defaults to None.

  Returns:
    function: Function that computes linear quantization.

  Raises:
    ValueError:
      - If `bits` is not positive, or is too small to represent `integer`.
      - If `integer` is negative.
      - If `alpha` is a string but not one of ("auto", "auto_po2").
  """

  # string options for alpha parameter
  ALPHA_STRING_OPTIONS = ("auto", "auto_po2")

  def __init__(
      self,
      bits=8,
      integer=0,
      symmetric=1,
      keep_negative=True,
      alpha=None,
      use_stochastic_rounding=False,
      scale_axis=None,
      qnoise_factor=1.0,
      var_name=None,
      use_variables=False,
  ):
    super().__init__()
    self.var_name = var_name

    # Error checking
    self._check_bits(bits)
    self._check_alpha(alpha)

    # Set non-modifyable attributes
    self._bits = bits
    self._integer = integer
    self._keep_negative = keep_negative
    self._use_stochastic_rounding = use_stochastic_rounding
    self._scale_axis = scale_axis
    self._use_variables = use_variables

    # Set modifyable attributes
    self.alpha = alpha
    self.qnoise_factor = qnoise_factor
    self.symmetric = symmetric

    # Set default quantization scale
    self.quantization_scale = self.default_quantization_scale

  def _check_bits(self, bits):
    """Error checking for bits parameter"""
    err_msg = f"Bit count {bits} must be positive"
    if bits <= 0:
      raise ValueError(err_msg)

  def _check_alpha(self, alpha):
    """Error checking for alpha parameter"""

    if isinstance(alpha, six.string_types):
      # Check the quantizer has been given a valid alpha string
      if not alpha in self.ALPHA_STRING_OPTIONS:
        raise ValueError(
            f"Invalid alpha '{alpha}' for auto alpha computation. "
            f"Must be one of {self.ALPHA_STRING_OPTIONS}")
    elif alpha is not None:  # alpha is a tensor
      try:
        # any allowable array type can be cast as a numpy array
        np.array(alpha)
      except TypeError:
        raise TypeError(
            f"alpha must be, a string, an array, or None, not {type(alpha)}")

  @property
  def bits(self):
    return self._bits

  @property
  def integer(self):
    return self._integer

  @property
  def keep_negative(self):
    return self._keep_negative

  @property
  def use_stochastic_rounding(self):
    return self._use_stochastic_rounding

  @property
  def scale_axis(self):
    return self._scale_axis

  @property
  def use_variables(self):
    return self._use_variables

  @property
  def scale(self):
    return self.quantization_scale / self.data_type_scale

  @property
  def data_type_scale(self):
    """Quantization scale for the data type"""
    # integer is sometimes cast as int32, so cast to float32 to avoid errors
    integer = tf.cast(self.integer, tf.float32)
    return K.pow(2.0, integer - self.bits + self.keep_negative)

  @property
  def auto_alpha(self):
    """Returns true if using a data-dependent alpha"""
    return isinstance(self.alpha, six.string_types)

  @property
  def use_sign_function(self):
    """Return true if using sign function for quantization"""
    return (self.bits == 1.0) and self.keep_negative

  @property
  def default_quantization_scale(self):
    """Calculate and set quantization_scale default"""

    # Set default quantization scale
    quantization_scale = self.data_type_scale

    # Quantization scale given by alpha
    if self.alpha is not None and not self.auto_alpha:
      quantization_scale = self.alpha * self.data_type_scale

    return quantization_scale

  def get_clip_bounds(self):
    """Get bounds of clip range"""
    if self.use_sign_function:
      clip_min = K.cast_to_floatx(-0.5)
      clip_max = K.cast_to_floatx(0.5)
    else:
      unsigned_bits_po2 = K.pow(2.0, self.bits - self.keep_negative)
      # if symmetric, clip_min is negative of clip_max. Otherwise clip_min is
      # lowered by 1, giving us one more representable number
      clip_min = self.keep_negative * (-unsigned_bits_po2 + self.symmetric)
      clip_max = unsigned_bits_po2 - K.cast_to_floatx(1.0)

    return clip_min, clip_max

  def __call__(self, x):
    """Core quantization function"""

    # Build if not already built
    self._build()

    # Data type conversion
    x = K.cast_to_floatx(x)
    shape = x.shape

    if self.auto_alpha:
      # get data-dependent quantization scale
      quantization_scale = self._get_auto_quantization_scale(x)
    else:
      # quantization scale determined by quantizer params, not data
      # see default_quantization_scale property for more info
      quantization_scale = self.quantization_scale

    scaled_xq = self._scale_clip_and_round(x, quantization_scale)
    xq = scaled_xq * quantization_scale

    res = x + self.qnoise_factor * (xq - x)
    res.set_shape(shape)
    return res

  def _scale_clip_and_round(self, x, quantization_scale):
    """Scale, clip, and round x to an integer value in a limited range

    Note that the internal shift is needed for 1-bit quantization to ensure
    that a sign function is used. Otherwise, the binary quantizer would have
    three output values"""

    # special shifting needed to compute a sign function.
    shift = self.use_sign_function * 0.5

    clip_min, clip_max = self.get_clip_bounds()

    scaled_x = x / quantization_scale
    clipped_scaled_x = K.clip(scaled_x, clip_min, clip_max)
    # Round through to nearest integer, using straight-through estimator
    # for gradient computations.
    scaled_xq = _round_through(
        clipped_scaled_x - shift,
        use_stochastic_rounding=self.use_stochastic_rounding,
        precision=1.0,  # using 1.0 precision so that we round to an integer
    )

    return scaled_xq + shift

  def _get_auto_quantization_scale(self, x):
    """Get quantization_scale, either from self or from input x"""

    # Get the minimum floating point scale that does not clip the max of x
    # This is the quantization scale for alpha="auto"
    quantization_scale = self._get_quantization_scale_from_max_data(x)

    if self.alpha == "auto_po2":
      quantization_scale = self._po2_autoscale(x, quantization_scale)

    # update quantization_scale variable
    # stop_gradient on quantization_scale to ignore dependence on x
    self.quantization_scale = tf.stop_gradient(quantization_scale)

    # very important that return value is a tf.Variable with shape None
    return self.quantization_scale

  def _get_quantization_scale_from_max_data(self, x):
    """Get the minimum floating point scale that does not clip the max
    of x"""

    axis = _get_scaling_axis(self.scale_axis, tf.rank(x))
    clip_min, clip_max = self.get_clip_bounds()
    clip_range = clip_max - clip_min

    # get quantization scale- depends on whether we are keeping negative
    # divide by clip range to ensure that we clip right at the max of x
    if self.keep_negative:
      data_max = K.max(tf.math.abs(x), axis=axis, keepdims=True)
      quantization_scale = (data_max * 2) / clip_range
    else:
      data_max = K.max(x, axis=axis, keepdims=True)
      quantization_scale = data_max / clip_range

    return tf.math.maximum(quantization_scale, K.epsilon())

  def _po2_autoscale(self, x, quantization_scale):
    """Get an approximation of the "best" po2 scale using least squares"""

    # set alpha scale to a near power of two
    quantization_scale = K.pow(
        2.0,
        tf.math.round(K.log(quantization_scale + K.epsilon()) / K.log(2.0)))

    def loop_body(_, quantization_scale):
      """Loop body for least squares autoscaling"""

      scaled_xq = self._scale_clip_and_round(x, quantization_scale)
      new_quantization_scale = _get_least_squares_scale(
          alpha="auto_po2",
          x=x,
          q=scaled_xq,
          scale_axis=self.scale_axis,
      )
      return quantization_scale, new_quantization_scale

    def loop_cond(last_quantization_scale, quantization_scale):
      """Loop condition for least squares autoscaling- stop when the
      scale converges"""

      tensors_not_equal = tf.math.reduce_any(
          tf.not_equal(last_quantization_scale, quantization_scale))
      return tensors_not_equal

    # Need a tensor of the same shape as quantization_scale that
    # does not equal quantization_scale
    dummy_quantization_scale = -tf.ones_like(quantization_scale)

    # For 1-bit quantization, po2 autoscale loop is guaranteed to converge
    # after 1 iteration
    max_iterations = 1 if self.use_sign_function else 5

    _, quantization_scale = tf.while_loop(
        loop_cond,
        loop_body,
        (dummy_quantization_scale, quantization_scale),
        maximum_iterations=max_iterations,
    )

    return quantization_scale

  def _build(self):
    """Build if not done so already"""
    if not self.built:
      self.build(var_name=self.var_name, use_variables=self.use_variables)

  def max(self):
    """Get maximum value that quantized_linear class can represent."""
    _, clip_max = self.get_clip_bounds()
    return clip_max * self.quantization_scale

  def min(self):
    """Get minimum value that quantized_linear class can represent."""
    clip_min, _ = self.get_clip_bounds()
    return clip_min * self.quantization_scale

  def range(self):
    """Returns a list of all values that quantized_linear can represent."""

    if self.use_sign_function:
      return K.cast_to_floatx([self.max(), self.min()])
    else:
      clip_min, clip_max = self.get_clip_bounds()
      clip_max = tf.cast(clip_max, tf.int32)
      clip_min = tf.cast(clip_min, tf.int32)
      pos_array = K.cast_to_floatx(tf.range(clip_max + 1))
      neg_array = K.cast_to_floatx(tf.range(clip_min, 0))

      return self.quantization_scale * tf.concat([pos_array, neg_array],
                                                 axis=0)

  def __str__(self):
    # Main parameters always printed in string
    flags = [
        str(int(self.bits)),
        str(int(self.integer)),
        str(int(self.symmetric))]
    # Optional parameters only printed if not default
    if not self.keep_negative:
      flags.append("keep_negative=False")
    if self.auto_alpha:
      alpha = "'" + self.alpha + "'"
      flags.append("alpha=" + alpha)
    elif self.alpha is not None:
      # BUG FIX: previously read the unbound local `alpha` here
      # (np.array(alpha)), raising UnboundLocalError for tensor alphas.
      alpha = np.array(self.alpha)
      flags.append("alpha=" + str(alpha))
    if self.use_stochastic_rounding:
      flags.append("use_stochastic_rounding=" +
                   str(int(self.use_stochastic_rounding)))
    return "quantized_linear(" + ",".join(flags) + ")"

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"
      self.symmetric = True

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "integer": self.integer,
        "symmetric": self.symmetric,
        "alpha": self.alpha,
        "keep_negative": self.keep_negative,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "qnoise_factor": self.qnoise_factor,
    }
    return config
@quantizer_registry.register_quantizer
class quantized_bits(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Legacy quantizer: Quantizes the number to a number of bits.

  In general, we want to use a quantization function like:

  a = (pow(2,bits) - 1 - 0) / (max(x) - min(x))
  b = -min(x) * a

  in the equation:

  xq = a x + b

  This requires multiplication, which is undesirable. So, we
  enforce weights to be between -1 and 1 (max(x) = 1 and min(x) = -1),
  and separating the sign from the rest of the number as we make this
  function symmetric, thus resulting in the following approximation.

  1) max(x) = +1, min(x) = -1
  2) max(x) = -min(x)

  a = pow(2,bits-1)
  b = 0

  Finally, just remember that to represent the number with sign, the
  largest representation is -pow(2,bits) to pow(2, bits-1)

  Symmetric and keep_negative allow us to generate numbers that are
  symmetric (same number of negative and positive representations),
  and numbers that are positive.

  Note:
    the behavior of quantized_bits is different than Catapult HLS ac_fixed
    or Vivado HLS ap_fixed. For ac_fixed, when signed = true, it is
    equivalent to
    quantized_bits(word_length, integer_length-1, keep_negative=True)

  Attributes:
    bits: number of bits to perform quantization.
    integer: number of bits to the left of the decimal point.
    symmetric: if true, we will have the same number of values for positive
      and negative numbers.
    alpha: a tensor or None, the scaling factor per channel. If None, the
      scaling factor is 1 for all channels.
    keep_negative: if true, we do not clip negative numbers.
    use_stochastic_rounding: if true, we perform stochastic rounding.
    scale_axis: int or List[int] which axis/axes to calculate scale from.
    qnoise_factor: float. a scalar from 0 to 1 that represents the level of
      quantization noise to add. This controls the amount of the quantization
      noise to add to the outputs by changing the weighted sum of
      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.
    var_name: String or None. A variable name shared between the tf.Variables
      created in the build function. If None, it is generated automatically.
    use_ste: Bool. Whether to use "straight-through estimator" (STE) method
      or not.
    use_variables: Bool. Whether to make the quantizer variables to be
      dynamic tf.Variables or not.
    elements_per_scale: if set to an int or List[int], we create multiple
      scales per axis across scale_axis, where 'elements_per_scale'
      represents the number of elements/values associated with every separate
      scale value. It is only supported when using "auto_po2".
    min_po2_exponent: if set while using "auto_po2", it represents the
      minimum allowed power of two exponent.
    max_po2_exponent: if set while using "auto_po2", it represents the
      maximum allowed power of two exponent.
    post_training_scale: if set, it represents the scale value to be used for
      quantization.

  Returns:
    Function that computes fixed-point quantization with bits.
  """

  def __init__(self,
               bits=8,
               integer=0,
               symmetric=0,
               keep_negative=True,
               alpha=None,
               use_stochastic_rounding=False,
               scale_axis=None,
               qnoise_factor=1.0,
               var_name=None,
               use_ste=True,
               use_variables=False,
               elements_per_scale=None,
               min_po2_exponent=None,
               max_po2_exponent=None,
               post_training_scale=None):
    super().__init__()
    self.bits = bits
    self.integer = integer
    self.symmetric = symmetric
    self.keep_negative = keep_negative
    self.alpha = alpha
    self.use_stochastic_rounding = use_stochastic_rounding
    self.post_training_scale = post_training_scale
    # "auto*" |-> symmetric: auto-scaling modes always quantize symmetrically.
    if isinstance(self.alpha, six.string_types):
      self.freeze_scale = False
      self.symmetric = True
      # A fixed post-training scale freezes the otherwise trainable scale.
      if post_training_scale is not None:
        self.scale = np.array(post_training_scale)
        self.freeze_scale = True
    else:
      if post_training_scale is not None:
        raise ValueError(f"alpha={alpha} doesn't support post_training_scale: "
                         f"{post_training_scale}")
      self.scale = None
      # If alpha is not "auto*", then scale is fixed and not trainable.
      self.freeze_scale = True
    self.scale_axis = scale_axis
    self.qnoise_factor = qnoise_factor
    self.use_ste = use_ste
    self.var_name = var_name
    self.use_variables = use_variables
    self.elements_per_scale = elements_per_scale
    self.min_po2_exponent = min_po2_exponent
    self.max_po2_exponent = max_po2_exponent

  def __str__(self):
    # Convert Tensors to printable strings by converting to a numpy array and
    # then using regex to remove brackets when there is only one integer bit
    integer_bits = re.sub(
        r"\[(\d)\]", r"\g<1>",
        str(self.integer.numpy() if isinstance(self.integer, tf.Variable
                                              ) else self.integer))

    flags = [str(self.bits), integer_bits, str(int(self.symmetric))]
    if not self.keep_negative:
      flags.append("keep_negative=False")
    if self.alpha:
      alpha = str(self.alpha)
      if isinstance(self.alpha, six.string_types):
        alpha = "'" + alpha + "'"
      flags.append("alpha=" + alpha)
    if self.use_stochastic_rounding:
      flags.append("use_stochastic_rounding=" +
                   str(int(self.use_stochastic_rounding)))
    return "quantized_bits(" + ",".join(flags) + ")"

  def __call__(self, x):
    """Computes fixedpoint quantization of x."""
    if not self.built:
      self.build(var_name=self.var_name, use_variables=self.use_variables)

    x = K.cast_to_floatx(x)
    # quantized_bits with "1" bit becomes a binary implementation.
    unsigned_bits = self.bits - self.keep_negative

    # In pow function, use float datatype instead of integer, so that
    # K.pow() results will use float32 instead of int32 as the default
    # datatype. float32 has a much larger value range (2^128) than int32
    # (2^32), this is particularly important when quantizing very large
    # values, and when integer bits are set much larger than total bits.
    m = K.pow(2.0, K.cast_to_floatx(unsigned_bits))
    m_i = K.pow(2.0, K.cast_to_floatx(self.integer))

    # Verify that "elements_per_scale", "min_po2_exponent",
    # and "max_po2_exponent" are only set when alpha is "auto_po2"
    if self.alpha != "auto_po2":
      assert (
          self.elements_per_scale is None
      ), "elements_per_scale is only supported when using auto_po2"
      assert (
          self.min_po2_exponent is None
      ), "min_po2_exponent is only supported when using auto_po2"
      assert (
          self.max_po2_exponent is None
      ), "max_po2_exponent is only supported when using auto_po2"

    if self.alpha is None:
      scale = 1.0
    elif isinstance(self.alpha, six.string_types):
      # Data-dependent ("auto"/"auto_po2") scaling path; returns early below.
      # We only deal with the symmetric case right now.
      assert self.symmetric, "Only symmetric quantizers are implemented"
      len_axis = len(x.shape)
      if len_axis > 1:
        axis = _get_scaling_axis(self.scale_axis, len_axis)
      else:
        axis = [0]

      # Normalize x by the integer scale before estimating the data scale.
      x = x / m_i

      # Using 2's complement, we can represent 2**(bits-1)-1 positive values
      # If we wish to maintain symmetry, we can double 2**(bits-1)-1 to get
      # the total number of possible values we can represent.
      # If symmetry is not enforced, then we can represent (2**bits)-1 values
      # using 2's complement.
      levels = (2**(self.bits-1)-1) * 2 if self.symmetric else (2**self.bits)-1

      if self.freeze_scale:
        # Scale is fixed value. In this case, scale is extracted from the
        # post-training quantizer scale. In order to retrain models with
        # this scale value, we need to divide it by m to make it in the same
        # value scale as x.
        scale = self.scale / m
      else:
        # Calculate the scale.
        scale = (K.max(abs(x), axis=axis, keepdims=True) * 2) / levels

        # If alpha is "auto_po2", then get the "best" po2 scale
        if "po2" in self.alpha:
          scale = K.pow(2.0,
                        tf.math.round(K.log(scale + K.epsilon()) /
                                      np.log(2.0)))
          # Fixed number of refinement iterations; quantize with the current
          # scale, then re-fit the scale to the quantized values.
          for idx in range(5):
            v = tf.floor(tf.abs(x) / scale + 0.5)
            mask = v < levels / 2
            z = tf.sign(x) * tf.where(mask, v, tf.ones_like(v) * levels / 2)
            scale = _get_least_squares_scale(
                alpha="auto_po2",
                x=x,
                q=z,
                scale_axis=self.scale_axis,
                elements_per_scale=self.elements_per_scale,
                min_po2_exponent=self.min_po2_exponent,
                max_po2_exponent=self.max_po2_exponent)
        elif self.alpha != "auto":
          # If alpha is "auto", then directly use the "best"
          # floating point scale.
          raise ValueError(f"Invalid alpha '{self.alpha}'")

      # Even for trainable scale, we still need to quantize x with the best
      # scale. This extra step is needed to ensure that with the same input
      # and scale, the quantized output is identical between training and
      # inference.
      v = tf.floor(tf.abs(x) / scale + 0.5)
      mask = v < levels / 2
      z = tf.sign(x) * tf.where(mask, v, tf.ones_like(v) * levels / 2)

      # z is an integer number, so we must make the scale * m and z / m
      scale = scale * m

      # we will not use "z" right now because of stochastic_rounding
      # this is still under test.

      # if "new" in self.alpha:
      #  z = z / m
      #  self.scale = scale
      #  return x + tf.stop_gradient(-x + scale * z)

      # Undo the earlier x / m_i normalization before returning.
      x = m_i * x
      xq = m_i * z / m

      if not self.freeze_scale:
        self.scale = scale
      xq = scale * xq

      if self.use_ste:
        return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))
      else:
        return (1 - self.qnoise_factor) * x + tf.stop_gradient(
            self.qnoise_factor * xq)
    else:
      scale = self.alpha

    # quantized_bits with "1" bit becomes a binary implementation.
    if unsigned_bits > 0:
      p = x * m / m_i
      xq = m_i * tf.keras.backend.clip(
          _round_through(p, self.use_stochastic_rounding, precision=1.0),
          self.keep_negative * (-m + self.symmetric), m - 1) / m
    else:
      xq = tf.sign(x)
      xq += (1.0 - tf.abs(xq))
      if not self.keep_negative:
        xq = (xq + 1.0) / 2.0

    self.scale = scale
    xq = scale * xq
    if self.use_ste:
      return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))
    else:
      return (1 - self.qnoise_factor) * x + tf.stop_gradient(
          self.qnoise_factor * xq)

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"
      self.freeze_scale = False
      self.symmetric = True

  def max(self):
    """Get maximum value that quantized_bits class can represent."""
    unsigned_bits = self.bits - self.keep_negative
    if unsigned_bits > 0:
      return max(
          1.0,
          np.array(
              K.pow(2., K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return 1.0

  def min(self):
    """Get minimum value that quantized_bits class can represent."""
    if not self.keep_negative:
      return 0.0

    unsigned_bits = self.bits - self.keep_negative
    if unsigned_bits > 0:
      return -max(
          1.0,
          np.array(
              K.pow(2, K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return -1.0

  def range(self):
    """Returns a list of all values that quantized_bits can represent
    ordered by their binary representation ascending."""
    # Only implemented for the plain asymmetric signed unscaled case.
    assert self.symmetric == 0
    assert self.keep_negative
    assert self.alpha is None or self.alpha == 1.0

    x = np.asarray(range(2**self.bits), dtype=np.float32)
    p_and_n = np.where(x >= 2**(self.bits - 1),
                       (x - 2**(self.bits - 1)) - 2**(self.bits - 1),
                       x)
    return p_and_n * np.array(
        K.pow(2.0, -self.bits + K.cast(self.integer, dtype="float32") + 1),
        dtype="float32")

  @classmethod
  def from_config(cls, config):
    # Convert JSON-serializable lists back to NumPy arrays.
    if config.get("post_training_scale") is not None:
      config["post_training_scale"] = np.array(config["post_training_scale"])
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "integer": self.integer.numpy()
                   if isinstance(self.integer, tf.Variable) else self.integer,
        "symmetric": self.symmetric,
        "alpha": self.alpha,
        "keep_negative": self.keep_negative,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "qnoise_factor": self.qnoise_factor.numpy() if isinstance(
            self.qnoise_factor, tf.Variable) else self.qnoise_factor,
        "post_training_scale":
            # Since NumPy arrays are not directly JSON-serializable,
            # we convert them to lists.
            (self.post_training_scale.tolist()
             if self.post_training_scale is not None else None)
    }
    return config
@quantizer_registry.register_quantizer
class bernoulli(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Computes a Bernoulli sample with probability sigmoid(x).

  This computation uses ST approximation.

  To do that, we compute sigmoid(x) and a random sample z ~ U[0,1]. As
  p in [0,1] and z in [0,1], p - z in [-1,1]. However, -1 will never appear
  because to get -1 we would need sigmoid(-inf) - z == 1. As a result, the
  range will be in practical terms [0,1].

  The noise introduced by z can be seen as a regularizer to the weights W of
  y = Wx as y = Wx + Wz for some noise z with mean mu(z) and var(z). As a
  result, W**2 var(z) to the variance of y, which has the same effect as a
  regularizer on L2 with lambda = var(z), as presented in Hinton"s Coursera
  Lecture 9c.

  Remember that E[dL/dy] = E[dL/dx] once we add stochastic sampling.

  Attributes:
    alpha: allows one to specify multiplicative factor for number generation
      of "auto" or "auto_po2".
    temperature: amplifier factor for sigmoid function, making stochastic
      less stochastic as it moves away from 0.
    use_real_sigmoid: use real sigmoid for probability.

  Returns:
    Computation of round with stochastic sampling with straight through
    gradient.
  """

  def __init__(self, alpha=None, temperature=6.0, use_real_sigmoid=True):
    super().__init__()
    self.alpha = alpha
    self.bits = 1
    self.temperature = temperature
    self.use_real_sigmoid = use_real_sigmoid
    self.default_alpha = 1.0
    self.scale = None

  def __str__(self):
    options = []
    if self.alpha is not None:
      if isinstance(self.alpha, six.string_types):
        options.append("alpha='" + str(self.alpha) + "'")
      else:
        options.append("alpha=" + str(self.alpha))
    if self.temperature != 6.0:
      options.append("temperature=" + str(self.temperature))
    if not self.use_real_sigmoid:
      options.append("use_real_sigmoid=" + str(int(self.use_real_sigmoid)))
    return "bernoulli(" + ",".join(options) + ")"

  def __call__(self, x):
    if isinstance(self.alpha, six.string_types):
      assert self.alpha in ["auto", "auto_po2"]
      # For auto-scaling modes, normalize by the per-channel standard
      # deviation so the sigmoid temperature is comparable across channels.
      rank = len(x.shape)
      if rank > 1:
        if K.image_data_format() == "channels_last":
          reduce_axes = list(range(rank - 1))
        else:
          reduce_axes = list(range(1, rank))
      else:
        reduce_axes = [0]
      std = K.std(x, axis=reduce_axes, keepdims=True) + K.epsilon()
    else:
      std = 1.0

    logits = self.temperature * x / std
    if self.use_real_sigmoid:
      prob = tf.keras.backend.sigmoid(logits)
    else:
      prob = _sigmoid(logits)

    # Stochastic sample: 1 with probability `prob`, otherwise 0.
    noise = tf.random.uniform(tf.shape(x))
    sample = tf.sign(prob - noise)
    sample += 1.0 - tf.abs(sample)   # map sign's 0 output to +1
    sample = (sample + 1.0) / 2.0    # {-1, +1} -> {0, 1}

    # Deterministic binarization of x, used only to fit the scale.
    # if we use non stochastic binary to compute alpha,
    # this function seems to behave better
    hard = tf.sign(x)
    hard += 1.0 - tf.abs(hard)
    hard = (hard + 1.0) / 2.0

    scale = _get_least_squares_scale(self.alpha, x, hard)
    self.scale = scale
    return x + tf.stop_gradient(-x + scale * sample)

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"

  def max(self):
    """Get the maximum value bernoulli class can represent."""
    if self.alpha is None or isinstance(self.alpha, six.string_types):
      return 1.0
    return max(1.0, self.alpha)

  def min(self):
    """Get the minimum value bernoulli class can represent."""
    return 0.0

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    return {"alpha": self.alpha}
from_config(cls, config): return cls(**config) def get_config(self): config = {"alpha": self.alpha} return config @quantizer_registry.register_quantizer class ternary(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes an activation function returning -alpha, 0 or +alpha. Right now we assume two type of behavior. For parameters, we should have alpha, threshold and stochastic rounding on. For activations, alpha and threshold should be floating point numbers, and stochastic rounding should be off. Attributes: x: tensor to perform sign opertion with stochastic sampling. bits: number of bits to perform quantization. alpha: ternary is -alpha or +alpha. Alpha can be "auto" or "auto_po2". threshold: threshold to apply "dropout" or dead band (0 value). If "auto" is specified, we will compute it per output layer. use_stochastic_rounding: if true, we perform stochastic rounding. Returns: Computation of sign within the threshold. """ def __init__(self, alpha=None, threshold=None, use_stochastic_rounding=False, number_of_unrolls=5): super().__init__() self.bits = 2 self.alpha = alpha self.threshold = threshold self.use_stochastic_rounding = use_stochastic_rounding self.default_alpha = 1.0 self.default_threshold = 0.33 self.number_of_unrolls = number_of_unrolls self.scale = None def __str__(self): flags = [] if self.alpha is not None: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.threshold is not None: flags.append("threshold=" + str(self.threshold)) if self.use_stochastic_rounding: flags.append( "use_stochastic_rounding=" + str(int(self.use_stochastic_rounding))) if self.number_of_unrolls != 5: flags.append( "number_of_unrolls=" + str(int(self.number_of_unrolls))) return "ternary(" + ",".join(flags) + ")" def __call__(self, x): if isinstance(self.alpha, six.string_types): # parameters assert self.alpha in ["auto", "auto_po2"] assert self.threshold is None else: # 
activations assert not self.use_stochastic_rounding assert not isinstance(self.threshold, six.string_types) if self.alpha is None or isinstance(self.alpha, six.string_types): scale = 1.0 elif isinstance(self.alpha, np.ndarray): scale = self.alpha else: scale = float(self.alpha) # This is an approximiation from https://arxiv.org/abs/1605.04711 # We consider channels_last only for now. if isinstance(self.alpha, six.string_types): # It is for parameters # first, compute which asix corresponds to the channels. # TODO(b/237833510): support channels_first try: len_axis = len(x.shape.as_list()) except AttributeError: len_axis = len(list(x.shape)) if len_axis == 1: axis = None elif K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) # This approximation is exact if x ~ U[-m, m]. For x ~ N(0, m) # we need to iterate a few times before we can coverge m = K.max(tf.abs(x), axis=axis, keepdims=True) scale = 2 * m / 3.0 if "po2" in self.alpha: scale = K.pow(2.0, tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0))) for _ in range(self.number_of_unrolls): thres = scale / 2.0 # once we scale the number precision == 0.33 works # well for Uniform and Normal distribution of input v = scale * _round_through( x / scale, use_stochastic_rounding=self.use_stochastic_rounding, precision=1. / 3.) q = K.cast(tf.abs(v) >= thres, K.floatx()) * tf.sign(x) scale = _get_least_squares_scale(self.alpha, x, q) else: if self.threshold is None: thres = self.default_threshold else: thres = self.threshold q = K.cast(tf.abs(x) >= thres, K.floatx()) * tf.sign(x) # ternary ranges from -1 to +1, so we use tanh(x) to be a differentiable # version of that. 
    if self.alpha is None:
      x = K.tanh(x)
    self.scale = scale
    # Straight-through estimator: forward pass emits scale * q, backward pass
    # passes the gradient of x (the stop_gradient hides the quantization).
    return x + tf.stop_gradient(-x + scale * q)

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"

  def max(self):
    """Get the maximum value that ternary can represent."""
    if self.alpha is None or isinstance(self.alpha, six.string_types):
      return 1.0
    else:
      return max(1.0, self.alpha)

  def min(self):
    """Get the minimum value that ternary can represent."""
    if self.alpha is None or isinstance(self.alpha, six.string_types):
      return -1.0
    else:
      return -max(1.0, self.alpha)

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "alpha": self.alpha,
        "threshold": self.threshold,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "number_of_unrolls": self.number_of_unrolls
    }
    return config


@quantizer_registry.register_quantizer
class stochastic_ternary(ternary):  # pylint: disable=invalid-name
  """Computes a stochastic activation function returning -alpha, 0 or +alpha.

  Computes straight-through approximation using random sampling to make
  E[dL/dy] = E[dL/dx], and computing the sign function. See explanation above.

  Attributes:
    x: tensor to perform sign operation with stochastic sampling.
    bits: number of bits to perform quantization.
    alpha: ternary is -alpha or +alpha, or "auto" or "auto_po2".
    threshold: (1-threshold) specifies the spread of the +1 and -1 values.
    temperature: amplifier factor for sigmoid function, making stochastic
      less stochastic as it moves away from 0.
    use_real_sigmoid: use real sigmoid for probability.
    number_of_unrolls: number of times we iterate between scale and threshold.

  Returns:
    Computation of sign with stochastic sampling with straight through
    gradient.
  """

  def __init__(
      self,
      alpha=None,
      threshold=None,
      temperature=8.0,
      use_real_sigmoid=True,
      number_of_unrolls=5,
  ):
    super().__init__(
        alpha=alpha, threshold=threshold, number_of_unrolls=number_of_unrolls
    )
    self.bits = 2
    self.alpha = alpha
    self.threshold = threshold
    # threshold == 1.0 would collapse the spread of the +/-1 values.
    assert threshold != 1.0
    self.default_alpha = 1.0
    self.default_threshold = 0.33
    self.temperature = temperature
    self.use_real_sigmoid = use_real_sigmoid
    self.number_of_unrolls = number_of_unrolls
    self.scale = None

  def __str__(self):
    # Serializes only non-default arguments, mirroring the constructor call.
    flags = []
    if self.alpha is not None:
      alpha = str(self.alpha)
      if isinstance(self.alpha, six.string_types):
        alpha = "'" + alpha + "'"
      flags.append("alpha=" + alpha)
    if self.threshold is not None:
      flags.append("threshold=" + str(self.threshold))
    if self.temperature != 8.0:
      flags.append("temperature=" + str(self.temperature))
    if not self.use_real_sigmoid:
      flags.append("use_real_sigmoid=0")
    if self.number_of_unrolls != 5:
      flags.append("number_of_unrolls=" + str(self.number_of_unrolls))
    return "stochastic_ternary(" + ",".join(flags) + ")"

  def __call__(self, x):
    def stochastic_output():
      # right now we only accept alpha = "auto" or "auto_po2"
      assert isinstance(self.alpha, six.string_types)
      assert self.alpha in ["auto", "auto_po2"]
      # NOTE(review): given the asserts above, only the elif branch below is
      # reachable; the None/float branches are dead code kept as-is.
      if self.alpha is None:
        scale = self.default_alpha
      elif isinstance(self.alpha, six.string_types):
        scale = 1.0
        assert self.alpha in ["auto", "auto_po2"]
      else:
        assert self.alpha >= 0.0
        scale = float(self.alpha)

      len_axis = len(x.shape)
      if len_axis > 1:
        if K.image_data_format() == "channels_last":
          axis = list(range(len_axis - 1))
        else:
          axis = list(range(1, len_axis))
      else:
        axis = [0]

      x_std = K.std(x, axis=axis, keepdims=True)

      # Initial scale estimate, exact for x ~ U[-m, m]; refined below by
      # least squares over a few unrolled iterations.
      m = K.max(tf.abs(x), axis=axis, keepdims=True)
      scale = 2.0 * m / 3.0
      if self.alpha == "auto_po2":
        scale = K.pow(
            2.0, tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0))
        )
      for _ in range(self.number_of_unrolls):
        T = scale / 2.0
        q_ns = K.cast(tf.abs(x) >= T, K.floatx()) * K.sign(x)
        scale = _get_least_squares_scale(self.alpha, x, q_ns)
"auto" allows arbitrary scale while "auto_po2" allows power-of-two scales only. It can also be set to a fixed value or None (i.e., no scaling). - scale_axis: It determines the axis/axes to calculate the auto-scale at. - elements_per_scale: It enables fine-grained scaling where it determines the number of elements across scale axis/axes that should be grouped into one scale. Examples: 1. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=None, elements_per_scale=None --> Number of separate scales = 16 2. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=1, elements_per_scale=None --> Number of separate scales = 8 3. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=1, elements_per_scale=2 --> Number of separate scales = 4 4. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=[2, 3], elements_per_scale=2 --> Number of separate scales = 4*8 = 32 5. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=[2, 3], elements_per_scale=[2, 4] --> Number of separate scales = 4*4 = 16 Attributes: x: tensor to perform sign_through. bits: number of bits to perform quantization. use_01: if True, return {0,1} instead of {-1,+1}. alpha: binary is -alpha or +alpha, or "auto", "auto_po2" to compute automatically. use_stochastic_rounding: if true, we perform stochastic rounding. elements_per_scale: if set to an int or List[int], we create multiple scales per axis across scale_axis, where 'elements_per_scale' represents the number of elements/values associated with every separate scale value. scale_axis: int or List[int] which axis/axes to calculate scale from. min_po2_exponent: if set while using "auto_po2", it represents the minimum allowed power of two exponent. max_po2_exponent: if set while using "auto_po2", it represents the maximum allowed power of two exponent. Returns: Computation of sign operation with straight through gradient. 
""" def __init__(self, use_01=False, alpha=None, use_stochastic_rounding=False, scale_axis=None, elements_per_scale=None, min_po2_exponent=None, max_po2_exponent=None): super().__init__() self.use_01 = use_01 self.bits = 1 self.alpha = alpha self.use_stochastic_rounding = use_stochastic_rounding self.default_alpha = 1.0 self.scale = None self.scale_axis = scale_axis self.elements_per_scale = elements_per_scale self.min_po2_exponent = min_po2_exponent self.max_po2_exponent = max_po2_exponent def __str__(self): def list_to_str(l): return ",".join([str(x) for x in l]) flags = [] if self.use_01: flags.append("use_01=" + str(int(self.use_01))) if self.alpha is not None: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.elements_per_scale is not None: if isinstance(self.elements_per_scale, list): flags.append("elements_per_scale=[" + list_to_str(self.elements_per_scale) + "]") else: flags.append("elements_per_scale=" + str(self.elements_per_scale)) if self.scale_axis is not None: if isinstance(self.scale_axis, list): flags.append("scale_axis=[" + list_to_str(self.scale_axis) + "]") else: flags.append("scale_axis=" + str(self.scale_axis)) if self.min_po2_exponent is not None: flags.append("min_po2_exponent=" + str(self.min_po2_exponent)) if self.max_po2_exponent is not None: flags.append("max_po2_exponent=" + str(self.max_po2_exponent)) if self.use_stochastic_rounding: flags.append( "use_stochastic_rounding=" + str(self.use_stochastic_rounding)) return "binary(" + ",".join(flags) + ")" def __call__(self, x): if isinstance(self.alpha, six.string_types): assert self.alpha in ["auto", "auto_po2"] if self.alpha is None: scale = self.default_alpha elif isinstance(self.alpha, six.string_types): scale = 1.0 elif isinstance(self.alpha, np.ndarray): scale = self.alpha else: scale = float(self.alpha) if self.use_stochastic_rounding: try: len_axis = len(x.shape.as_list()) except AttributeError: 
len_axis = len(list(x.shape)) if len_axis == 1: axis = None elif K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) # if stochastic_round is through, we need to scale # number so that the precision is small enough. # This is especially important if range of x is very # small, which occurs during initialization of weights. m = K.max(tf.abs(x), axis=axis, keepdims=True) m = tf.where(m > 1.0, tf.ones_like(m), m) f = 2 * m x = tf_utils.smart_cond( K.learning_phase(), lambda: f * _round_through( x / f, use_stochastic_rounding=True, precision=0.125), lambda: x) k_sign = tf.sign(x) if self.use_stochastic_rounding: # in inference, we use a biased "1" for stochastic rounding right now k_sign += (1.0 - tf.abs(k_sign)) * tf_utils.smart_cond( K.learning_phase(), lambda: 2.0 * tf.round(tf.random.uniform(tf.shape(x))) - 1.0, lambda: tf.ones_like(tf.shape(x), dtype=K.floatx())) # if something still remains, just make it positive for now. k_sign += (1.0 - tf.abs(k_sign)) if self.use_01: k_sign = (k_sign + 1.0) / 2.0 # approximate binary by tanh(x) as it has limited range between -1 and +1. 
if self.alpha is None: x = K.tanh(x) self.scale = _get_least_squares_scale( self.alpha, x, k_sign, elements_per_scale=self.elements_per_scale, scale_axis=self.scale_axis, min_po2_exponent=self.min_po2_exponent, max_po2_exponent=self.max_po2_exponent, ) return x + tf.stop_gradient(-x + self.scale * k_sign) def _set_trainable_parameter(self): if self.alpha is None: self.alpha = "auto_po2" def max(self): """Get maximum value that binary class can respresent.""" if self.alpha is None or isinstance(self.alpha, six.string_types): return 1.0 else: return max(1.0, self.alpha) def min(self): """Get minimum value that binary class can respresent.""" if self.use_01: return 0.0 elif self.alpha is None or isinstance(self.alpha, six.string_types): return -1.0 else: return -max(1.0, self.alpha) @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "use_01": self.use_01, "alpha": self.alpha, "use_stochastic_rounding": self.use_stochastic_rounding } return config @quantizer_registry.register_quantizer class stochastic_binary(binary): # pylint: disable=invalid-name """Computes a stochastic activation function returning -alpha or +alpha. Computes straight-through approximation using random sampling to make E[dL/dy] = E[dL/dx], and computing the sign function. See explanation above. Attributes: x: tensor to perform sign opertion with stochastic sampling. alpha: binary is -alpha or +alpha, or "auto" or "auto_po2". bits: number of bits to perform quantization. temperature: amplifier factor for sigmoid function, making stochastic behavior less stochastic as it moves away from 0. use_real_sigmoid: use real sigmoid from tensorflow for probablity. Returns: Computation of sign with stochastic sampling with straight through gradient. 
""" def __init__(self, alpha=None, temperature=6.0, use_real_sigmoid=True): super().__init__(alpha=alpha) self.alpha = alpha self.bits = 1 self.temperature = temperature self.use_real_sigmoid = use_real_sigmoid self.default_alpha = 1.0 self.scale = None def __str__(self): flags = [] if self.alpha is not None: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.temperature != 6.0: flags.append("temperature=" + str(self.temperature)) if not self.use_real_sigmoid: flags.append("use_real_sigmoid=" + str(int(self.use_real_sigmoid))) return "stochastic_binary(" + ",".join(flags) + ")" def __call__(self, x): def stochastic_output(): if isinstance(self.alpha, six.string_types): assert self.alpha in ["auto", "auto_po2"] len_axis = len(x.shape) if len_axis > 1: if K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) else: axis = [0] std = K.std(x, axis=axis, keepdims=True) + K.epsilon() else: std = 1.0 if self.use_real_sigmoid: p = tf.keras.backend.sigmoid(self.temperature * x / std) else: p = _sigmoid(self.temperature * x / std) r = tf.random.uniform(tf.shape(x)) q = tf.sign(p - r) q += 1.0 - tf.abs(q) q_non_stochastic = tf.sign(x) q_non_stochastic += 1.0 - tf.abs(q_non_stochastic) scale = _get_least_squares_scale(self.alpha, x, q_non_stochastic) self.scale = scale return x + tf.stop_gradient(-x + scale * q) output = tf_utils.smart_cond( K.learning_phase(), stochastic_output, lambda: binary.__call__(self, x) ) return output def _set_trainable_parameter(self): if self.alpha is None: self.alpha = "auto_po2" def max(self): """Get the maximum value that stochastic_binary can respresent.""" if self.alpha is None or isinstance(self.alpha, six.string_types): return 1.0 else: return max(1.0, self.alpha) def min(self): """Get the minimum value that stochastic_binary can respresent.""" if self.alpha is None or isinstance(self.alpha, 
six.string_types): return -1.0 else: return -max(1.0, self.alpha) @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "alpha": self.alpha, "temperature": self.temperature, "use_real_sigmoid": self.use_real_sigmoid, } return config @tf.function(jit_compile=True) def fast_relu_quantize(p, m_i, factor): return m_i * tf.clip_by_value(tf.round(p) * factor, 0.0, 1.0 - factor) @quantizer_registry.register_quantizer class quantized_relu(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes a quantized relu to a number of bits. Modified from: [https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow] Assume h(x) = +1 with p = sigmoid(x), -1 otherwise, the expected value of h(x) is: E[h(x)] = +1 P(p <= sigmoid(x)) - 1 P(p > sigmoid(x)) = +1 P(p <= sigmoid(x)) - 1 ( 1 - P(p <= sigmoid(x)) ) = 2 P(p <= sigmoid(x)) - 1 = 2 sigmoid(x) - 1, if p is sampled from a uniform distribution U[0,1] If use_sigmoid is 0, we just keep the positive numbers up to 2**integer * (1 - 2**(-bits)) instead of normalizing them, which is easier to implement in hardware. Attributes: bits: number of bits to perform quantization. integer: number of bits to the left of the decimal point. use_sigmoid: if true, we apply sigmoid to input to normalize it. negative_slope: slope when activation < 0, needs to be power of 2. use_stochastic_rounding: if true, we perform stochastic rounding. relu_upper_bound: A float representing an upper bound of the unquantized relu. If None, we apply relu without the upper bound when "is_quantized_clip" is set to false (true by default). Note: The quantized relu uses the quantization parameters (bits and integer) to upper bound. So it is important to set relu_upper_bound appropriately to the quantization parameters. "is_quantized_clip" has precedence over "relu_upper_bound" for backward compatibility. 
    is_quantized_clip: A boolean representing whether the inputs are clipped
      to the maximum value represented by the quantization parameters. This
      parameter is deprecated, and the default is set to True for backwards
      compatibility. Users are encouraged to use "relu_upper_bound" instead.
    qnoise_factor: float. a scalar from 0 to 1 that represents the level of
      quantization noise to add. This controls the amount of the quantization
      noise to add to the outputs by changing the weighted sum of
      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.
    var_name: String or None. A variable name shared between the tf.Variables
      created in the build function. If None, it is generated automatically.
    use_ste: Bool. Whether to use "straight-through estimator" (STE) method
      or not.
    use_variables: Bool. Whether to make the quantizer variables to be dynamic
      tf.Variables or not.

  Returns:
    Function that performs relu + quantization to bits >= 0.
  """

  def __init__(self, bits=8, integer=0, use_sigmoid=0, negative_slope=0.0,
               use_stochastic_rounding=False, relu_upper_bound=None,
               is_quantized_clip=True, qnoise_factor=1.0, var_name=None,
               use_ste=True, use_variables=False, enable_fast_inference=False):
    super().__init__()
    self.bits = bits
    self.integer = integer
    self.use_sigmoid = use_sigmoid
    self.negative_slope = negative_slope
    self.use_stochastic_rounding = use_stochastic_rounding
    self.relu_upper_bound = relu_upper_bound
    self.is_quantized_clip = is_quantized_clip
    self.qnoise_factor = qnoise_factor
    self.use_ste = use_ste
    # negative_slope must be a non-negative power of two so the negative
    # branch can be implemented as a shift in hardware.
    assert negative_slope >= 0.0
    if negative_slope != 0.0:
      assert np.mod(np.log2(negative_slope), 1) == 0
    self.var_name = var_name
    self.use_variables = use_variables
    self.enable_fast_inference = enable_fast_inference

  def __str__(self):
    # Converts Tensors to printable strings by converting to a numpy array and
    # then using regex to remove brackets when there is only one integer bit
    integer_bits = re.sub(
        r"\[(\d)\]", r"\g<1>",
        str(self.integer.numpy() if isinstance(self.integer, tf.Variable)
            else self.integer))

    flags = [str(self.bits), integer_bits]
    if self.use_sigmoid or self.use_stochastic_rounding:
      flags.append(str(int(self.use_sigmoid)))
    if self.negative_slope:
      flags.append(str(self.negative_slope))
    if self.use_stochastic_rounding:
      flags.append(str(int(self.use_stochastic_rounding)))
    return "quantized_relu(" + ",".join(flags) + ")"

  def __call__(self, x):
    if self.enable_fast_inference:
      # This is the fast inference version of the quantizer.
      m_i = 1 << self.integer
      p = x * (2 ** (self.bits - self.integer))
      factor = 2 ** -self.bits
      return fast_relu_quantize(p, m_i, factor)

    if not self.built:
      self.build(var_name=self.var_name, use_variables=self.use_variables)

    # One bit is consumed by the sign when a negative slope is used.
    non_sign_bits = self.bits - (self.negative_slope != 0.0)
    x = K.cast(x, dtype="float32")
    m = K.cast(K.pow(2, non_sign_bits), dtype="float32")
    m_i = K.cast(K.pow(2, self.integer), dtype="float32")

    # is_quantized_clip has precedence over relu_upper_bound for backward
    # compatibility.
    # m_f is the smallest representable step (2**(integer - non_sign_bits)).
    m_f = K.cast(
        K.pow(
            tf.constant(2., tf.float32),
            K.cast(self.integer, dtype="float32") - non_sign_bits),
        dtype="float32")
    if self.is_quantized_clip:
      x_u = tf.where(x <= m_i - m_f, K.relu(x, alpha=self.negative_slope),
                     tf.ones_like(x) * (m_i - m_f))
    elif self.relu_upper_bound is not None:
      x_u = tf.where(x <= self.relu_upper_bound,
                     K.relu(x, alpha=self.negative_slope),
                     tf.ones_like(x) * self.relu_upper_bound)
    else:
      x_u = K.relu(x, alpha=self.negative_slope)

    if self.use_sigmoid:
      # Normalize via sigmoid before rounding; negative branch is scaled by
      # negative_slope and clipped to [-1, 0].
      p = _sigmoid(x / m_i) * m
      xq = m_i * tf.keras.backend.clip(
          2.0 * (_round_through(p, self.use_stochastic_rounding) / m) - 1.0,
          0.0, 1.0 - 1.0 / m)
      if self.negative_slope > 0:
        neg_factor = 1 / (self.negative_slope * m)
        xq = xq + m_i * self.negative_slope * tf.keras.backend.clip(
            2.0 * (_round_through(p * self.negative_slope,
                                  self.use_stochastic_rounding) * neg_factor)
            - 1.0, -1.0, 0.0)
    else:
      p = x * m / m_i
      xq = m_i * tf.keras.backend.clip(
          _round_through(p, self.use_stochastic_rounding) / m,
          0.0, 1.0 - 1.0 / m)
      if self.negative_slope > 0:
        neg_factor = 1 / (self.negative_slope * m)
        xq = xq + m_i * self.negative_slope * (
            tf.keras.backend.clip(
                _round_through(p * self.negative_slope,
                               self.use_stochastic_rounding) * neg_factor,
                -1.0, 0.0))

    if self.relu_upper_bound and not self.is_quantized_clip:
      xq = tf.where(xq <= self.relu_upper_bound,
                    xq, tf.ones_like(xq) * self.relu_upper_bound)

    if self.use_ste:
      # Straight-through estimator: gradient of the clipped input x_u.
      return x_u + tf.stop_gradient(self.qnoise_factor * (-x_u + xq))
    else:
      return (1 - self.qnoise_factor) * x_u + tf.stop_gradient(
          self.qnoise_factor * xq)

  def max(self):
    """Get the maximum value that quantized_relu can represent."""
    unsigned_bits = self.bits - (self.negative_slope != 0.0)

    if unsigned_bits > 0:
      return max(
          1.0,
          np.array(
              K.pow(2.0, K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return 1.0

  def min(self):
    """Get the minimum value that quantized_relu can represent."""
    if self.negative_slope == 0.0:
      return 0.0

    unsigned_bits = self.bits - 1
    if unsigned_bits > 0:
      return min(
          -0.0,
          -self.negative_slope * np.array(
              K.pow(2.0, K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return -1.0

  def range(self):
    """Returns a list of all values that quantized_relu can represent
      ordered by their binary representation ascending.
""" assert self.use_sigmoid == 0 # current unsupported assert self.negative_slope == 0 # # unsupported unsupported x = np.asarray(range(2**self.bits)) return x * np.array( K.pow(2.0, -self.bits + K.cast(self.integer, dtype="float32")), dtype="float32") @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "bits": self.bits, "integer": self.integer.numpy() if isinstance(self.integer, tf.Variable) else self.integer, "use_sigmoid": self.use_sigmoid, "negative_slope": self.negative_slope, "use_stochastic_rounding": self.use_stochastic_rounding, "relu_upper_bound": self.relu_upper_bound, "qnoise_factor": self.qnoise_factor.numpy() if isinstance( self.qnoise_factor, tf.Variable) else self.qnoise_factor } return config @quantizer_registry.register_quantizer class quantized_ulaw(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes a u-law quantization. Attributes: bits: number of bits to perform quantization. integer: number of bits to the left of the decimal point. symmetric: if true, we will have the same number of values for positive and negative numbers. u: parameter of u-law Returns: Function that performs ulaw + quantization to bits in the range -1.0 to 1.0. 
""" def __init__(self, bits=8, integer=0, symmetric=0, u=255.0): super().__init__() self.bits = bits self.integer = integer self.symmetric = symmetric self.u = u def __str__(self): flags = [str(self.bits), str(self.integer)] if self.symmetric or self.u != 255.0: flags.append(str(int(self.symmetric))) if self.u != 255.0: flags.append(str(self.u)) return "quantized_ulaw(" + ",".join(flags) + ")" def __call__(self, x): non_sign_bits = self.bits - 1 m = pow(2, non_sign_bits) m_i = pow(2, self.integer) p = _sigmoid(x / m_i) * m rp = 2.0 * (_round_through(p) / m) - 1.0 u_law_p = tf.sign(rp) * tf.keras.backend.log( 1 + self.u * tf.abs(rp)) / tf.keras.backend.log(1 + self.u) xq = m_i * tf.keras.backend.clip(u_law_p, -1.0 + (1.0 * self.symmetric) / m, 1.0 - 1.0 / m) return xq def max(self): """Get the maximum value that quantized_ulaw can represent.""" unsigned_bits = self.bits - 1 if unsigned_bits > 0: return max(1.0, np.power(2.0, self.integer)) else: return 1.0 def min(self): """Get the minimum value that quantized_ulaw can represent.""" unsigned_bits = self.bits - 1 if unsigned_bits > 0: return -max(1.0, np.power(2.0, self.integer)) else: return -1.0 @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "bits": self.bits, "integer": self.integer, "symmetric": self.symmetric, "u": self.u } return config @quantizer_registry.register_quantizer class quantized_tanh(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes a quantized tanh to a number of bits. Modified from: [https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow] Attributes: bits: number of bits to perform quantization. use_stochastic_rounding: if true, we perform stochastic rounding. symmetric: if true, we will have the same number of values for positive and negative numbers. 
    use_real_tanh: if true, use the tanh function from Keras backend, if
      false, use tanh that is defined as 2 * sigmoid(x) - 1

  Returns:
    Function that performs tanh + quantization to bits in the range -1.0 to
    1.0.
  """

  def __init__(self, bits=8, use_stochastic_rounding=False, symmetric=False,
               use_real_tanh=False):
    super().__init__()
    self.bits = bits
    self.symmetric = symmetric
    self.use_stochastic_rounding = use_stochastic_rounding
    self.use_real_tanh = use_real_tanh

  def __str__(self):
    # Serializes only non-default positional flags.
    flags = [str(self.bits)]
    if self.use_stochastic_rounding:
      flags.append(str(int(self.use_stochastic_rounding)))
    if self.symmetric:
      flags.append(str(int(self.symmetric)))
    if self.use_real_tanh:
      flags.append(str(int(self.use_real_tanh)))
    return "quantized_tanh(" + ",".join(flags) + ")"

  def __call__(self, x):
    non_sign_bits = self.bits - 1
    x = K.cast_to_floatx(x)
    m = K.cast_to_floatx(K.pow(2, non_sign_bits))
    # Either the exact tanh or the piecewise-sigmoid approximation.
    p = K.tanh(x) if self.use_real_tanh else 2.0 * _sigmoid(x) - 1.0
    # When symmetric, the lower clip excludes -1.0 so the positive and
    # negative ranges contain the same number of levels.
    return tf.keras.backend.clip(
        (_round_through(p * m, self.use_stochastic_rounding) / m),
        -1.0 + (1.0 * self.symmetric) / m,
        1.0 - 1.0 / m)

  def max(self):
    """Get the maximum value that quantized_tanh can represent."""
    return 1.0 - 1.0 / pow(2, self.bits - 1)

  def min(self):
    """Get the minimum value that quantized_tanh can represent."""
    return -1.0 + (1.0 * self.symmetric) / pow(2, self.bits - 1)

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "symmetric": self.symmetric,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "use_real_tanh": self.use_real_tanh
    }
    return config


@quantizer_registry.register_quantizer
class quantized_sigmoid(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Computes a quantized sigmoid to a number of bits.

  Attributes:
    bits: number of bits to perform quantization.
    symmetric: if true, we will have the same number of values for positive
      and negative numbers.
    use_real_sigmoid: if true, will use the sigmoid from Keras backend
    use_stochastic_rounding: if true, we perform stochastic rounding.

  Returns:
    Function that performs sigmoid + quantization to bits in the range 0.0
    to 1.0.
  """

  def __init__(self, bits=8, symmetric=False, use_real_sigmoid=False,
               use_stochastic_rounding=False):
    super().__init__()
    self.bits = bits
    self.symmetric = symmetric
    self.use_real_sigmoid = use_real_sigmoid
    self.use_stochastic_rounding = use_stochastic_rounding

  def __str__(self):
    # Serializes only non-default positional flags.
    flags = [str(self.bits)]
    if self.symmetric:
      flags.append(str(int(self.symmetric)))
    if self.use_real_sigmoid:
      flags.append(str(int(self.use_real_sigmoid)))
    if self.use_stochastic_rounding:
      flags.append(str(int(self.use_stochastic_rounding)))
    return "quantized_sigmoid(" + ",".join(flags) + ")"

  def __call__(self, x):
    x = K.cast_to_floatx(x)
    m = K.cast_to_floatx(K.pow(2, self.bits))
    p = K.sigmoid(x) if self.use_real_sigmoid else _sigmoid(x)
    # When symmetric, the lower clip excludes 0.0 so the range is symmetric
    # around 0.5.
    return tf.keras.backend.clip(
        (_round_through(p * m, self.use_stochastic_rounding) / m),
        (1.0 * self.symmetric) / m,
        1.0 - 1.0 / m)

  def max(self):
    """Get the maximum value that quantized_sigmoid can represent."""
    return 1.0 - 1.0 / pow(2, self.bits)

  def min(self):
    """Get the minimum value that quantized_sigmoid can represent."""
    return (1.0 * self.symmetric) / pow(2, self.bits)

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "symmetric": self.symmetric,
        "use_real_sigmoid": self.use_real_sigmoid,
        "use_stochastic_rounding": self.use_stochastic_rounding
    }
    return config


def _clip_power_of_two(x_abs,
                       min_exp,
                       max_exp,
                       max_value,
                       quadratic_approximation=False,
                       use_stochastic_rounding=False,
                       log2_rounding="rnd"):
  """Clips a tensor using power-of-two quantizer.

  Args:
    x_abs: A tensor object. Its elements should be non-negative.
    min_exp: An integer representing the smallest exponent.
    max_exp: An integer representing the largest exponent.
    max_value: A float or None.
      If it is not None, the values of x_abs are clipped to max_value.
    quadratic_approximation: An boolean representing whether the quadratic
      approximation is applied.
    use_stochastic_rounding: An boolean representing whether the stochastic
      rounding method is applied.
    log2_rounding: log2 rounding mode. "rnd" and "floor" currently supported,
      corresponding to tf.round and tf.floor respectively.

  Returns:
    A tensor object, the values are clipped by min_exp and max_exp.
  """

  # if quadratic_approximation is True, round to the exponent for sqrt(x),
  # so that the return value can be divided by two without remainder.
  log2 = np.log(2.0)

  # When the elements of x_abs are smaller than the keras epsilon,
  # we just overwrite x_abs with eps
  eps = tf.keras.backend.epsilon()
  x_filter = tf.where(x_abs < eps, eps, x_abs)
  if max_value is not None:
    # If the elements of x_filter have values larger than max_value, clip it.
    x_filter = tf.where(x_filter >= max_value,
                        tf.ones_like(x_filter) * max_value, x_filter)

  def power_of_two_clip(x_abs, min_exp, max_exp, quadratic_approximation,
                        use_stochastic_rounding, log2_rounding):
    # Rounds log2(x) with the requested mode and clips the exponent into
    # [min_exp, max_exp]; the rounding is straight-through for gradients.
    assert log2_rounding in ["rnd", "floor"]
    if quadratic_approximation:
      q_factor = 2.0
      x_input = tf.sqrt(x_abs)
    else:
      q_factor = 1.0
      x_input = x_abs

    if log2_rounding == "floor":
      x_log2 = _floor_through(tf.keras.backend.log(x_input) / log2)
    elif use_stochastic_rounding:
      # Stochastic rounding only in the training phase.
      x_log2 = tf_utils.smart_cond(
          K.learning_phase(),
          lambda: stochastic_round_po2(x_input),
          lambda: _round_through(tf.keras.backend.log(x_input) / log2))
    else:
      x_log2 = _round_through(tf.keras.backend.log(x_input) / log2)
    x_clipped = q_factor * tf.keras.backend.clip(x_log2, min_exp, max_exp)
    return x_clipped

  # Values below eps are mapped directly to the smallest exponent.
  x_clipped = tf.where(
      x_abs < eps,
      tf.ones_like(x_abs) * min_exp,
      power_of_two_clip(x_filter, min_exp, max_exp, quadratic_approximation,
                        use_stochastic_rounding, log2_rounding))

  return x_clipped


def _need_exponent_sign_bit_check(max_value):
  """Checks whether the sign bit of exponent is needed.

  This is used by quantized_po2 and quantized_relu_po2.

  Args:
    max_value: the maximum value allowed.

  Returns:
    An integer. 1: sign_bit is needed. 0: sign_bit is not needed.
  """

  if max_value is not None:
    if max_value < 0:
      raise ValueError("po2 max_value should be non-negative.")
    if max_value > 1:
      # if max_value is larger than 1,
      # the exponent could be positive and negative.
      # e.g., log(max_value) > 0 when max_value > 1
      need_exponent_sign_bit = 1
    else:
      need_exponent_sign_bit = 0
  else:
    # max_value is not specified, so we cannot decide the range.
    # Then we need to put sign_bit for exponent to be safe
    need_exponent_sign_bit = 1
  return need_exponent_sign_bit


def _get_min_max_exponents(non_sign_bits, need_exponent_sign_bit,
                           quadratic_approximation):
  """Given a bitwidth, gets min and max exponents that it can represent.

  Args:
    non_sign_bits: An integer representing the bitwidth of the exponent.
    need_exponent_sign_bit: An integer representing whether it needs sign bit
      in exponent. (1: need sign bit. 0: sign bit is not needed.)
    quadratic_approximation: A boolean representing whether the quadratic
      approximation method is enforced.

  Returns:
    A tuple of integers: min_exp, max_exp
  """
  effect_bits = non_sign_bits - need_exponent_sign_bit
  min_exp = -2**(effect_bits)
  max_exp = 2**(effect_bits) - 1
  if quadratic_approximation:
    # Force max_exp to be even so exponents stay divisible by two.
    max_exp = 2 * (max_exp // 2)
  return min_exp, max_exp


@quantizer_registry.register_quantizer
class quantized_po2(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Quantizes to the closest power of 2.

  Attributes:
    bits: An integer, the bits allocated for the exponent, its sign and the
      sign of x.
    max_value: An float or None. If None, no max_value is specified.
      Otherwise, the maximum value of quantized_po2 <= max_value
    use_stochastic_rounding: A boolean, default is False, if True, it uses
      stochastic rounding and forces the mean of x to be x statstically.
quadratic_approximation: A boolean, default is False if True, it forces the exponent to be even number that closted to x. log2_rounding: A string, log2 rounding mode. "rnd" and "floor" currently supported, corresponding to tf.round and tf.floor respectively. qnoise_factor: float. a scalar from 0 to 1 that represents the level of quantization noise to add. This controls the amount of the quantization noise to add to the outputs by changing the weighted sum of (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x. var_name: String or None. A variable name shared between the tf.Variables created in the build function. If None, it is generated automatically. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. use_variables: Bool. Whether to make the quantizer variables to be dynamic tf.Variables or not. """ def __init__(self, bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False, log2_rounding="rnd", qnoise_factor=1.0, var_name=None, use_ste=True, use_variables=False): super().__init__() self.bits = bits self.max_value = max_value self.use_stochastic_rounding = use_stochastic_rounding self.log2_rounding = log2_rounding # if True, round to the exponent for sqrt(x), # so that the return value can be divided by two without remainder. 
self.quadratic_approximation = quadratic_approximation need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value) non_sign_bits = self.bits - 1 self._min_exp, self._max_exp = _get_min_max_exponents( non_sign_bits, need_exponent_sign_bit, self.quadratic_approximation) # qnoise_factor related attributes self.qnoise_factor = qnoise_factor self.use_ste = use_ste self.var_name = var_name self.use_variables = use_variables def __str__(self): flags = [str(self.bits)] if self.max_value is not None or self.use_stochastic_rounding: flags.append(str(int(self.max_value))) if self.use_stochastic_rounding: flags.append(str(int(self.use_stochastic_rounding))) if self.quadratic_approximation: flags.append( "quadratic_approximation=" + str(int(self.quadratic_approximation))) return "quantized_po2(" + ",".join(flags) + ")" def __call__(self, x): if not self.built: self.build(var_name=self.var_name, use_variables=self.use_variables) x_sign = tf.sign(x) x_sign += (1.0 - tf.abs(x_sign)) x_abs = tf.abs(x) x_clipped = _clip_power_of_two(x_abs, self._min_exp, self._max_exp, self.max_value, self.quadratic_approximation, self.use_stochastic_rounding, self.log2_rounding) xq = x_sign * pow(2.0, x_clipped) if self.use_ste: return x + tf.stop_gradient(self.qnoise_factor * (-x + xq)) else: return (1 - self.qnoise_factor) * x + tf.stop_gradient( self.qnoise_factor * xq) def max(self): """Get the maximum value that quantized_po2 can represent.""" if self.max_value: return max(1.0, self.max_value) else: return max(1.0, 2**self._max_exp) def min(self): """Get the minimum value that quantized_po2 can represent.""" if self.max_value: return -max(1.0, self.max_value) else: return -max(1.0, 2**self._max_exp) @classmethod def from_config(cls, config): return cls(**config) def get_config(self): """Gets configugration of the quantizer. Returns: A dict mapping quantization configuration, including bits: bitwidth for exponents. max_value: the maximum value of this quantized_po2 can represent. 
use_stochastic_rounding: if True, stochastic rounding is used. quadratic_approximation: if True, the exponent is enforced to be even number, which is the closest one to x. log2_rounding: A string, Log2 rounding mode """ config = { "bits": self.bits, "max_value": self.max_value, "use_stochastic_rounding": self.use_stochastic_rounding, "quadratic_approximation": self.quadratic_approximation, "qnoise_factor": self.qnoise_factor.numpy() if isinstance( self.qnoise_factor, tf.Variable) else self.qnoise_factor, "log2_rounding": self.log2_rounding } return config @quantizer_registry.register_quantizer class quantized_relu_po2(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Quantizes x to the closest power of 2 when x > 0 Attributes: bits: An integer, the bits allocated for the exponent and its sign. max_value: default is None, or a non-negative value to put a constraint for the max value. negative_slope: slope when activation < 0, needs to be power of 2. use_stochastic_rounding: A boolean, default is False, if True, it uses stochastic rounding and forces the mean of x to be x statstically. quadratic_approximation: A boolean, default is False if True, it forces the exponent to be even number that is closest to x. log2_rounding: A string, log2 rounding mode. "rnd" and "floor" currently supported, corresponding to tf.round and tf.floor respectively. qnoise_factor: float. a scalar from 0 to 1 that represents the level of quantization noise to add. This controls the amount of the quantization noise to add to the outputs by changing the weighted sum of (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x. var_name: String or None. A variable name shared between the tf.Variables created in the build function. If None, it is generated automatically. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. use_variables: Bool. Whether to make the quantizer variables to be dynamic tf.Variables or not. 
""" def __init__(self, bits=8, max_value=None, negative_slope=0, use_stochastic_rounding=False, quadratic_approximation=False, log2_rounding="rnd", qnoise_factor=1.0, var_name=None, use_ste=True, use_variables=False): super().__init__() self.bits = bits self.max_value = max_value self.negative_slope = negative_slope self.use_stochastic_rounding = use_stochastic_rounding self.log2_rounding = log2_rounding # if True, round to the exponent for sqrt(x), # so that the return value can be divided by two without remainder. self.quadratic_approximation = quadratic_approximation need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value) self._min_exp = -2**(self.bits - need_exponent_sign_bit) self._max_exp = 2**(self.bits - need_exponent_sign_bit) - 1 if self.quadratic_approximation: self._max_exp = 2 * (self._max_exp // 2) assert negative_slope >= 0.0 if negative_slope != 0: assert np.mod(np.log2(negative_slope), 1) == 0 # qnoise_factor related attributes self.qnoise_factor = qnoise_factor self.use_ste = use_ste self.var_name = var_name self.use_variables = use_variables def __str__(self): flags = [str(self.bits)] if self.max_value is not None or self.use_stochastic_rounding: flags.append(str(int(self.max_value))) if self.negative_slope: flags.append(str(self.negative_slope)) if self.use_stochastic_rounding: flags.append(str(int(self.use_stochastic_rounding))) if self.quadratic_approximation: flags.append( "quadratic_approximation=" + str(int(self.quadratic_approximation))) return "quantized_relu_po2(" + ",".join(flags) + ")" def __call__(self, x): if not self.built: self.build(var_name=self.var_name, use_variables=self.use_variables) x_original = x if self.max_value is None: x = K.relu(x, self.negative_slope) else: x = tf.where( x <= self.max_value, K.relu(x, self.negative_slope), tf.ones_like(x) * self.max_value) x_pos_clipped = _clip_power_of_two( K.relu(x_original), self._min_exp, self._max_exp, self.max_value, self.quadratic_approximation, 
self.use_stochastic_rounding, self.log2_rounding) x_neg_clipped = _clip_power_of_two( K.relu(-x_original) * self.negative_slope, self._min_exp, self._max_exp, self.max_value, self.quadratic_approximation, self.use_stochastic_rounding, self.log2_rounding) xq = tf.where( tf.logical_or(x_original >= 0.0, self.negative_slope == 0.0), pow(2.0, x_pos_clipped), -pow(2.0, x_neg_clipped)) if self.use_ste: return x + tf.stop_gradient(self.qnoise_factor * (-x + xq)) else: return (1 - self.qnoise_factor) * x + tf.stop_gradient( self.qnoise_factor * xq) def max(self): """Get the maximum value that quantized_relu_po2 can represent.""" if self.max_value: return max(1.0, self.max_value) else: return max(1.0, 2**self._max_exp) def min(self): """Get the minimum value that quantized_relu_po2 can represent.""" if self.negative_slope == 0.0: return 2**self._min_exp unsigned_bits = self.bits - 1 if unsigned_bits > 0: return min(2**self._min_exp, - self.negative_slope * np.power(2.0, unsigned_bits)) else: return 2**self._min_exp @classmethod def from_config(cls, config): return cls(**config) def get_config(self): """Gets configugration of the quantizer. Returns: A dict mapping quantization configuration, including bits: bitwidth for exponents. max_value: the maximum value of this quantized_relu_po2 can represent. use_stochastic_rounding: if True, stochastic rounding is used. quadratic_approximation: if True, the exponent is enforced to be even number, which is the closest one to x. 
log2_rounding: A string, Log2 rounding mode """ config = { "bits": self.bits, "max_value": self.max_value, "negative_slope": self.negative_slope, "use_stochastic_rounding": self.use_stochastic_rounding, "quadratic_approximation": self.quadratic_approximation, "qnoise_factor": self.qnoise_factor.numpy() if isinstance( self.qnoise_factor, tf.Variable) else self.qnoise_factor, "log2_rounding": self.log2_rounding } return config @quantizer_registry.register_quantizer class quantized_hswish(quantized_bits): # pylint: disable=invalid-name """Computes a quantized hard swish to a number of bits. # TODO(mschoenb97): Update to inherit from quantized_linear. Equation of h-swisth function in mobilenet v3: hswish(x) = x * ReluY(x + relu_shift) / Y Y is relu_upper_bound Attributes: bits: number of bits to perform quantization, also known as word length. integer: number of integer bits. symmetric: if True, the quantization is in symmetric mode, which puts restricted range for the quantizer. Otherwise, it is in asymmetric mode, which uses the full range. alpha: a tensor or None, the scaling factor per channel. If None, the scaling factor is 1 for all channels. use_stochastic_rounding: if true, we perform stochastic rounding. This parameter is passed on to the underlying quantizer quantized_bits which is used to quantize h_swish. scale_axis: which axis to calculate scale from qnoise_factor: float. a scalar from 0 to 1 that represents the level of quantization noise to add. This controls the amount of the quantization noise to add to the outputs by changing the weighted sum of (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x. var_name: String or None. A variable name shared between the tf.Variables created in the build function. If None, it is generated automatically. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. use_variables: Bool. Whether to make the quantizer variables to be dynamic tf.Variables or not. 
relu_shift: integer type, representing the shift amount of the unquantized relu. relu_upper_bound: integer type, representing an upper bound of the unquantized relu. If None, we apply relu without the upper bound when "is_quantized_clip" is set to false (true by default). Note: The quantized relu uses the quantization parameters (bits and integer) to upper bound. So it is important to set relu_upper_bound appropriately to the quantization parameters. "is_quantized_clip" has precedence over "relu_upper_bound" for backward compatibility. """ def __init__( self, bits=8, integer=0, symmetric=0, alpha=None, use_stochastic_rounding=False, scale_axis=None, qnoise_factor=1.0, var_name=None, use_variables=False, relu_shift: int = 3, relu_upper_bound: int = 6, ): super().__init__( bits=bits, integer=integer, symmetric=symmetric, keep_negative=True, alpha=alpha, use_stochastic_rounding=use_stochastic_rounding, scale_axis=scale_axis, qnoise_factor=qnoise_factor, var_name=var_name, use_variables=use_variables, ) self.relu_shift = relu_shift self.relu_upper_bound = relu_upper_bound def __str__(self): """Converts Tensors to printable strings.""" integer_bits = re.sub( r"\[(\d)\]", r"\g<1>", str( self.integer.numpy() if isinstance(self.integer, tf.Variable) else self.integer ), ) assert isinstance(integer_bits, int) flags = [ str(self.bits), integer_bits, str(int(self.symmetric)), "relu_shift=" + str(self.relu_shift), "relu_upper_bound=" + str(self.relu_upper_bound), ] if not self.keep_negative: flags.append("keep_negative=False") if self.alpha: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.use_stochastic_rounding: flags.append( "use_stochastic_rounding=" + str(int(self.use_stochastic_rounding)) ) return "quantized_hswish(" + ",".join(flags) + ")" def __call__(self, x): assert self.relu_upper_bound > 0, ( "relu_upper_bound must be a positive value, " f"found {self.relu_upper_bound} instead" ) 
assert ( self.relu_shift > 0 ), f"relu_shift must be a positive value, found {self.relu_shift} instead" x = K.cast_to_floatx(x) shift_x = x + self.relu_shift relu_x = tf.where( shift_x <= self.relu_upper_bound, K.relu(shift_x, alpha=False), tf.ones_like(shift_x) * self.relu_upper_bound, ) hswish_x = tf.math.multiply(x, relu_x) / self.relu_upper_bound return super(quantized_hswish, self).__call__(hswish_x) def min(self): """Gets the minimum value that quantized_hswish can represent.""" # get the minimum value that the number of bits can represent min_quant = super(quantized_hswish, self).min() # In the negative end, the hswish function becomes # x * (x + relu_shift) / relu_upper_bound # the min value of this parabolic function is # - relu_shift^2 / (4 * relu_upper_bound) denom = 4 * self.relu_upper_bound min_parabolic = -self.relu_shift * self.relu_shift / denom if min_quant >= min_parabolic: return min_quant # get the quantized value of min_parabolic return super(quantized_hswish, self).call(min_parabolic) def get_config(self): """Add relu_shift and relu_upper_bound to the config file.""" base_config = super(quantized_hswish, self).get_config() config = { "relu_shift": self.relu_shift, "relu_upper_bound": self.relu_upper_bound, } out_config = dict(list(base_config.items()) + list(config.items())) return out_config # TODO(akshayap): Update to use registry for quantizers instead of globals(). def get_quantizer(identifier): """Gets the quantizer. Args: identifier: An quantizer, which could be dict, string, or callable function. Returns: A quantizer class or quantization function from this file. For example, Quantizer classes: quantized_bits, quantized_po2, quantized_relu_po2, binary, stochastic_binary, ternary, stochastic_ternary, etc. Quantization functions: binary_sigmoid, hard_sigmoid, soft_sigmoid, etc. Raises: ValueError: An error occurred when quantizer cannot be interpreted. 
""" if identifier is None: return None if isinstance(identifier, dict): return deserialize_keras_object( identifier, module_objects=globals(), printable_module_name="quantizer") elif isinstance(identifier, six.string_types): return safe_eval(identifier, globals()) elif callable(identifier): return identifier else: raise ValueError("Could not interpret quantizer identifier: " + str(identifier)) def get_quantized_initializer(w_initializer, w_range): """Gets the initializer and scales it by the range.""" if isinstance(w_initializer, six.string_types): if w_initializer == "he_normal": return initializers.VarianceScaling( scale=2 * w_range, mode="fan_in", distribution="normal", seed=None) if w_initializer == "he_uniform": return initializers.VarianceScaling( scale=2 * w_range, mode="fan_in", distribution="uniform", seed=None) elif w_initializer == "glorot_normal": return initializers.VarianceScaling( scale=w_range, mode="fan_avg", distribution="normal", seed=None) elif w_initializer == "glorot_uniform": return initializers.VarianceScaling( scale=w_range, mode="fan_avg", distribution="uniform", seed=None) elif w_initializer == "random_uniform": return initializers.RandomUniform(-w_range, w_range) return w_initializer ================================================ FILE: qkeras/registry.py ================================================ # Copyright 2024 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """General purpose registy for registering classes or functions. The registry can be used along with decorators to record any class/function. Sample usage: # Setup registry with decorator. _REGISTRY = registry.Registry() def register(cls): _REGISTRY.register(cls) def lookup(name): return _REGISTRY.lookup(name) # Register instances. @register def foo_task(): ... @register def bar_task(): ... # Retrieve instances. def my_executor(): ... my_task = lookup("foo_task") ... """ class Registry(object): """A registry class to record class representations or function objects.""" def __init__(self): """Initializes the registry.""" self._container = {} def register(self, item, name=None): """Register an item. Args: item: Python item to be recorded. name: Optional name to be used for recording item. If not provided, item.__name__ is used. """ if not name: name = item.__name__ self._container[name] = item def lookup(self, name): """Retrieves an item from the registry. Args: name: Name of the item to lookup. Returns: Registered item from the registry. """ return self._container[name] ================================================ FILE: qkeras/safe_eval.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Implements a safe evaluation using globals()."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pyparsing import delimitedList
from pyparsing import Group
from pyparsing import Optional
from pyparsing import Regex
from pyparsing import Suppress

import logging
from tensorflow import keras


def Num(s):
  """Tries to convert string to either int or float."""
  try:
    try:
      return int(s)
    except ValueError:
      return float(s)
  except ValueError:
    # this should be always true. if it isn't int or float, it should be str
    assert (
        (s[0] == '"' and s[-1] == '"') or (s[0] == "'" and s[-1] == "'")
    )
    s = s[1:-1]
    return s


def Str(s):
  """Strips the first and last character (assumed to be quote marks)."""
  return s[1:-1]


def IsNum(s):
  """Returns True if s parses as an int or a float."""
  try:
    try:
      int(s)
      return True
    except ValueError:
      float(s)
      return True
  except ValueError:
    return False


def IsBool(s):
  """Returns True if s is exactly the literal "True" or "False"."""
  if s in ["True", "False"]:
    return True
  else:
    return False


def IsNone(s):
  """Returns True if s is exactly the literal "None"."""
  return s == "None"


def Bool(s):
  """Converts a boolean literal (as vetted by IsBool) to a Python bool."""
  return True if "True" in s else False


def ListofNums(s):
  """Parses a space-separated bracketed list such as "[1 2 3]" into numbers."""
  # remove list brackets
  s = s.replace("[", "").replace("]", "")
  list_s = s.split(" ")
  return [Num(e) for e in list_s]


def IsListofNums(s):
  """Returns True if s looks like a space-separated list of numbers.

  NOTE(review): a single-element list like "[3]" returns False here (the
  len > 1 check), so GetArg falls through to Str and returns "3]"-style
  text instead of [3] — confirm whether single-element lists are expected.
  """
  # remove list brackets
  s = s.replace("[", "").replace("]", "")
  list_s = s.split(" ")
  if len(list_s) > 1:
    for e in list_s:
      # if any of the elements is not a number return false
      if not IsNum(e):
        return False
    return True
  else:
    return False


def GetArg(s):
  """Converts one argument token to bool/int/float/None/list/str.

  The check order matters: bool and numeric literals are tried before the
  None and list forms, and anything unrecognized is treated as a quoted
  string (outer characters stripped).
  """
  if IsBool(s):
    return Bool(s)
  elif IsNum(s):
    return Num(s)
  elif IsNone(s):
    return None
  elif IsListofNums(s):
    return ListofNums(s)
  else:
    return Str(s)


def GetParams(s):
  """Extracts args and kwargs from string."""
  # modified from https://stackoverflow.com/questions/38799223/parse-string-to-identify-kwargs-and-args  # pylint: disable=line-too-long

  _lparen = Suppress("(")  # pylint: disable=invalid-name
  _rparen = Suppress(")")  # pylint: disable=invalid-name
  _eq = Suppress("=")  # pylint: disable=invalid-name

  # Grammar: "(" [token ["=" value] {"," token ["=" value]}] ")".
  # Each match is a Group of length 1 (positional) or 2 (keyword).
  data = (_lparen + Optional(
      delimitedList(
          Group(Regex(r"[^=,)\s]+") + Optional(_eq + Regex(u"[^,)]*")))
      )
  ) + _rparen)

  items = data.parseString(s).asList()

  # need to make sure that kwargs only happen after args are processed
  args = [GetArg(i[0]) for i in items if len(i) == 1]
  kwargs = {i[0]: GetArg(i[1]) for i in items if len(i) == 2}

  # check for syntax error: a positional arg following a keyword arg.
  for i in range(1, len(items)):
    if (len(items[i]) == 1) and (len(items[i-1]) == 2):
      raise SyntaxError(("Error with item " + str(i) + " \n" +
                         " parsing string " + s + "\n" +
                         " Items: " + str(items) + "\n" +
                         " Item[" + str(i-1) +"] :" + str(items[i-1]) + "\n" +
                         " Item[" + str(i) +"] :" + str(items[i])
                        ))

  return args, kwargs


def safe_eval(eval_str, op_dict, *params, **kwparams):  # pylint: disable=invalid-name
  """Replaces eval by a safe eval mechanism.

  Looks up the function name (the part before "(") in op_dict, parses any
  parenthesized arguments with GetParams, merges in *params/**kwparams, and
  either calls the resolved object or returns it unchanged (plain function
  referenced without parentheses). Unknown names fall back to Keras
  activations.
  """

  function_split = eval_str.split("(")
  quantizer = op_dict.get(function_split[0], None)

  if len(function_split) == 2:
    args, kwargs = GetParams("(" + function_split[1])
  else:
    args = []
    kwargs = {}

  # Extra caller-provided parameters are appended after the parsed ones.
  args = args + list(params)
  for k in kwparams:
    kwargs[k] = kwparams[k]

  # must be Keras activation object if None
  if quantizer is None:
    logging.info("keras dict %s", function_split[0])
    quantizer = keras.activations.get(function_split[0])

  if len(function_split) == 2 or args or kwargs:
    # Parentheses or arguments present: instantiate/call the object.
    return quantizer(*args, **kwargs)
  else:
    if isinstance(quantizer, type):  # Check if quantizer is a class
      return quantizer()
    else:
      # Otherwise it is a function, so just return it
      return quantizer


================================================ FILE: qkeras/utils.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import copy
import json
import tempfile
import types

import numpy as np
import os
import six
import re

import networkx as nx
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.models import model_from_json
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper
from tensorflow_model_optimization.python.core.sparsity.keras import prune_registry
from tensorflow_model_optimization.python.core.sparsity.keras import prunable_layer

from .qlayers import Clip
from .qconv2d_batchnorm import QConv2DBatchnorm
from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm
from .qlayers import QActivation
from .qlayers import QAdaptiveActivation
from .qpooling import QAveragePooling2D
from .qlayers import QDense
from .qlayers import QInitializer
from .qconvolutional import QConv1D
from .qconvolutional import QConv2D
from .qconvolutional import QConv2DTranspose
from .qrecurrent import QSimpleRNN
from .qrecurrent import QSimpleRNNCell
from .qrecurrent import QLSTM
from .qrecurrent import QLSTMCell
from .qrecurrent import QGRU
from .qrecurrent import QGRUCell
from .qrecurrent import QBidirectional
from .qconvolutional import QSeparableConv1D
from .qconvolutional import QSeparableConv2D
from .qconvolutional import QDepthwiseConv2D
from .qnormalization import QBatchNormalization
from .qpooling import QGlobalAveragePooling2D
from .qtools import qgraph
from .quantizers import binary
from .quantizers import bernoulli
from .quantizers import get_weight_scale
from .quantizers import quantized_bits
from .quantizers import quantized_relu
from .quantizers import quantized_ulaw
from .quantizers import quantized_tanh
from .quantizers import quantized_sigmoid
from .quantizers import quantized_po2
from .quantizers import quantized_relu_po2
from .quantizers import stochastic_binary
from .quantizers import stochastic_ternary
from .quantizers import ternary
# from .google_internals.experimental_quantizers import quantized_bits_learnable_scale
# from .google_internals.experimental_quantizers import parametric_quantizer_d_xmax
from .safe_eval import safe_eval
from tensorflow.python.ops import math_ops
from .qmac import QScaleShift

# Layer class names that QKeras-aware tooling (conversion, printing, pruning)
# recognizes as quantized layers.
REGISTERED_LAYERS = [
    "QActivation",
    "QAdaptiveActivation",
    "QDense",
    "QConv1D",
    "QConv2D",
    "QSeparableConv1D",
    "QSeparableConv2D",
    "QDepthwiseConv2D",
    "QConv2DTranspose",
    "QSimpleRNN",
    "QLSTM",
    "QGRU",
    "QBidirectional",
    "QBatchNormalization",
    "QConv2DBatchnorm",
    "QDepthwiseConv2DBatchnorm",
    "QAveragePooling2D",
    "QGlobalAveragePooling2D",
]


def find_bn_fusing_layer_pair(model, custom_objects={}):
  """Finds layers that can be fused with the following batchnorm layers.

  Args:
    model: input model
    custom_objects: Dict of model specific objects needed for cloning.
      NOTE(review): mutable default argument; safe only as long as callees
      never mutate it.

  Returns:
    Dict that marks all the layer pairs that need to be fused.

  Note: supports sequential and non-sequential model
  """
  # Clone so that graph construction does not disturb the caller's model.
  fold_model = clone_model(model, custom_objects)
  (graph, _) = qgraph.GenerateGraphFromModel(
      fold_model, "quantized_bits(8, 0, 1)", "quantized_bits(8, 0, 1)")
  qgraph.GraphAddSingleSourceSingleSink(graph)
  qgraph.GraphRemoveNodeWithNodeType(graph, "InputLayer")
  qgraph.GraphPropagateActivationsToEdges(graph)

  # Finds the Batchnorm nodes and mark them.
  layers_followed_by_bn = {}
  bn_layers_to_skip = set()
  for node_id in nx.topological_sort(graph):
    node = graph.nodes[node_id]
    layer = node["layer"][0]
    if layer:
      successor_ids = list(graph.successors(node_id))
      is_single = len(successor_ids) == 1
      # NOTE(review): assumes every real layer node has at least one
      # successor (the synthetic sink added above) — confirm against qgraph.
      successor_layer = graph.nodes[successor_ids[0]]["layer"][0]
      followed_by_bn = (successor_layer.__class__.__name__ ==
                        "QBatchNormalization")
      # TODO(lishanok): extend to QDense types
      # Only fuse when the conv output feeds exactly one consumer and that
      # consumer is a QBatchNormalization layer.
      enable_bn_fusing = layer.__class__.__name__ in [
          "QConv2D", "QDepthwiseConv2D"
      ] and is_single and followed_by_bn

      if enable_bn_fusing:
        layers_followed_by_bn[layer.name] = successor_layer.name
        bn_layers_to_skip.add(successor_layer.name)

  return (layers_followed_by_bn, bn_layers_to_skip)


def add_bn_fusing_weights(prev_layer, bn_layer, saved_weights):
  """Adds additional fusing weights to saved_weights.

  In hardware inference, we need to combined fuse previous layer's output
  with the following batchnorm op.
  z[i] = bn(y[i]) = inv[i] * y'[i] * scale[i] - bias'[i] is the final
  output of the previous layer and bn layer, with:
  inv[i] = gamma[i]* rsqrt(variance[i]^2+epsilon) is computed from
  the bn layer weights
  y'[i] is the i-th channel output from the previous layer (before scale)
  scale[i] is the i-th channel kernel quantizer scale
  fused_bias[i] = inv[i] * bias[i] + beta[i] - inv[i]*mean[i]
  where bias is the bias term from the previous layer, beta and mean
  are the bn layer weights.

  Args:
    prev_layer: QKeras layer, could be QConv2D/QDepthwiseConv2D/QDense.
    bn_layer: The following QBatchNormalization layer that needs to be fused
      with the previous layer.
    saved_weights: Dict. The centralized weights dictionary that exports
      relevant weights and parameters for hardware inference.
""" bn_qs = bn_layer.quantizers bn_ws = bn_layer.get_weights() if bn_qs[4] is not None: assert bn_qs[0] is None and bn_qs[3] is None, ( "If using the inverse quantizer, the gamma and variance quantizers " "should not be used in order to avoid quantizing a value twice.") def apply_quantizer(quantizer, input_weight): if quantizer: weight = tf.constant(input_weight) weight = tf.keras.backend.eval(quantizer(weight)) else: weight = input_weight return weight # Quantize respective bn layer weights gamma = 1.0 beta = 0 idx = 0 if bn_layer.scale: gamma = apply_quantizer(bn_layer.gamma_quantizer_internal, bn_ws[idx]) idx += 1 if bn_layer.center: beta = apply_quantizer(bn_layer.beta_quantizer_internal, bn_ws[idx]) idx += 1 mean = apply_quantizer(bn_layer.mean_quantizer_internal, bn_ws[idx]) idx += 1 variance = apply_quantizer(bn_layer.variance_quantizer_internal, bn_ws[idx]) # Compute inv[i] inv = gamma * math_ops.rsqrt(variance + bn_layer.epsilon) inv = inv.numpy() if bn_layer.inverse_quantizer_internal is not None: quantizer = bn_layer.inverse_quantizer_internal inv = tf.keras.backend.eval(quantizer(inv)) # Compute fused_bias[i] if prev_layer.use_bias: cur_weights = prev_layer.get_weights() assert len(cur_weights) == 2, ("Weights should have length of 2. Found" f"{len(cur_weights)} instead.") prev_bias = cur_weights[-1] else: prev_bias = 0 b_prime = inv * prev_bias + beta - inv * mean saved_weights[prev_layer.name]["enable_bn_fusing"] = True saved_weights[prev_layer.name]["fused_bn_layer_name"] = bn_layer.name saved_weights[prev_layer.name]["bn_inv"] = inv saved_weights[prev_layer.name]["fused_bias"] = b_prime # Model utilities: before saving the weights, we want to apply the quantizers def model_save_quantized_weights(model, filename=None, custom_objects={}): """Quantizes model for inference and save it. Takes a model with weights, apply quantization function to weights and returns a dictionary with quantized weights. 
User should be aware that "po2" quantization functions cannot really be quantized in meaningful way in Keras. So, in order to preserve compatibility with inference flow in Keras, we do not covert "po2" weights and biases to exponents + signs (in case of quantize_po2), but return instead (-1)**sign*(2**round(log2(x))). In the returned dictionary, we will return the pair (sign, round(log2(x))). Special care needs to be given to quantized_bits(alpha="auto_po2") as well. Since in this quantizer, hardware needs the integer weights and scale for hardware inference, this function will return the pair (scale, integer_weights) in the returned dictionary. Arguments: model: model with weights to be quantized. filename: if specified, we will save the hdf5 containing the quantized weights so that we can use them for inference later on. custom_objects: Dict of model specific objects needed to load/store. Returns: dictionary containing layer name and quantized weights that can be used by a hardware generator. """ saved_weights = {} # Find the conv/dense layers followed by Batchnorm layers (fusing_layer_pair_dict, bn_layers_to_skip) = find_bn_fusing_layer_pair( model, custom_objects ) print("... quantizing model") for layer in model.layers: if hasattr(layer, "get_quantizers"): # weights for software inference weights = [] signs = [] scales = [] # weights for hardware inference hw_weights = [] if any(isinstance(layer, t) for t in [ QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]): qs = layer.get_quantizers() ws = layer.get_folded_weights() elif any(isinstance(layer, t) for t in [QSimpleRNN, QLSTM, QGRU]): qs = layer.get_quantizers()[:-1] ws = layer.get_weights() else: qs = layer.get_quantizers() ws = layer.get_weights() has_sign = False has_scale = False enable_bn_fusing = False # isinstance() might fail due to inconsistent module import path. # Use __class__.__name__ instead. 
layer_class = layer.__class__.__name__ if (layer_class == "QBatchNormalization" and layer.name in bn_layers_to_skip): # Mark current bn layer to be fused with the previous layer enable_bn_fusing = True for quantizer, weight in zip(qs, ws): if quantizer: weight = tf.constant(weight) weight = tf.keras.backend.eval(quantizer(weight)) # If quantizer is power-of-2 (quantized_po2 or quantized_relu_po2), # we would like to process it here. # # However, we cannot, because we will lose sign information as # quanized_po2 will be represented by the tuple (sign, log2(abs(w))). # # In addition, we will not be able to use the weights on the model # any longer. # # So, instead of "saving" the weights in the model, we will return # a dictionary so that the proper values can be propagated. # Weights store the weight in the format that software inference uses. weights.append(weight) q_name = "" if quantizer: if isinstance(quantizer, six.string_types): q_name = quantizer elif hasattr(quantizer, "__name__"): q_name = quantizer.__name__ elif hasattr(quantizer, "name"): q_name = quantizer.name elif hasattr(quantizer, "__class__"): q_name = quantizer.__class__.__name__ if quantizer and ("_po2" in q_name): # Quantized_relu_po2 does not have a sign. if q_name == "quantized_po2": has_sign = True sign = np.sign(weight) # Makes sure values are -1 or +1 only sign += (1.0 - np.abs(sign)) # hw_weight store the weight in the format that hardware inference # uses. 
hw_weight = np.round(np.log2(np.abs(weight))) signs.append(sign) scales.append([]) elif (q_name == "quantized_bits" and quantizer.alpha == "auto_po2"): unsigned_bits = quantizer.bits - quantizer.keep_negative m = K.cast_to_floatx(pow(2, unsigned_bits)) m_i = K.cast_to_floatx(K.pow(2, quantizer.integer)) assert hasattr(quantizer.scale, "numpy") or isinstance( quantizer.scale, np.ndarray), ( "The auto_po2 quantizer has to be called first in order " "to know the values of scale.") scale = quantizer.scale if isinstance( quantizer.scale, np.ndarray) else quantizer.scale.numpy() scale = K.cast_to_floatx(scale) # Make sure scale is power of 2 values log2val = np.log2(scale) diff = np.round(log2val) - log2val assert np.all(diff == 0), "scale must be power of 2 values!" # Convert fixed point weight to integer weight, just hw_weight = weight * m / m_i # Because hw_weight is integer weights, set scale = scale * m_i / m # so that when we can multiply scale with the integer weight # during hardware inference to get the fixed point weights scale = scale * m_i / m has_scale = True scales.append(scale) else: hw_weight = weight signs.append([]) scales.append([]) hw_weights.append(hw_weight) # Save the weights in the format that hardware inference uses saved_weights[layer.name] = {"weights": hw_weights, "enable_bn_fusing": enable_bn_fusing} if (isinstance(layer, QAveragePooling2D) or isinstance(layer, QGlobalAveragePooling2D)): if isinstance(layer, QAveragePooling2D): pool_area = layer.pool_size if isinstance(layer.pool_size, int): pool_area = layer.pool_size * layer.pool_size else: pool_area = np.prod(layer.pool_size) else: pool_area = layer.compute_pooling_area(input_shape=layer.input_shape) saved_weights[ layer.name]["q_mult_factor"] = layer.average_quantizer_internal( 1.0 / pool_area).numpy() saved_weights[layer.name]["mult_factor"] = 1.0 / pool_area saved_weights[layer.name]["pool_area"] = pool_area if has_sign: saved_weights[layer.name]["signs"] = signs if has_scale: 
        saved_weights[layer.name]["scales"] = scales

      # Decide which weights the Keras model itself keeps. Folded layers
      # cannot take separately-quantized weights because folding happens
      # before quantization.
      if not any(isinstance(layer, t) for t in [
          QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]):
        # Set layer weights in the format that software inference uses
        layer.set_weights(weights)
      else:
        print(layer.name, " conv and batchnorm weights cannot be seperately"
              " quantized because they will be folded before quantization.")

      # adjust weights for bn fusing if necessary
      if layer.name in fusing_layer_pair_dict.keys():
        print(f"Fuse {layer.name} output with "
              f"{fusing_layer_pair_dict[layer.name]} for hardware inference.")
        add_bn_fusing_weights(
            prev_layer=layer,
            bn_layer=model.get_layer(fusing_layer_pair_dict[layer.name]),
            saved_weights=saved_weights)
    else:
      # Layer exposes no quantizers; leave its weights untouched.
      if layer.get_weights():
        print(" ", layer.name, "has not been quantized")

  if filename:
    model.save_weights(filename)

  return saved_weights


def quantize_activation(layer_config, activation_bits):
  """Replaces activation by quantized activation functions.

  Mutates ``layer_config`` in place, mapping relu/tanh/sigmoid activations
  to their quantized counterparts with ``activation_bits`` bits.
  """

  str_act_bits = str(activation_bits)

  # relu -> quantized_relu(bits)
  # tanh -> quantized_tanh(bits)
  # sigmoid -> quantized_sigmoid(bits)
  # more to come later

  if layer_config.get("activation", None) is None:
    return

  # The activation may be stored as a string, a function, or a layer/object.
  if isinstance(layer_config["activation"], six.string_types):
    a_name = layer_config["activation"]
  elif isinstance(layer_config["activation"], types.FunctionType):
    a_name = layer_config["activation"].__name__
  else:
    a_name = layer_config["activation"].__class__.__name__

  if a_name == "linear":
    return

  if a_name == "relu":
    layer_config["activation"] = "quantized_relu(" + str_act_bits + ")"
  elif a_name == "tanh":
    layer_config["activation"] = "quantized_tanh(" + str_act_bits + ")"
  elif a_name == "sigmoid":
    layer_config["activation"] = "quantized_sigmoid(" + str_act_bits + ")"


def get_config(quantizer_config, layer, layer_class, parameter=None):
  """Returns search of quantizer on quantizer_config.

  Looks up by the layer's instance name first, falling back to the layer
  class name; if ``parameter`` is given, descends one more level into the
  returned dictionary.
  """

  quantizer = quantizer_config.get(layer["config"]["name"],
                                   quantizer_config.get(layer_class, None))

  if quantizer is not None and parameter is not None:
    quantizer = quantizer.get(parameter, None)

  return quantizer


def is_TFOpLambda_layer(layer):
  """Returns True if the layer is a serialized TFOpLambda layer."""
  return layer.__class__.__name__ == "TFOpLambda"


def get_y_from_TFOpLambda(model_cfg, layer):
  """Get the value of "y" from the TFOpLambda layer's configuration.

  Args:
    model_cfg: dictionary type, model.get_config() output
    layer: a given layer instance

  Returns:
    value of "y" for a TFOpLambda layer. 'y' here corresponds to how
    tensorflow stores TFOpLambda layer parameter in serialization. For
    example, TFOpLambda(func), where func is tf.multiply(input_tensor, 3).
    "y" would be the value 3.
  """
  for layer_config in model_cfg["layers"]:
    op_name = layer_config["config"]["name"]
    class_name = layer_config["class_name"]
    # TODO(lishanok): Extend support for other TFOpLambda types when needed
    if op_name == layer.name and class_name == "TFOpLambda":
      assert ("tf.__operators__.add" in op_name or
              "tf.math.multiply" in op_name), (
                  "TFOpLambda layer {} not supported!".format(op_name))
      return layer_config["inbound_nodes"][-1][-1]["y"]
  return None


def convert_to_folded_model(model):
  """Find conv/dense layers followed by bn layers and fold them.

  Args:
    model: input model

  Returns:
    new model without bn layers
    list of layers being folded

  Note: supports sequential and non-sequential model
  """
  fold_model = clone_model(model)
  model_cfg = model.get_config()

  (graph, _) = qgraph.GenerateGraphFromModel(
      fold_model, "quantized_bits(8, 0, 1)", "quantized_bits(8, 0, 1)")
  qgraph.GraphAddSingleSourceSingleSink(graph)
  qgraph.GraphRemoveNodeWithNodeType(graph, "InputLayer")
  qgraph.GraphPropagateActivationsToEdges(graph)

  # Finds the Batchnorm nodes to be deleted and mark them.
  bn_nodes_to_delete = []
  layers_to_fold = []
  for node_id in nx.topological_sort(graph):
    layer_input_tensors = []
    node = graph.nodes[node_id]
    layer = node["layer"][0]
    if layer:
      successor_ids = list(graph.successors(node_id))
      is_single = len(successor_ids) == 1
      successor_layer = graph.nodes[successor_ids[0]]["layer"][0]
      followed_by_bn = (successor_layer.__class__.__name__ ==
                        "BatchNormalization")
      # TODO(lishanok): extend to QDense types
      # Foldable = conv layer with exactly one successor which is a BN layer.
      is_foldable = layer.__class__.__name__ in [
          "Conv2D", "DepthwiseConv2D"
      ] and is_single and followed_by_bn
      if is_foldable:
        # Removes the batchnorm node from the graph.
        bn_nodes_to_delete.append(successor_ids[0])
        layers_to_fold.append(layer.name)

  # Deletes the marked nodes.
  for node_id in bn_nodes_to_delete:
    qgraph.GraphRemoveNode(graph, node_id)

  # Modifies model according to the graph.
  model_outputs = []
  x = model_inputs = fold_model.inputs
  for node_id in nx.topological_sort(graph):
    layer_input_tensors = []
    node = graph.nodes[node_id]
    layer = node["layer"][0]
    if layer:
      # Gets layer input tensors from graph edge.
      for parent_node_id in graph.predecessors(node_id):
        edge = graph.edges[(parent_node_id, node_id)]
        input_tensor = edge["tensor"]
        layer_input_tensors.append(input_tensor)

      # We call the layer to get output tensor.
      # Edge tensors are stored as weak refs; deref before calling the layer.
      if len(layer_input_tensors) == 1:
        layer_input_tensors = layer_input_tensors[0].deref()
      else:
        layer_input_tensors = [t.deref() for t in layer_input_tensors]
      if is_TFOpLambda_layer(layer):
        # TFOpLambda layer requires one extra input: "y"
        y = get_y_from_TFOpLambda(model_cfg, layer)
        x = layer(layer_input_tensors, y)
      else:
        x = layer(layer_input_tensors)

      # Replaces edge tensors between the predecessor and successor
      for u, v in graph.edges(node_id):
        # u is current layer node, v is successor layer node
        # graph[u][v] is the edge between the two nodes
        # Replace the tensor on this edge so that the input tensor for the
        # successor layer can be updated accordingly.
        graph[u][v]["tensor"] = x.ref()
        if v == -2 and x not in model_outputs:
          # When it is output layer, add the output tensor of this layer
          # into model outputs.
          model_outputs.append(x)

  new_model = Model(inputs=model_inputs, outputs=model_outputs)
  return new_model, layers_to_fold


def model_quantize(model,
                   quantizer_config,
                   activation_bits,
                   custom_objects=None,
                   transfer_weights=False,
                   prefer_qadaptiveactivation=False,
                   enable_bn_folding=False):
  """Creates a quantized model from non-quantized model.

  The quantized model translation is based on json interface of Keras,
  which requires a custom_objects dictionary for "string" types.

  Because of the way json works, we pass "string" objects for the
  quantization mechanisms and we perform an eval("string") which
  technically is not safe, but it will do the job.

  The quantizer_config is a dictionary with the following form.
  {
    Dense_layer_name: {
        "kernel_quantizer": "quantizer string",
        "bias_quantizer": "quantizer_string"
    },

    Conv2D_layer_name: {
        "kernel_quantizer": "quantizer string",
        "bias_quantizer": "quantizer_string"
    },

    Activation_layer_name: "quantizer string",

    "QActivation": { "relu": "quantizer_string" },

    "QConv2D": {
        "kernel_quantizer": "quantizer string",
        "bias_quantizer": "quantizer_string"
    },

    "QBatchNormalization": {}
  }

  In the case of "QBidirectional", we can follow the same form as above.
  The specified configuration will be used for both forward and backwards
  layer.
  {
    "Bidirectional" : {
        "kernel_quantizer" : "quantizer string",
        "bias_quantizer" : "quantizer string",
        "recurrent_quantizer" : "quantizer string"
    }
  }

  In the case of "QActivation", we can modify only certain types of
  activations, for example, a "relu". In this case we represent the
  activation name by a dictionary, or we can modify all activations,
  without representing them as a set.

  We right now require a default case in case we cannot find layer name.
  This simplifies the dictionary because the simplest case, we can just
  say:

  {
    "default": {
        "kernel": "quantized_bits(4)",
        "bias": "quantized_bits(4)"
    }
  }

  and this will quantize all layers' weights and bias to be created with
  4 bits.

  Arguments:
    model: model to be quantized
    quantizer_config: dictionary (as above) with quantized parameters
    activation_bits: number of bits for quantized_relu, quantized_tanh,
      quantized_sigmoid
    custom_objects: dictionary following keras recommendations for json
      translation.
    transfer_weights: if true, weights are to be transfered from model to
      qmodel.
    prefer_qadaptiveactivation: Bool. If true, try to use
      QAdaptiveActivation over QActivation whenever possible
    enable_bn_folding: Bool. If true, fold conv/dense layers with
      following batch normalization layers whenever possible. use
      QConv2DBatchnorm for example, to replace conv2d layers

  Returns:
    qmodel with quantized operations and custom_objects.
  """

  if enable_bn_folding:
    # Removes bn layers from the model and find a list of layers to fold.
    model, layers_to_fold = convert_to_folded_model(model)
    if len(layers_to_fold) == 0:
      # If no layers to fold, no need to perform folding.
      enable_bn_folding = False

  if not custom_objects:
    custom_objects = {}

  # Let's make a deep copy to make sure our objects are not shared elsewhere.
  jm = copy.deepcopy(json.loads(model.to_json()))
  custom_objects = copy.deepcopy(custom_objects)

  config = jm["config"]
  layers = config["layers"]

  def quantize_rnn(layer, quantizer_config):
    # Converts one serialized RNN layer config (SimpleRNN/LSTM/GRU) to its
    # quantized counterpart, mutating `layer` in place.
    q_name = "Q" + layer["class_name"]

    # Needs to add kernel, recurrent bias quantizers.
    kernel_quantizer = get_config(
        quantizer_config, layer, q_name, "kernel_quantizer")
    recurrent_quantizer = get_config(
        quantizer_config, layer, q_name, "recurrent_quantizer")
    if layer["config"]['use_bias']:
      bias_quantizer = get_config(
          quantizer_config, layer, q_name, "bias_quantizer")
    else:
      bias_quantizer = None
    state_quantizer = get_config(
        quantizer_config, layer, q_name, "state_quantizer")

    # This is to avoid unwanted transformations.
    if kernel_quantizer is None:
      return

    layer["config"]["kernel_quantizer"] = kernel_quantizer
    layer["config"]["recurrent_quantizer"] = recurrent_quantizer
    layer["config"]["bias_quantizer"] = bias_quantizer
    layer["config"]["state_quantizer"] = state_quantizer

    # If activation is present, add activation here.
    activation = get_config(
        quantizer_config, layer, q_name, "activation_quantizer")
    if activation:
      layer["config"]["activation"] = activation
    else:
      quantize_activation(layer["config"], activation_bits)

    # If recurrent activation is present, add activation here.
    if layer["class_name"] in ["LSTM", "GRU"]:
      recurrent_activation = get_config(
          quantizer_config, layer, q_name, "recurrent_activation_quantizer")
      if recurrent_activation:
        layer["config"]["recurrent_activation"] = recurrent_activation

    layer["class_name"] = q_name
    # Keras 3 serialization keeps a "registered_name"; point it at the
    # quantized class so deserialization resolves correctly.
    registered_name = layer.pop("registered_name", None)
    if registered_name:
      layer["registered_name"] = q_name

  for layer in layers:
    layer_config = layer["config"]

    # Dense becomes QDense, Conv1D becomes QConv1D etc
    # Activation converts activation functions.
    if layer["class_name"] in [
        "Dense", "Conv1D", "Conv2D", "Conv2DTranspose", "SeparableConv1D",
        "SeparableConv2D"
    ]:
      if (layer["class_name"] in ["Dense", "Conv2D"] and enable_bn_folding and
          layer["name"] in layers_to_fold):
        # Only fold if current layer is followed by BN layer.
        q_name = "Q" + layer["class_name"] + "Batchnorm"
        layer_config["use_bias"] = True  # Folded layers require a bias

        # Sets ema_freeze_delay and folding_mode specific to
        # QDepthwiseConv2DBatchnorm layer config.
        folding_mode = get_config(
            quantizer_config, layer, q_name, "folding_mode")
        layer_config["folding_mode"] = (
            folding_mode if folding_mode else "ema_stats_folding")
        ema_freeze_delay = get_config(
            quantizer_config, layer, q_name, "ema_freeze_delay")
        layer_config["ema_freeze_delay"] = (
            ema_freeze_delay if ema_freeze_delay else None)
      else:
        q_name = "Q" + layer["class_name"]

      # Needs to add kernel/bias quantizers.
      kernel_quantizer = get_config(
          quantizer_config, layer, q_name, "kernel_quantizer")
      if layer_config["use_bias"]:
        bias_quantizer = get_config(
            quantizer_config, layer, q_name, "bias_quantizer")
      else:
        bias_quantizer = None
      if (kernel_quantizer is None and
          q_name == "Q" + layer["class_name"] + "Batchnorm"):
        # Tries none-folded layer quantizer as a back up.
        kernel_quantizer = get_config(
            quantizer_config, layer, "Q" + layer["class_name"],
            "kernel_quantizer")
        bias_quantizer = get_config(
            quantizer_config, layer, "Q" + layer["class_name"],
            "bias_quantizer")

      # This is to avoid unwanted transformations.
      if kernel_quantizer is None:
        continue

      layer["class_name"] = q_name
      layer_config["kernel_quantizer"] = kernel_quantizer
      layer_config["bias_quantizer"] = bias_quantizer

      # If activation is present, add activation here.
      quantizer = get_config(
          quantizer_config, layer, q_name, "activation_quantizer")
      if quantizer:
        layer_config["activation"] = quantizer
      else:
        quantize_activation(layer_config, activation_bits)

    elif layer["class_name"] == "DepthwiseConv2D":
      if enable_bn_folding and layer["name"] in layers_to_fold:
        q_name = "QDepthwiseConv2DBatchnorm"
        layer_config["use_bias"] = True  # Folded layers require a bias

        # Sets ema_freeze_delay and folding_mode specific to
        # QDepthwiseConv2DBatchnorm layers.
        folding_mode = get_config(
            quantizer_config, layer, q_name, "folding_mode")
        layer_config["folding_mode"] = (
            folding_mode if folding_mode else "ema_stats_folding")
        ema_freeze_delay = get_config(
            quantizer_config, layer, q_name, "ema_freeze_delay")
        layer_config["ema_freeze_delay"] = (
            ema_freeze_delay if ema_freeze_delay else None)
      else:
        q_name = "QDepthwiseConv2D"

      # Needs to add kernel/bias quantizers.
      depthwise_quantizer = get_config(quantizer_config, layer, q_name,
                                       "depthwise_quantizer")
      if layer_config["use_bias"]:
        bias_quantizer = get_config(quantizer_config, layer, q_name,
                                    "bias_quantizer")
      else:
        bias_quantizer = None
      if depthwise_quantizer is None and q_name == "QDepthwiseConv2DBatchnorm":
        # Tries none-folded layer quantizer as a back up.
        depthwise_quantizer = get_config(
            quantizer_config, layer, "QDepthwiseConv2D", "depthwise_quantizer")
        bias_quantizer = get_config(
            quantizer_config, layer, "QDepthwiseConv2D", "bias_quantizer")

      # This is to avoid unwanted transformations.
      if depthwise_quantizer is None:
        continue

      layer["class_name"] = q_name
      layer_config["depthwise_quantizer"] = depthwise_quantizer
      layer_config["bias_quantizer"] = bias_quantizer

      # If activation is present, add activation here.
      quantizer = get_config(quantizer_config, layer, q_name,
                             "activation_quantizer",)
      if quantizer:
        layer_config["activation"] = quantizer
      else:
        quantize_activation(layer_config, activation_bits)

    elif layer["class_name"] in ["SimpleRNN", "LSTM", "GRU"]:
      quantize_rnn(layer, quantizer_config)

    elif layer["class_name"] == "Bidirectional":
      # Reuses the single "QBidirectional" config for both directions.
      forward_layer_quantizer_config = {
          layer_config["layer"]["config"]["name"]:
              get_config(quantizer_config, layer, "QBidirectional")
      }
      quantize_rnn(layer["config"]["layer"], forward_layer_quantizer_config)
      if "backward_layer" in layer_config:
        backward_layer_quantizer_config = {
            layer_config["backward_layer"]["config"]["name"]:
                get_config(quantizer_config, layer, "QBidirectional")
        }
        quantize_rnn(layer["config"]["backward_layer"],
                     backward_layer_quantizer_config)
      layer["class_name"] = "QBidirectional"

    elif layer["class_name"] == "Activation":
      if prefer_qadaptiveactivation:
        # Try to find QAdaptiveActivation first
        quantizer = get_config(quantizer_config, layer, "QAdaptiveActivation")
        is_qadaptiveactivation = True
        if quantizer is None:
          # Try QActivation as a backup
          quantizer = get_config(quantizer_config, layer, "QActivation")
          is_qadaptiveactivation = False
      else:
        # Tries to find QActivation first.
        quantizer = get_config(quantizer_config, layer, "QActivation")
        is_qadaptiveactivation = False
        if quantizer is None:
          # Try QAdaptiveActivation as a backup
          quantizer = get_config(quantizer_config, layer,
                                 "QAdaptiveActivation")
          is_qadaptiveactivation = True

      # This is to avoid softmax from quantizing in autoq.
      if quantizer is None:
        continue

      # If quantizer exists in dictionary related to this name,
      # use it, otherwise, use normal transformations.
      if not isinstance(quantizer, dict) or quantizer.get(
          layer_config["activation"], None):
        # Only change activation layer if we will use a quantized activation.
layer["class_name"] = ("QAdaptiveActivation" if is_qadaptiveactivation else "QActivation") if isinstance(quantizer, dict): quantizer = quantizer[layer_config["activation"]] if quantizer: if is_qadaptiveactivation: assert quantizer.find(",") < 0, \ "Only integer bits should be defined for QAdaptiveActivation" layer_config["total_bits"] = int(re.sub(r"[^\d]", "", quantizer)) quantizer = re.sub(r"\(.*", "", quantizer) # remove params layer_config["activation"] = quantizer else: quantize_activation(layer_config, activation_bits) # We have to do this because of other instances of ReLU. elif layer["class_name"] in ["ReLU", "relu", "LeakyReLU"]: quantizer = get_config(quantizer_config, layer, "QActivation") # This is to avoid unwanted transformations. if quantizer is None: continue if layer["class_name"] == "LeakyReLU": negative_slope = layer["config"]["alpha"] elif layer["class_name"] == "relu": max_value = layer["config"]["max_value"] negative_slope = layer["config"]["alpha"] threshold = layer["config"]["threshold"] else: # ReLU from mobilenet max_value = layer["config"]["max_value"] negative_slope = layer["config"]["negative_slope"] threshold = layer["config"]["threshold"] if negative_slope > 0: q_name = "leakyrelu" else: q_name = "relu" # If quantizer exists in dictionary related to this name, # use it, otherwise, use normal transformations. if not isinstance(quantizer, dict) or quantizer.get(q_name, None): # Only change activation layer if we will use a quantized activation. layer["class_name"] = "QActivation" # Remove relu specific configurations # remember that quantized relu's are always upper bounded. 
if layer["class_name"] == "LeakyReLU": del layer["config"]["alpha"] elif layer["class_name"] == "relu": del layer["config"]["max_value"] del layer["config"]["alpha"] del layer["config"]["threshold"] else: # ReLU from mobilenet del layer["config"]["max_value"] del layer["config"]["negative_slope"] del layer["config"]["threshold"] if isinstance(quantizer, dict): quantizer = quantizer[q_name] if quantizer: layer["config"]["activation"] = quantizer else: quantize_activation(layer["config"], activation_bits) elif layer["class_name"] == "BatchNormalization": # We will assume at least QBatchNormalization or # layer name is in dictionary to enable conversion # otherwise we will just skip it. if ( layer_config["name"] not in quantizer_config and "QBatchNormalization" not in quantizer_config ): continue layer["class_name"] = "QBatchNormalization" # Needs to add kernel/bias quantizers. gamma_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "gamma_quantizer") beta_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "beta_quantizer") mean_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "mean_quantizer") variance_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "variance_quantizer") layer_config["gamma_quantizer"] = gamma_quantizer layer_config["beta_quantizer"] = beta_quantizer layer_config["mean_quantizer"] = mean_quantizer layer_config["variance_quantizer"] = variance_quantizer elif layer["class_name"] in ["AveragePooling2D", "GlobalAveragePooling2D"]: q_name = "Q" + layer["class_name"] # Adds the average quanizer to config. average_quantizer = get_config( quantizer_config, layer, q_name, "average_quantizer") # This is to avoid unwanted transformations. if average_quantizer is None: continue layer["class_name"] = q_name layer_config["average_quantizer"] = average_quantizer # Adds activation to config. 
      quantizer = get_config(
          quantizer_config, layer, q_name, "activation_quantizer")
      if quantizer:
        layer_config["activation"] = quantizer
      else:
        quantize_activation(layer_config, activation_bits)

    # Keras 3 serialization keeps a "registered_name"; point it at the
    # quantized class name when one was selected for this layer.
    # NOTE(review): q_name may be stale from a previous loop iteration (or
    # unbound on the very first layer) for class types not handled above —
    # verify this is intended.
    registered_name = layer.pop("registered_name", None)
    if registered_name:
      layer["registered_name"] = q_name or registered_name

  # We need to keep a dictionary of custom objects as our quantized library
  # is not recognized by keras.
  qmodel = quantized_model_from_json(json.dumps(jm), custom_objects)

  # If transfer_weights is true, we load the weights from model to qmodel.
  if transfer_weights and not enable_bn_folding:
    for layer, qlayer in zip(model.layers, qmodel.layers):
      if layer.get_weights():
        qlayer.set_weights(copy.deepcopy(layer.get_weights()))

  return qmodel


def _add_supported_quantized_objects(custom_objects):
  """Map all the quantized objects."""
  custom_objects["QInitializer"] = QInitializer
  custom_objects["QDense"] = QDense
  custom_objects["QConv1D"] = QConv1D
  custom_objects["QConv2D"] = QConv2D
  custom_objects["QConv2DTranspose"] = QConv2DTranspose
  custom_objects["QSimpleRNNCell"] = QSimpleRNNCell
  custom_objects["QSimpleRNN"] = QSimpleRNN
  custom_objects["QLSTMCell"] = QLSTMCell
  custom_objects["QLSTM"] = QLSTM
  custom_objects["QGRUCell"] = QGRUCell
  custom_objects["QGRU"] = QGRU
  custom_objects["QBidirectional"] = QBidirectional
  custom_objects["QDepthwiseConv2D"] = QDepthwiseConv2D
  custom_objects["QSeparableConv1D"] = QSeparableConv1D
  custom_objects["QSeparableConv2D"] = QSeparableConv2D
  custom_objects["QActivation"] = QActivation
  custom_objects["QAdaptiveActivation"] = QAdaptiveActivation
  custom_objects["QBatchNormalization"] = QBatchNormalization
  custom_objects["Clip"] = Clip
  custom_objects["quantized_bits"] = quantized_bits
  custom_objects["bernoulli"] = bernoulli
  custom_objects["stochastic_ternary"] = stochastic_ternary
  custom_objects["ternary"] = ternary
  custom_objects["stochastic_binary"] = stochastic_binary
  custom_objects["binary"] = binary
  custom_objects["quantized_relu"] = quantized_relu
  custom_objects["quantized_ulaw"] = quantized_ulaw
  custom_objects["quantized_tanh"] = quantized_tanh
  custom_objects["quantized_sigmoid"] = quantized_sigmoid
  custom_objects["quantized_po2"] = quantized_po2
  custom_objects["quantized_relu_po2"] = quantized_relu_po2
  # custom_objects["quantized_bits_learnable_scale"] = quantized_bits_learnable_scale
  custom_objects["QConv2DBatchnorm"] = QConv2DBatchnorm
  custom_objects["QDepthwiseConv2DBatchnorm"] = QDepthwiseConv2DBatchnorm
  custom_objects["QAveragePooling2D"] = QAveragePooling2D
  custom_objects["QGlobalAveragePooling2D"] = QGlobalAveragePooling2D
  custom_objects["QScaleShift"] = QScaleShift


def clone_model(model, custom_objects=None):
  """Clones model with custom_objects."""

  if not custom_objects:
    custom_objects = {}

  # Makes a deep copy to make sure our objects are not shared elsewhere.
  custom_objects = copy.deepcopy(custom_objects)
  _add_supported_quantized_objects(custom_objects)

  json_string = model.to_json()
  qmodel = quantized_model_from_json(json_string,
                                     custom_objects=custom_objects)
  qmodel.set_weights(model.get_weights())

  return qmodel


def quantized_model_from_json(json_string, custom_objects=None):
  """Deserializes a (possibly quantized) model from its json string.

  Registers all QKeras custom objects before delegating to Keras's
  model_from_json.
  """
  if not custom_objects:
    custom_objects = {}

  # Makes a deep copy to make sure our objects are not shared elsewhere.
  custom_objects = copy.deepcopy(custom_objects)
  _add_supported_quantized_objects(custom_objects)

  qmodel = model_from_json(json_string, custom_objects=custom_objects)

  return qmodel


def load_qmodel(filepath, custom_objects=None, compile=True):
  """Loads quantized model from Keras's model.save() h5 file.

  Arguments:
    filepath: one of the following:
      - string, path to the saved model
      - h5py.File or h5py.Group object from which to load the model
      - any file-like object implementing the method `read` that returns
        `bytes` data (e.g. `io.BytesIO`) that represents a valid h5py file
        image.
    custom_objects: Optional dictionary mapping names (strings) to custom
      classes or functions to be considered during deserialization.
    compile: Boolean, whether to compile the model after loading.

  Returns:
    A Keras model instance. If an optimizer was found as part of the saved
    model, the model is already compiled. Otherwise, the model is uncompiled
    and a warning will be displayed. When `compile` is set to False, the
    compilation is omitted without any warning.
  """
  if not custom_objects:
    custom_objects = {}

  # Makes a deep copy to make sure our objects are not shared elsewhere.
  custom_objects = copy.deepcopy(custom_objects)
  _add_supported_quantized_objects(custom_objects)

  qmodel = tf.keras.models.load_model(filepath,
                                      custom_objects=custom_objects,
                                      compile=compile)
  return qmodel


def print_model_sparsity(model):
  """Prints sparsity for the pruned layers in the model."""

  def _get_sparsity(weights):
    # Fraction of zero-valued entries in the weight tensor.
    return 1.0 - np.count_nonzero(weights) / float(weights.size)

  print("Model Sparsity Summary ({})".format(model.name))
  print("--")
  for layer in model.layers:
    # Collect prunable weights depending on how the layer supports pruning.
    if isinstance(layer, pruning_wrapper.PruneLowMagnitude):
      prunable_weights = layer.layer.get_prunable_weights()
    elif isinstance(layer, prunable_layer.PrunableLayer):
      prunable_weights = layer.get_prunable_weights()
    elif prune_registry.PruneRegistry.supports(layer):
      weight_names = prune_registry.PruneRegistry._weight_names(layer)
      prunable_weights = [getattr(layer, weight) for weight in weight_names]
    else:
      prunable_weights = None
    if prunable_weights:
      print("{}: {}".format(
          layer.name, ", ".join([
              "({}, {})".format(weight.name,
                                str(_get_sparsity(K.get_value(weight))))
              for weight in prunable_weights
          ])))
  print("\n")


def get_model_sparsity(model, per_layer=False, allow_list=None):
  """Calculates the sparsity of the model's weights and biases.

  Quantizes the model weights using model_save_quantized_weights (but does
  not save the quantized weights) before calculating the proportion of
  weights and biases set to zero.

  Arguments:
    model: The model to use to calculate sparsity. Assumes that this is a
      QKeras model with trained weights.
    per_layer: If to return a per-layer breakdown of sparsity
    allow_list: A list of layer class names that sparsity will be calculated
      for. If set to None, a default list will be used.

  Returns:
    A float value representing the proportion of weights and biases set to
    zero in the quantized model. If per_layer is True, it also returns a
    per-layer breakdown of model sparsity formatted as a list of tuples in
    the form (<layer name>, <sparsity proportion>)
  """
  # Checks if to use a default list of allowed layers to calculate sparsity.
  if allow_list is None:
    allow_list = [
        "QDense", "Dense", "QConv1D", "Conv1D", "QConv2D", "Conv2D",
        "QDepthwiseConv2D", "DepthwiseConv2D", "QSeparableConv1D",
        "SeparableConv1D", "QSeparableConv2D", "SeparableConv2D",
        "QOctaveConv2D", "QSimpleRNN", "RNN", "QLSTM", "QGRU",
        "QConv2DTranspose", "Conv2DTranspose", "QConv2DBatchnorm",
        "QDepthwiseConv2DBatchnorm",
    ]

  # Quantizes the model weights for a more accurate sparsity calculation.
  model_save_quantized_weights(model)

  # Calculates the sparsity layer by layer.
  layer_sparsity = []
  total_sparsity = 0.
  all_weights = []
  for layer in model.layers:
    if hasattr(layer, "quantizers") and layer.__class__.__name__ in allow_list:
      # Folded layers expose their effective weights via get_folded_weights.
      if layer.__class__.__name__ in [
          "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
        weights_to_examine = layer.get_folded_weights()
      else:
        weights_to_examine = layer.get_weights()
      layer_weights = []
      for weight in weights_to_examine:
        try:
          weight_numpy = weight.ravel()
        except AttributeError:
          # In case of EagerTensor.
          weight_numpy = weight.numpy().ravel()
        layer_weights.append(weight_numpy)
        all_weights.append(weight_numpy)
      layer_weights = np.concatenate(layer_weights)
      layer_sparsity.append((layer.name, np.mean(layer_weights == 0)))

  if len(all_weights) > 0:
    # Average the sparsity for the entire model.
    all_weights = np.concatenate(all_weights)
    total_sparsity = np.mean(all_weights == 0)

  if per_layer:
    return (total_sparsity, layer_sparsity)
  else:
    return total_sparsity


def quantized_model_debug(model, X_test, plot=False, plt_instance=None):
  """Debugs and plots model weights and activations.

  Args:
    model: The QKeras model to debug
    X_test: The sample data to use to give to model.predict
    plot: Bool. If to plot the results.
    plt_instance: A matplotlib.pyplot instance used to plot in an IPython
      environment.
  """
  assert (plt_instance and plot) or not plot, (
      "plt_instance is required if plt is True")
  outputs = []
  output_names = []
  for layer in model.layers:
    if layer.__class__.__name__ in REGISTERED_LAYERS:
      output_names.append(layer.name)
      outputs.append(layer.output)
  model_debug = Model(inputs=model.inputs, outputs=outputs)
  y_pred = model_debug.predict(X_test)
  print("{:30} {: 8.4f} {: 8.4f}".format(
      "input", np.min(X_test), np.max(X_test)))
  for n, p in zip(output_names, y_pred):
    layer = model.get_layer(n)
    # NOTE(review): these `in` checks are substring tests against the string
    # "QActivation"/"QAdaptiveActivation", not equality — e.g. "Q" would also
    # match. Verify this is intentional.
    if (layer.__class__.__name__ in "QActivation" or
        layer.__class__.__name__ in "QAdaptiveActivation"):
      alpha = get_weight_scale(layer.activation, p)
    else:
      alpha = 1.0
    print(
        "{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p / alpha),
                                         np.max(p / alpha)),
        end="")
    if alpha != 1.0:
      print(" a[{: 8.4f} {:8.4f}]".format(np.min(alpha), np.max(alpha)))
    if plot and layer.__class__.__name__ in [
        "QConv1D", "QConv2D", "QConv2DTranspose", "QDense", "QActivation",
        "QAdaptiveActivation", "QSimpleRNN", "QLSTM", "QGRU",
        "QBidirectional", "QSeparableConv1D", "QSeparableConv2D"
    ]:
      plt_instance.hist(p.flatten(), bins=25)
      plt_instance.title(layer.name + "(output)")
      plt_instance.show()
    alpha = None
    # Folded layers expose their effective weights via get_folded_weights.
    if layer.__class__.__name__ not in [
        "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
      weights_to_examine = layer.get_weights()
    else:
      weights_to_examine = layer.get_folded_weights()
    for i, weights in enumerate(weights_to_examine):
      if hasattr(layer, "get_quantizers") and layer.get_quantizers()[i]:
        weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
      if i == 0 and layer.__class__.__name__ in [
          "QConv1D", "QConv2D", "QConv2DTranspose", "QDense", "QSimpleRNN",
          "QLSTM", "QGRU", "QSeparableConv1D", "QSeparableConv2D",
          "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"
      ]:
        alpha = get_weight_scale(layer.get_quantizers()[i], weights)
        # if alpha is 0, let's remove all weights.
        alpha_mask = (alpha == 0.0)
        weights = np.where(alpha_mask, weights * alpha, weights / alpha)
      if plot:
        plt_instance.hist(weights.flatten(), bins=25)
        plt_instance.title(layer.name + "(weights)")
        plt_instance.show()
      print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
            end="")
    if alpha is not None and isinstance(alpha, np.ndarray):
      print(" a({: 10.6f} {: 10.6f})".format(
          np.min(alpha), np.max(alpha)), end="")
    print("")


def quantized_model_dump(model, x_test, output_dir=None, layers_to_dump=[]):
  """Dumps tensors of target layers to binary files.

  Arguments:
    model: QKeras model object.
    x_test: numpy type, test tensors to generate output tensors.
    output_dir: a string for the directory to hold binary data.
    layers_to_dump: a list of string, specified layers by layer customized
      name.

  NOTE(review): layers_to_dump uses a mutable default argument; safe only
  because it is never mutated here.
  """
  outputs = []
  y_names = []

  if not output_dir:
    # NOTE(review): TemporaryDirectory deletes the directory as soon as the
    # `with` block exits; it is then re-created by makedirs below but never
    # cleaned up. tempfile.mkdtemp() would express the intent directly.
    with tempfile.TemporaryDirectory() as output_dir:
      print("temp dir", output_dir)

  if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    print("create dir", output_dir)

  for layer in model.layers:
    if not layers_to_dump or layer.name in layers_to_dump:
      y_names.append(layer.name)
      outputs.append(layer.output)

  # Gather the tensor outputs from specified layers at layers_to_dump.
  model_debug = Model(inputs=model.inputs, outputs=outputs)
  y_pred = model_debug.predict(x_test)

  # Dumps tensors to files.
  for name, tensor_data in zip(y_names, y_pred):
    filename = os.path.join(output_dir, name + ".bin")
    print("writing the layer output tensor to ", filename)
    # NOTE(review): binary float32 data written through a text-mode ("w")
    # handle — "wb" would be the portable choice; verify on non-POSIX.
    with open(filename, "w") as fid:
      tensor_data.astype(np.float32).tofile(fid)


def clone_model_and_freeze_auto_po2_scale(
    orig_model, orig_model_path=None, quantize_model_weights=False):
  """Clone model and freeze the scale value of auto_po2 type quantizers.

  Args:
    orig_model: original model which will be used to clone the new model.
      If set to None, the function will load the original model from
      orig_model_path argument.
    orig_model_path: The path to the original model file. If set to None,
      the function will load the original model from the orig_model
      argument.
    quantize_model_weights: Bool to quantize weights to HW format. If set
      to False, the model weights will be in float format. If set to True,
      the model weights will be in HW format and the function will also
      check if the hw weights extracted from the new model matches the
      original model.

  Returns:
    A tuple of the new model and the new model's hw weights.

  Note:
    + When using this function to retrain model with fixed scale value.
      Set quantize_model_weights to False in this case.
    + This function only supports a collection of common layers that will
      use auto_po2 quantizers. For less common layers, it will raise errors
      and we will add more support case by case.

  Example usage:
    model, _ = clone_model_and_freeze_auto_po2_scale(
        orig_model_path="path/to/model", quantize_model_weights=False)
  """

  def _create_bn_layer(layer_cfg, bn_inv_quantizer):
    # Clone batch normalization layer with the new inverse quantizer.
    if bn_inv_quantizer is not None:
      layer_cfg["inverse_quantizer"]["config"] = bn_inv_quantizer.get_config()
    return QBatchNormalization(**layer_cfg)

  def _create_qconv2d_layer(layer_cfg, kernel_quantizer):
    # Clone QConv2D layer with the new kernel quantizers.
if kernel_quantizer is not None: layer_cfg["kernel_quantizer"]["config"] = kernel_quantizer.get_config() return QConv2D(**layer_cfg) def _create_qdepthwise_conv2d_layer(layer_cfg, depthwise_quantizer): # Clone QDepthwiseConv2D layer with the new depthwise_quantizer quantizer. if depthwise_quantizer is not None: layer_cfg["depthwise_quantizer"][ "config"] = depthwise_quantizer.get_config() return QDepthwiseConv2D(**layer_cfg) def _create_qdense_layer(layer_cfg, kernel_quantizer): # Clone QDense layer with the new kernel quantizer. if kernel_quantizer is not None: layer_cfg["kernel_quantizer"]["config"] = kernel_quantizer.get_config() return QDense(**layer_cfg) def _create_other_layer(orig_layer): # Clone other layers. config = orig_layer.get_config() return orig_layer.__class__.from_config(config) def _create_quantized_bits_with_post_training_scale(q): # Create a new quantized_bits instance with the fixed scale value. if q is not None: q_cfg = q.get_config() q_cfg["post_training_scale"] = q.scale.numpy() q = quantized_bits(**q_cfg) return q def _find_auto_po2_quantizer(layer): # Find the auto_po2 quantizer in the layer. Note that we allow at # most one auto_po2 quantizer in each layer due to the limitation of # the current HW implementation. num_auto_po2_quantizers = 0 auto_po2_quantizer = None if hasattr(layer, "quantizers"): for q in layer.quantizers: if hasattr(q, "alpha") and q.alpha == "auto_po2": num_auto_po2_quantizers += 1 auto_po2_quantizer = q if num_auto_po2_quantizers > 1: raise ValueError( f"{layer.name} has more than one auto_po2 quantizer. " "Please check if this is expected.") else: return auto_po2_quantizer def _check_hw_weights_equal(hw_weights_1, hw_weights_2): # Check if the hw weights extracted from the new model matches the # original model. 
for layer_name in hw_weights_2.keys(): for key in hw_weights_2[layer_name].keys(): val1 = hw_weights_2[layer_name][key] val2 = hw_weights_1[layer_name][key] if isinstance(val1, list): for (v1, v2) in zip(val1, val2): if not np.all(v1 == v2): raise ValueError( f"{layer_name}/{key}: No Match! v1={v1}, v2={v2}") else: if not np.all(val1 == val2): raise ValueError( f"{layer_name}/{key}: No Match! val1={val1}, val2={val2}") # Load the original model with float weights. # Note: weights will be quantized later in silicon flow by calling # model_save_quantized_weights. if orig_model is not None and orig_model_path is not None: raise ValueError( "Only one of orig_model and orig_model_path can be set.") elif orig_model is None and orig_model_path is None: raise ValueError( "One of orig_model and orig_model_path must be set.") elif orig_model_path is not None: orig_model = load_qmodel(orig_model_path, compile=False) # Quantize model weights and compute quantizer scale values. quantized_model = tf.keras.models.clone_model(orig_model) quantized_model.set_weights(orig_model.get_weights()) # In silicon flow, weight binary files are generated from hw weights. orig_hw_weights = model_save_quantized_weights( quantized_model) # Create a new model with fixed scale quantizers. x = inputs = tf.keras.Input( shape=orig_model.input_shape[1:], name=orig_model.layers[0].name) for layer in quantized_model.layers[1:]: layer_class = layer.__class__.__name__ auto_po2_quantizer = _find_auto_po2_quantizer(layer) auto_po2_quantizer_with_frozen_scale = ( _create_quantized_bits_with_post_training_scale(auto_po2_quantizer)) layer_cfg = layer.get_config() # To be compatible with different python versions, we do not use # match-case style here. 
if layer_class == "QConv2D": x = _create_qconv2d_layer(layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) elif layer_class == "QDepthwiseConv2D": x = _create_qdepthwise_conv2d_layer( layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) elif layer_class == "QBatchNormalization": x = _create_bn_layer(layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) elif layer_class == "QDense": x = _create_qdense_layer(layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) else: x = _create_other_layer(layer)(x) new_model = tf.keras.Model(inputs, x) # Set the weights of the new model to the original model (float weights). new_model.set_weights(orig_model.get_weights()) # Check if the new model still has auto_po2 quantizer. # This function only supports a colleciton of common layers that will use # auto_po2 quantizers. For less common layers, we need to add extra support # in the future. for layer in new_model.layers: q = _find_auto_po2_quantizer(layer) if q is not None and q.post_training_scale is None: raise ValueError( f"{layer.name} in the new model still has auto_po2 quantizer with " "adaptive scales. Please check if this is expected!") new_hw_weights = None if quantize_model_weights: new_hw_weights = model_save_quantized_weights(new_model) # Check if the hw weights extracted from the new model matches the original # nima model. _check_hw_weights_equal(orig_hw_weights, new_hw_weights) return new_model, new_hw_weights ================================================ FILE: requirements.txt ================================================ tensorflow>=2.5.0rc0 numpy>=1.16.5 pyparser pandas>=1.1.0 matplotlib>=3.3.0 scipy>=1.4.1 setuptools>=41.0.0 argparse>=1.4.0 pyasn1<0.5.0,>=0.4.6 requests<3,>=2.21.0 pyparsing pytest>=4.6.9 tensorflow-model-optimization>=0.2.1 networkx>=2.1 # prompt_toolkit is required by IPython. # IPython is required by keras-tuner. # Later prompt_toolkit version requires Python 3.6.2, # which is not supported. 
# cl/380856863
prompt_toolkit<=3.0.18
keras-tuner==1.0.3
scikit-learn>=0.23.1
tqdm>=4.48.0


================================================
FILE: setup.cfg
================================================
[metadata]
name = qkeras
version = 0.9.0
author = Google
author_email = qkeras-team@google.com
description = A quantization extension to Keras that provides drop-in layer replacements
long_description = file: README.md
long_description_content_type = text/markdown
url = https://github.com/google/qkeras
classifiers =
    Programming Language :: Python :: 3
    License :: OSI Approved :: Apache Software License
    Operating System :: OS Independent

[options]
packages = find:
python_requires = >=3.7

[options.packages.find]
where = qkeras

[aliases]
test=pytest


================================================
FILE: setup.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Setup script for qkeras."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import io
import setuptools

# Long description for PyPI is taken verbatim from the README.
with io.open("README.md", "r", encoding="utf8") as fh:
  long_description = fh.read()

setuptools.setup(
    name="QKeras",
    version="0.9.0",
    author="QKeras Team",
    author_email="qkeras-team@google.com",
    maintainer="Shan Li",
    maintainer_email="lishanok@google.com",
    packages=setuptools.find_packages(),
    scripts=[],
    url="",
    license="Apache v.2.0",
    description="Quantization package for Keras",
    long_description=long_description,
    install_requires=[
        "numpy>=1.16.0",
        "scipy>=1.4.1",
        "pyparser",
        "setuptools>=41.0.0",
        "tensorflow-model-optimization>=0.2.1",
        "networkx>=2.1",
        "keras-tuner>=1.0.1",
        "scikit-learn>=0.23.1",
        "tqdm>=4.48.0"
    ],
    setup_requires=[
        "pytest-runner",
    ],
    tests_require=[
        "pytest",
    ],
)


================================================
FILE: tests/automatic_conversion_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pytest
from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from qkeras import *
from qkeras.utils import model_quantize


def create_network():
  """Builds a functional model mixing float, Activation and QConv2D layers."""
  xi = Input((28,28,1))
  x = Conv2D(32, (3, 3))(xi)
  x = Activation("relu", name='relu_act')(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  x = QConv2D(32, (3, 3), activation="quantized_relu(4)")(x)
  return Model(inputs=xi, outputs=x)


def create_network_with_bn():
  """Builds a model whose conv layers are followed by BatchNormalization."""
  xi = Input((28,28,1))
  x = Conv2D(32, (3, 3))(xi)
  x = BatchNormalization(axis=-1)(x)
  x = Activation("relu", name='relu_act')(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  x = DepthwiseConv2D((3, 3))(x)
  x = BatchNormalization(axis=-1)(x)
  return Model(inputs=xi, outputs=x)


def create_network_sequential():
  """Sequential variant of create_network()."""
  model = Sequential([
      Conv2D(32, (3, 3), input_shape=(28,28,1)),
      Activation('relu'),
      Conv2D(32, (3, 3), activation="relu"),
      Activation('softmax'),
      QConv2D(32, (3, 3), activation="quantized_relu(4)")
  ])
  return model


def test_linear_activation():
  m = create_network()

  assert m.layers[1].activation.__name__ == "linear", "test failed"


def test_linear_activation_conversion():
  m = create_network()

  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary",
          "activation_quantizer": "binary"
      }
  }
  qq = model_quantize(m, d, 4)

  assert str(qq.layers[1].activation) == "binary()"


def test_no_activation_conversion_to_quantized():
  m = create_network()
  d = {"QConv2D": {"kernel_quantizer": "binary", "bias_quantizer": "binary"}}
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "Activation"
  assert qq.layers[4].__class__.__name__ == "Activation"


def test_automatic_conversion_from_relu_to_qr():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      }}
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[3].activation) == "quantized_relu(4,0)"


def test_conversion_from_relu_activation_to_qr_qactivation():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "QActivation"
  assert str(qq.layers[2].quantizer) == "ternary()"
  assert qq.layers[4].__class__.__name__ == "Activation"


def test_conversion_from_relu_activation_to_qadaptiveactivation():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QAdaptiveActivation": {
          "relu": "quantized_relu(8)"
      }
  }
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "QAdaptiveActivation"
  assert str(qq.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq.layers[4].__class__.__name__ == "Activation"


def test_conversion_qadaptiveactivation_with_preference():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }

  # Test with QActivation preference
  qq1 = model_quantize(m, d, 4, prefer_qadaptiveactivation=False)
  assert qq1.layers[2].__class__.__name__ == "QActivation"
  assert str(qq1.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq1.layers[4].__class__.__name__ == "Activation"

  # Test with QAdaptiveActivation preference
  qq2 = model_quantize(m, d, 4, prefer_qadaptiveactivation=True)
  assert qq2.layers[2].__class__.__name__ == "QAdaptiveActivation"
  assert str(qq2.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq2.layers[4].__class__.__name__ == "Activation"


def test_sequential_model_conversion():
  m = create_network_sequential()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      }}
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[2].activation) == "quantized_relu(4,0)"


def test_folded_layer_conversion():
  # create a sequential model with conv2d layer and activation layers
  m1 = create_network()
  # create a sequantial model with conv2d layer followed by bn layer
  m2 = create_network_with_bn()

  # quantization config
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QDepthwiseConv2D": {
          "depthwise_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QConv2DBatchnorm": {
          "kernel_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "QDepthwiseConv2DBatchnorm": {
          "depthwise_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }

  # test when model has no layer to fold
  # desired behavior: un-folded layers
  qq1 = model_quantize(m1, d, 4, enable_bn_folding=True)
  assert qq1.layers[1].__class__.__name__ == "QConv2D"
  assert str(qq1.layers[1].quantizers[0]).startswith("binary")

  # test when the 1st conv2d layers needs to fold but the 2nd conv2d layer
  # does not (not followed by bn layer)
  # desired behavior: 1st conv2d is folded, 2nd conv2d unfolded
  # also test the depthwiseconv2d layer should fold
  qq2 = model_quantize(m2, d, 4, enable_bn_folding=True)
  assert qq2.layers[1].__class__.__name__ == "QConv2DBatchnorm"
  assert str(qq2.layers[1].quantizers[0]).startswith("ternary")
  assert qq2.layers[3].__class__.__name__ == "QConv2D"
  assert str(qq2.layers[3].quantizers[0]).startswith("binary")
  assert qq2.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
  assert str(qq2.layers[5].quantizers[0]).startswith("ternary")

  # test when there are layers to fold but folding is disabled
  # desired behavior: all conv2d/depthwise2d layers are not folded
  qq3 = model_quantize(m2, d, 4, enable_bn_folding=False)
  assert qq3.layers[1].__class__.__name__ == "QConv2D"
  assert str(qq3.layers[1].quantizers[0]).startswith("binary")
  assert qq3.layers[2].__class__.__name__ == "BatchNormalization"
  assert str(qq3.layers[3].quantizer).startswith("quantized_relu")
  assert qq3.layers[6].__class__.__name__ == "QDepthwiseConv2D"
  assert str(qq3.layers[6].quantizers[0]).startswith("binary")

  # test when QConv2DBatchnorm quantizer, e.g., is not given in config
  # desired behavior: quantizers for QConv2DBatchnorm layer fall back to
  # QConv2D quantizers
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QDepthwiseConv2D": {
          "depthwise_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }
  qq4 = model_quantize(m2, d, 4, enable_bn_folding=True)
  assert qq4.layers[1].__class__.__name__ == "QConv2DBatchnorm"
  assert str(qq4.layers[1].quantizers[0]).startswith("binary")
  assert qq4.layers[3].__class__.__name__ == "QConv2D"
  assert str(qq4.layers[3].quantizers[0]).startswith("binary")
  assert qq4.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
  assert str(qq4.layers[5].quantizers[0]).startswith("binary")


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/autoqkeras_test.py
================================================
# ==============================================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import tempfile

import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from qkeras.autoqkeras import AutoQKerasScheduler


def dense_model():
  """Creates test dense model."""
  x = x_in = Input((4,), name="input")
  x = Dense(20, name="dense_0")(x)
  x = BatchNormalization(name="bn0")(x)
  x = Activation("relu", name="relu_0")(x)
  x = Dense(40, name="dense_1")(x)
  x = BatchNormalization(name="bn1")(x)
  x = Activation("relu", name="relu_1")(x)
  x = Dense(20, name="dense_2")(x)
  x = BatchNormalization(name="bn2")(x)
  x = Activation("relu", name="relu_2")(x)
  x = Dense(3, name="dense")(x)
  x = Activation("softmax", name="softmax")(x)

  model = Model(inputs=x_in, outputs=x)

  return model


def test_autoqkeras():
  """Tests AutoQKeras scheduler."""
  np.random.seed(42)
  tf.random.set_seed(42)

  x_train, y_train = load_iris(return_X_y=True)

  # Scale features to [-0.5, 0.5] before training.
  scaler = MinMaxScaler(feature_range=(-0.5, 0.5))
  scaler.fit(x_train)
  x_train = scaler.transform(x_train)

  nb_classes = np.max(y_train) + 1
  y_train = to_categorical(y_train, nb_classes)

  # Search space: candidate quantizers and their bit costs.
  quantization_config = {
      "kernel": {
          "stochastic_ternary": 2,
          "quantized_bits(8,0,1,alpha=1.0)": 8
      },
      "bias": {
          "quantized_bits(4,0,1)": 4
      },
      "activation": {
          "quantized_relu(4,1)": 4
      },
      "linear": {
          "binary": 1
      }
  }

  goal = {
      "type": "energy",
      "params": {
          "delta_p": 8.0,
          "delta_n": 8.0,
          "rate": 2.0,
          "stress": 1.0,
          "process": "horowitz",
          "parameters_on_memory": ["sram", "sram"],
          "activations_on_memory": ["sram", "sram"],
          "rd_wr_on_io": [False, False],
          "min_sram_size": [0, 0],
          "reference_internal": "int8",
          "reference_accumulator": "int32"
      }
  }

  model = dense_model()
  model.summary()

  optimizer = Adam(lr=0.01)
  model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                metrics=["acc"])

  limit = {
      "dense_0": [["stochastic_ternary"], 8, 4],
      "dense": [["quantized_bits(8,0,1,alpha=1.0)"], 8, 4],
      "BatchNormalization": [],
      "Activation": [4]
  }

  run_config = {
      "output_dir": tempfile.mkdtemp(),
      "goal": goal,
      "quantization_config": quantization_config,
      "learning_rate_optimizer": False,
      "transfer_weights": False,
      "mode": "random",
      "seed": 42,
      "limit": limit,
      "tune_filters": "layer",
      "tune_filters_exceptions": "^dense$",
      "max_trials": 1,
      "blocks": [
          "^.*0$",
          "^dense$"
      ],
      "schedule_block": "cost"
  }

  autoqk = AutoQKerasScheduler(model, metrics=["acc"], **run_config)
  autoqk.fit(x_train, y_train, validation_split=0.1, batch_size=150, epochs=4)

  qmodel = autoqk.get_best_model()

  # Retrain the best quantized model found by the scheduler.
  optimizer = Adam(lr=0.01)
  qmodel.compile(optimizer=optimizer, loss="categorical_crossentropy",
                 metrics=["acc"])
  history = qmodel.fit(x_train, y_train, epochs=5, batch_size=150,
                       validation_split=0.1)

  quantized_acc = history.history["acc"][-1]


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/bn_folding_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests layers from folded_layers.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
from numpy.testing import assert_raises
import tempfile

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.backend import clear_session
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import metrics

from qkeras import QConv2DBatchnorm
from qkeras import QConv2D
from qkeras import QDense
from qkeras import QActivation
from qkeras import QDepthwiseConv2D
from qkeras import QDepthwiseConv2DBatchnorm
from qkeras import utils as qkeras_utils
from qkeras import bn_folding_utils


def get_sgd_optimizer(learning_rate):
  # Newer TF releases moved the original SGD under tf.keras.optimizers.legacy.
  if hasattr(tf.keras.optimizers, "legacy"):
    return tf.keras.optimizers.legacy.SGD(learning_rate)
  else:
    return tf.keras.optimizers.SGD(learning_rate)


def get_qconv2d_model(input_shape, kernel_size, kernel_quantizer=None):
  # Builds a QConv2D -> BatchNormalization -> Dense softmax classifier.
  num_class = 2
  x = x_in = layers.Input(input_shape, name="input")
  x = QConv2D(
      filters=2, kernel_size=kernel_size, strides=(4, 4),
      kernel_initializer="ones", bias_initializer="zeros", use_bias=False,
      kernel_quantizer=kernel_quantizer, bias_quantizer=None,
      name="conv2d")(x)
  x = layers.BatchNormalization(
      axis=-1,
      momentum=0.99,
      epsilon=0.001,
      center=True,
      scale=True,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_constraint=None,
      gamma_constraint=None,
      renorm=False,
      renorm_clipping=None,
      renorm_momentum=0.99,
      fused=None,
      trainable=True,
      virtual_batch_size=None,
      adjustment=None,
      name="bn")(
          x)
  x = layers.Flatten(name="flatten")(x)
  x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones",
                   name="dense")(x)
  x = layers.Activation("softmax", name="softmax")(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model


def get_qconv2d_batchnorm_model(input_shape, kernel_size, folding_mode,
                                kernel_quantizer=None):
  # Folded (QConv2DBatchnorm) counterpart of get_qconv2d_model.
  num_class = 2
  x = x_in = layers.Input(input_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=kernel_size,
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      name="foldconv2d")(x)
  x = layers.Flatten(name="flatten")(x)
  x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones",
                   name="dense")(x)
  x = layers.Activation("softmax", name="softmax")(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model


def get_models_with_one_layer(kernel_quantizer, folding_mode,
                              ema_freeze_delay):
  # Returns an (unfolded conv+bn model, folded model) pair with identical
  # initialization so their outputs can be compared.
  x_shape = (2, 2, 1)
  loss_fn = tf.keras.losses.MeanSquaredError()
  optimizer = get_sgd_optimizer(learning_rate=1e-3)

  # define a model with seperate conv2d and bn layers
  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2D(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      bias_quantizer=None,
      name="conv2d")(x)
  x = layers.BatchNormalization(
      axis=-1,
      momentum=0.99,
      epsilon=0.001,
      center=True,
      scale=True,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_constraint=None,
      gamma_constraint=None,
      renorm=False,
      renorm_clipping=None,
      renorm_momentum=0.99,
      fused=None,
      trainable=True,
      virtual_batch_size=None,
      adjustment=None,
      name="bn")(x)
  unfold_model = Model(inputs=[x_in], outputs=[x])
  unfold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      ema_freeze_delay=ema_freeze_delay,
      name="foldconv2d")(x)
  fold_model = Model(inputs=[x_in], outputs=[x])
  fold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  return (unfold_model, fold_model)


def get_debug_model(model):
  # Wraps model so every non-Flatten/Input layer output can be inspected.
  layer_output_list = []
  for layer in model.layers:
    if layer.__class__.__name__ not in ["Flatten", "InputLayer"]:
      layer_output_list.append(layer.output)
  debug_model = Model(inputs=model.inputs, outputs=layer_output_list)
  return debug_model


def generate_dataset(train_size=10,
                     batch_size=5,
                     input_shape=(3, 3, 1),
                     num_class=2,
                     output_shape=None):
  """create tf.data.Dataset with shape: (N,) + input_shape."""
  x_train = np.random.randint(
      4, size=(train_size, input_shape[0], input_shape[1], input_shape[2]))
  # NOTE(review): the randint tensor above is immediately overwritten by the
  # uniform sample below; the first assignment is dead code.
  x_train = np.random.rand(
      train_size, input_shape[0], input_shape[1], input_shape[2])

  if output_shape:
    # Regression-style targets matching the model's output shape.
    y_train = np.random.random_sample((train_size,) + output_shape)
  else:
    # Classification targets: one-hot over num_class.
    y_train = np.random.randint(num_class, size=train_size)
    y_train = to_categorical(y_train, num_class)

  train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
  train_ds = train_ds.batch(batch_size)
  return train_ds


def run_training(model, epochs, loss_fn, loss_metric, optimizer, train_ds,
                 do_print=False):
  # Custom training loop; returns the predictions made in the last epoch.
  # Iterate over epochs.
  for epoch in range(epochs):
    if do_print:
      print("- epoch {} -".format(epoch))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_ds):
      if do_print:
        print("\n - step {} -".format(step))
      with tf.GradientTape() as tape:
        predictions = model(x_batch_train, training=True)
        if epoch == epochs - 1:
          if do_print:
            print("y_pred:", predictions)
            print("y:", y_batch_train)
          output_predictions = predictions
        # Compute loss
        loss = loss_fn(y_batch_train, predictions)

      grads = tape.gradient(loss, model.trainable_weights)
      if do_print:
        if epoch == epochs - 1:
          # print("old trainable:", model.trainable_weights)
          print("grads:", grads)
      optimizer.apply_gradients(zip(grads, model.trainable_weights))
      if do_print:
        if epoch == epochs - 1:
          # print("new trainable:", model.trainable_weights)
          print("loss:", loss)
      loss_metric(loss)
      if do_print:
        if epoch == epochs - 1:
          print("mean loss = %.4f" % (loss_metric.result()))
  return output_predictions


def test_unfold_model():
  """Test if unfold_model works properly.

  Convert a folded model to a normal model. The kernel/bias weight in the
  normal model should be the same as the folded kernel/bias in the folded
  model. Test if the function can convert both sequential and non-sequantial
  models properly.
  """

  x_shape = (2, 2, 1)
  kernel_quantizer = "quantized_bits(4, 0, 1)"
  folding_mode = "batch_stats_folding"
  ema_freeze_delay = 10
  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
  gamma = np.array([2., 1.])
  beta = np.array([0., 1.])
  moving_mean = np.array([1., 1.])
  moving_variance = np.array([1., 2.])
  iteration = np.array(-1)

  def _get_sequantial_folded_model(x_shape):
    # Folded conv followed by folded depthwise conv, sequential topology.
    x = x_in = layers.Input(x_shape, name="input")
    x = QConv2DBatchnorm(
        filters=2,
        kernel_size=(2, 2),
        strides=(2, 2),
        kernel_initializer="ones",
        bias_initializer="zeros",
        use_bias=False,
        kernel_quantizer=kernel_quantizer,
        beta_initializer="zeros",
        gamma_initializer="ones",
        moving_mean_initializer="zeros",
        moving_variance_initializer="ones",
        folding_mode=folding_mode,
        ema_freeze_delay=ema_freeze_delay,
        name="foldconv2d")(x)
    x = QDepthwiseConv2DBatchnorm(
        kernel_size=(2, 2),
        strides=(1, 1),
        use_bias=False,
        depthwise_quantizer=kernel_quantizer,
        folding_mode=folding_mode,
        ema_freeze_delay=ema_freeze_delay,
        name="folddepthwiseconv2d")(x)
    model = Model(inputs=[x_in], outputs=[x])

    model.layers[1].set_weights([
        kernel, gamma, beta, iteration, moving_mean, moving_variance
    ])
    return model

  def _get_nonseq_folded_model(x_shape):
    # Branching topology (two convs merged by Maximum) before the folded conv.
    x = x_in = layers.Input(x_shape, name="input")
    x1 = layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),
                       name="conv2d_1")(x)
    x2 = layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),
                       name="conv2d_2")(x)
    x = layers.Maximum()([x1, x2])
    x = QConv2DBatchnorm(
        filters=2,
        kernel_size=(2, 2),
        strides=(4, 4),
        kernel_initializer="ones",
        bias_initializer="zeros",
        use_bias=False,
        kernel_quantizer=kernel_quantizer,
        beta_initializer="zeros",
        gamma_initializer="ones",
        moving_mean_initializer="zeros",
        moving_variance_initializer="ones",
        folding_mode=folding_mode,
        ema_freeze_delay=ema_freeze_delay,
        name="foldconv2d")(x)
    x = layers.Flatten(name="flatten")(x)
    x = layers.Dense(2, use_bias=False, kernel_initializer="ones",
                     name="dense")(x)
    model = Model(inputs=[x_in], outputs=[x])

    model.layers[4].set_weights([
        kernel, gamma, beta, iteration, moving_mean, moving_variance
    ])
    return model

  seq_model = _get_sequantial_folded_model((4, 4, 1))
  nonseq_model = _get_nonseq_folded_model(x_shape)

  for model in [nonseq_model, seq_model]:
    # preparing data for testing if model prediction matches
    output_shape = model.output_shape[1:]
    input_shape = model.input_shape[1:]
    train_ds = generate_dataset(train_size=10, batch_size=5,
                                input_shape=input_shape,
                                output_shape=output_shape)

    # convert model with folded layers to a model with corresponding QConv2D
    # or QDepthwiseConv2D layers
    cvt_model = bn_folding_utils.unfold_model(model)

    for layer_type in ["QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
      weight1 = None
      weight2 = None
      for layer in model.layers:
        if layer.__class__.__name__ == layer_type:
          weight1 = layer.get_folded_weights()
          break
      # layer_type[:-9] strips the "Batchnorm" suffix to get the unfolded
      # class name.
      for layer in cvt_model.layers:
        if layer.__class__.__name__ == layer_type[:-9]:
          weight2 = layer.get_weights()
          break
      # test if the corresponding layers have identical weights
      if weight1 and weight2:
        assert_equal(weight1[0], weight2[0])
        assert_equal(weight1[1], weight2[1])

    # test if the predictions of the two models are identical
    pred1 = model.predict(train_ds)
    pred2 = cvt_model.predict(train_ds)
    assert_equal(pred1, pred2)


def test_loading():
  """Test to load model using different approaches."""
  loss_fn = tf.keras.losses.MeanSquaredError()
  loss_metric = metrics.Mean()
  optimizer = get_sgd_optimizer(learning_rate=1e-3)

  x_shape = (2, 2, 1)

  custom_objects = {}
  qkeras_utils._add_supported_quantized_objects(custom_objects)

  train_ds = generate_dataset(train_size=1, batch_size=1,
                              input_shape=x_shape, num_class=2)

  model_fold = get_qconv2d_batchnorm_model(
      input_shape=x_shape, kernel_size=(2, 2),
      folding_mode="ema_stats_folding")
  model_fold.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  run_training(model_fold, 10, loss_fn, loss_metric, optimizer, train_ds,
               do_print=False)

  # test load model from json to ensure saving/loading model architecture
  # works
  model_fold.use_legacy_config = True  # Ensures old Keras serialization
  json_string = model_fold.to_json()
  clear_session()
  model_from_json = qkeras_utils.quantized_model_from_json(json_string)
  model_from_json.use_legacy_config = True
  assert json_string == model_from_json.to_json()

  # test reload model from hdf5 files to ensure saving/loading works
  _, fname = tempfile.mkstemp(".h5")
  model_fold.save(fname)
  model_loaded = qkeras_utils.load_qmodel(fname)
  weight1 = model_fold.layers[1].get_folded_weights()
  weight2 = model_loaded.layers[1].get_folded_weights()
  assert_equal(np.array(weight1[0]), np.array(weight2[0]))
  assert_equal(np.array(weight1[1]), np.array(weight2[1]))

  # test convert a folded model to a normal model for zpm
  # the kernel/bias weight in the normal model should be the same as the
  # folded kernel/bias in the folded model
  normal_model = bn_folding_utils.unfold_model(model_fold)
  weight2 = normal_model.layers[1].get_weights()

  assert_equal(weight1[0], weight2[0])
  assert_equal(weight1[1], weight2[1])


def test_same_training_and_prediction():
  """test if fold/unfold layer has the same training and prediction output."""
  epochs = 5
  loss_fn = tf.keras.losses.MeanSquaredError()
  loss_metric = metrics.Mean()
  optimizer = get_sgd_optimizer(learning_rate=1e-3)
  x_shape = (2, 2, 1)

  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
  gamma = np.array([2., 1.])
  beta = np.array([0., 1.])
  moving_mean = np.array([1., 1.])
  moving_variance = np.array([1., 2.])
  iteration = np.array(-1)

  train_ds = generate_dataset(train_size=10, batch_size=10,
                              input_shape=x_shape, num_class=2)

  (unfold_model, fold_model_batch) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="batch_stats_folding",
      ema_freeze_delay=10)
  (_, fold_model_ema) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="ema_stats_folding",
      ema_freeze_delay=10)

  unfold_model.layers[1].set_weights([kernel])
  unfold_model.layers[2].set_weights(
      [gamma, beta, moving_mean, moving_variance])
  fold_model_batch.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])
  fold_model_ema.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])

  # check if prediction is the same
  y1 = unfold_model.predict(train_ds)
  y2_batch = fold_model_batch.predict(train_ds)
  y2_ema = fold_model_ema.predict(train_ds)
  assert_allclose(y1, y2_batch, rtol=1e-4)
  assert_allclose(y1, y2_ema, rtol=1e-4)

  # check if training for a number of epochs, and before bn freeeze, models
  # reached the same point
  y1 = run_training(unfold_model, epochs, loss_fn, loss_metric, optimizer,
                    train_ds, do_print=False)
  y2_batch = run_training(fold_model_batch, epochs, loss_fn, loss_metric,
                          optimizer, train_ds, do_print=False)
  y2_ema = run_training(fold_model_ema, epochs, loss_fn, loss_metric,
                        optimizer, train_ds, do_print=False)

  assert_allclose(y1, y2_batch, rtol=1e-4)
  assert_allclose(y1, y2_ema, rtol=1e-4)

  # check if training for long enough (after bn freezes), unfold model and
  # fold models should be different, but the two folding modes should be the
  # same
  epochs = 5
  iteration = np.array(8)
  (unfold_model, fold_model_batch) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="batch_stats_folding",
      ema_freeze_delay=10)
  (_, fold_model_ema) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="ema_stats_folding",
      ema_freeze_delay=10)
  unfold_model.layers[1].set_weights([kernel])
  unfold_model.layers[2].set_weights(
      [gamma, beta, moving_mean, moving_variance])
  fold_model_batch.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])
  fold_model_ema.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])

  y1 = run_training(
      unfold_model, epochs, loss_fn, loss_metric, optimizer, train_ds,
      do_print=False)
  y2_batch = run_training(
      fold_model_batch, epochs, loss_fn, loss_metric, optimizer, train_ds,
      do_print=False)
  y2_ema =
run_training( fold_model_ema, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) assert_raises(AssertionError, assert_allclose, y1, y2_batch, rtol=1e-4) assert_allclose(y2_batch, y2_ema, rtol=1e-4) # test QDepthwiseConv2DBatchnorm layers def _get_models(x_shape, num_class, depthwise_quantizer, folding_mode, ema_freeze_delay): x = x_in = layers.Input(x_shape, name="input") x = QDepthwiseConv2DBatchnorm( kernel_size=(2, 2), strides=(2, 2), depth_multiplier=1, depthwise_initializer="ones", bias_initializer="zeros", use_bias=False, depthwise_quantizer=depthwise_quantizer, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay, name="fold_depthwiseconv2d")(x) x = layers.Flatten(name="flatten")(x) x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones", name="dense")(x) x = layers.Activation("softmax", name="softmax")(x) fold_model = Model(inputs=[x_in], outputs=[x]) x = x_in = layers.Input(x_shape, name="input") x = QDepthwiseConv2D( kernel_size=(2, 2), strides=(2, 2), depth_multiplier=1, depthwise_initializer="ones", bias_initializer="zeros", use_bias=False, depthwise_quantizer=depthwise_quantizer, name="depthwiseconv2d")(x) x = layers.BatchNormalization( beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", name="bn")(x) x = layers.Flatten(name="flatten")(x) x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones", name="dense")(x) x = layers.Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) return (model, fold_model) input_shape = (4, 4, 1) num_class = 2 depthwise_quantizer = None folding_mode = "ema_stats_folding" ema_freeze_delay = 10 # weights depthwise_kernel = np.array([[[[1.]], [[0.]]], [[[0.]], [[1.]]]]) gamma = np.array([2]) beta = np.array([0]) moving_mean = np.array([4.]) moving_variance = 
np.array([2.]) iteration = np.array(2) folded_depthwise_kernel_quantized = np.array( [[[[1.4138602]], [[0.]]], [[[0.]], [[1.4138602]]]]) folded_bias_quantized = np.array([-5.655441]) dense_weight = np.array([[1., 0], [0, 0], [0, 0], [0, 0]]) # generate dataset train_ds = generate_dataset(train_size=3, batch_size=3, input_shape=input_shape, num_class=2) # define models, one with folded layer and one without (model, fold_model) = _get_models( input_shape, num_class=num_class, depthwise_quantizer=depthwise_quantizer, folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay) # set weights fold_model.layers[1].set_weights([ depthwise_kernel, gamma, beta, iteration, moving_mean, moving_variance]) fold_model.layers[3].set_weights([dense_weight]) model.layers[1].set_weights([depthwise_kernel]) model.layers[2].set_weights([gamma, beta, moving_mean, moving_variance]) model.layers[4].set_weights([dense_weight]) # perform training epochs = 5 loss_fn = tf.keras.losses.MeanSquaredError() loss_metric = metrics.Mean() optimizer = get_sgd_optimizer(learning_rate=1e-3) pred1 = run_training( model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) pred2 = run_training( fold_model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) # before bn freezes, the two models should reach the same point assert_allclose(pred1, pred2, rtol=1e-4) # after bn freezes, the two models will not reach the same iteration = np.array(12) epochs = 5 ema_freeze_delay = 10 (model, fold_model) = _get_models( input_shape, num_class=num_class, depthwise_quantizer=depthwise_quantizer, folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay) fold_model.layers[1].set_weights([ depthwise_kernel, gamma, beta, iteration, moving_mean, moving_variance]) fold_model.layers[3].set_weights([dense_weight]) model.layers[1].set_weights([depthwise_kernel]) model.layers[2].set_weights([gamma, beta, moving_mean, moving_variance]) model.layers[4].set_weights([dense_weight]) pred1 = 
run_training( model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) pred2 = run_training( fold_model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) assert_raises(AssertionError, assert_allclose, pred1, pred2, rtol=1e-4) def test_populate_bias_quantizer_from_accumulator(): """Test populate_bias_quantizer_from_accumulator function. Define a qkeras model with a QConv2DBatchnorm layer. Set bias quantizer in the layer as None. Call populate_bias_quantizer_from_accumulator function to automatically generate bias quantizer type from the MAC accumulator type. Set the bias quantizer accordingly in the model. Call populate_bias_quantizer_from_accumulator again in this model. This time since bias quantizer is already set, populate_bias_quantizer_from_accumulator function should not change the bias quantizer. """ x_shape = (2, 2, 1) # get a qkeras model with QConv2DBatchnorm layer. Set bias quantizer in the # layer as None. x = x_in = layers.Input(x_shape, name="input") x1 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False, kernel_quantizer="quantized_bits(4, 0, 1)", name="conv2d_1")(x) x2 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False, kernel_quantizer="quantized_bits(4, 0, 1)", name="conv2d_2")(x) x = layers.Maximum()([x1, x2]) x = QActivation("quantized_relu(4, 1)")(x) x = QConv2DBatchnorm( filters=2, kernel_size=(2, 2), strides=(4, 4), kernel_initializer="ones", bias_initializer="zeros", use_bias=False, kernel_quantizer="quantized_bits(4, 0, 1)", bias_quantizer=None, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", folding_mode="batch_stats_folding", ema_freeze_delay=10, name="foldconv2d")(x) x1 = x x2 = layers.Flatten(name="flatten")(x) x2 = QDense(2, use_bias=False, kernel_initializer="ones", kernel_quantizer="quantized_bits(6, 2, 1)", name="dense")(x2) model = Model(inputs=[x_in], outputs=[x1, x2]) 
assert_equal(model.layers[5].get_quantizers()[1], None) # Call populate_bias_quantizer_from_accumulator function # to automatically generate bias quantizer from the MAC accumulator type. _ = bn_folding_utils.populate_bias_quantizer_from_accumulator( model, ["quantized_bits(8, 0, 1)"]) q = model.layers[5].get_quantizers()[1] assert_equal(q.__str__(), "quantized_bits(10,3,1)") # Call populate_bias_quantizer_from_accumulator function again # bias quantizer should not change _ = bn_folding_utils.populate_bias_quantizer_from_accumulator( model, ["quantized_bits(8, 0, 1)"]) q = model.layers[5].get_quantizers()[1] assert_equal(q.__str__(), "quantized_bits(10,3,1)") ================================================ FILE: tests/callbacks_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests for callbacks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import pytest from numpy.testing import assert_equal from tensorflow.keras.layers import * from tensorflow.keras.models import * import tensorflow.compat.v2 as tf from qkeras import * from qkeras.utils import get_model_sparsity from qkeras.utils import model_quantize from qkeras.callbacks import QNoiseScheduler def qconv_model(): x = x_in = tf.keras.layers.Input((4, 4, 1), name="input") x = QConv2D( 1, 2, 1, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name="qconv2d_1")( x) x = QActivation("quantized_relu(4)", name="QA_1")(x) model = keras.Model(inputs=[x_in], outputs=[x]) return model def test_QNoiseScheduler(): model = qconv_model() model.compile(optimizer="sgd", loss=tf.keras.losses.MeanSquaredError()) num_data = 5 x_train = np.random.rand(num_data, 4, 4, 1) y_train = np.random.rand(num_data, 1) ######################### # Test "step" freq_type # ######################### # The number of batch passes the finish of 4. gradual_qnoise_callback_0 = QNoiseScheduler( start=2, finish=4, freq_type="step", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_0, ], ) # QConv2D has a kernel_quantizer and a bias_quantizer, and QActivation has a # quantizer. num_quantizers_with_qnoise_factor = 0 for quantizer in gradual_qnoise_callback_0.quantizers: if hasattr(quantizer, "qnoise_factor"): num_quantizers_with_qnoise_factor += 1 assert_equal(num_quantizers_with_qnoise_factor, 3) # Test "step" qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_0.quantizers ] assert_equal(qnoise_factor, np.ones_like(qnoise_factor)) # The number of batch does not pass the finish of 10. 
Exponent 3.0 gradual_qnoise_callback_1 = QNoiseScheduler( start=2, finish=10, freq_type="step", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_1, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_1.quantizers ] val = 1 - np.power((10.0 - 4.0) / (10.0 - 2.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) # The number of batch does not pass the finish of 10. Exponent 2.0 gradual_qnoise_callback_2 = QNoiseScheduler( start=2, finish=10, freq_type="step", exponent=2.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_2, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_2.quantizers ] val = 1 - np.power((10.0 - 4.0) / (10.0 - 2.0), 2) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) # The number of batch does not pass the start of 6. gradual_qnoise_callback_3 = QNoiseScheduler( start=6, finish=10, freq_type="step", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_3, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_3.quantizers ] assert_equal(qnoise_factor, np.zeros_like(qnoise_factor)) # The number of training iterations passes the number of batches of an epoch. gradual_qnoise_callback_4 = QNoiseScheduler( start=6, finish=20, freq_type="step", exponent=3.0) epochs = 2 model.fit( x_train, y_train, batch_size=1, epochs=epochs, verbose=0, callbacks=[ gradual_qnoise_callback_4, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_4.quantizers ] val = 1 - np.power((20.0 - (epochs*num_data - 1)) / (20.0 - 6.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) # The number of training iterations passes the number of batches of an epoch # with update_freq = 2. 
gradual_qnoise_callback_5 = QNoiseScheduler( start=0, finish=20, freq_type="step", update_freq=2, exponent=3.0) epochs = 2 model.fit( x_train, y_train, batch_size=1, epochs=epochs, verbose=0, callbacks=[ gradual_qnoise_callback_5, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_5.quantizers ] # It updates when the number of training iterations modulo update_freq is 0. val = 1 - np.power( (20.0 - epochs * ((epochs * num_data - 1) // epochs)) / (20.0 - 0.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) ########################## # Test "epoch" freq_type # ########################## # The number of epoch does not pass the finish of 5. gradual_qnoise_callback_6 = QNoiseScheduler( start=1, finish=5, freq_type="epoch", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=3, verbose=0, callbacks=[ gradual_qnoise_callback_6, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_6.quantizers ] val = 1 - np.power((5.0 - 2.0) / (5.0 - 1.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) assert_equal(len(gradual_qnoise_callback_6.quantizers), 3) # Test "epoch" if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/codebook_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Test activation from qlayers.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from numpy.testing import assert_allclose import pytest from qkeras import quantized_bits from qkeras.codebook import weight_compression @pytest.mark.parametrize( 'bits, axis, quantizer, weights, expected_result', [ ( 3, 3, quantized_bits(4, 0, 1, alpha='auto_po2'), np.array([ [[ 0.14170583, -0.34360626, 0.29548156], [ 0.6517242, 0.06870092, -0.21646781], [ 0.12486842, -0.05406165, -0.23690471]], [[-0.07540564, 0.2123149 , 0.2382695 ], [ 0.78434753, 0.36171672, -0.43612534], [ 0.3685556, 0.41328752, -0.48990643]], [[-0.04438099, 0.0590747 , -0.0644061 ], [ 0.15280165, 0.40714318, -0.04622072], [ 0.21560416, -0.22131851, -0.5365659 ]]], dtype=np.float32), np.array([ [[ 0.125 , -0.375 , 0.25 ], [ 0.75 , 0.125 , -0.25 ], [ 0.125 , 0.0 , -0.25 ]], [[ 0.0 , 0.25 , 0.25 ], [ 0.75 , 0.375 , -0.375 ], [ 0.375 , 0.375 , -0.5 ]], [[ 0.0 , 0.0 , 0.0 ], [ 0.125 , 0.375 , 0.0 ], [ 0.25 , -0.25 , -0.5 ]]], dtype=np.float32) ) ] ) def test_codebook_weights(bits, axis, quantizer, weights, expected_result): np.random.seed(22) weights = weights.reshape(weights.shape + (1,)) expected_result = expected_result.reshape(expected_result.shape + (1,)) index_table, codebook_table = weight_compression(weights, bits, axis, quantizer) new_weights = np.zeros(weights.shape) for i in range(weights.shape[axis]): new_weights[:, :, :, i] = codebook_table[i][index_table[:, :, :, i]] assert_allclose(new_weights, expected_result, rtol=1e-4) if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/leakyrelu_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in 
compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Test activation from qlayers.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from numpy.testing import assert_allclose import pytest from tensorflow.keras import backend as K from qkeras import quantized_relu from qkeras import quantized_relu_po2 @pytest.mark.parametrize( 'bits, integer, use_sigmoid, negative_slope, test_values, expected_values', [ (6, 2, 0, 0.25, np.array( [[-3.0, -2.0, -1.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688, 6.0]], dtype=K.floatx()), np.array([[-0.75, -0.5, -0.25, 0.0, 2.5, 3.375, 1.5, 1.0, 0.0, 3.875]], dtype=K.floatx()), ), (6, 2, 1, 0.125, np.array([[ 0.458069, 0.573227, 0.194336, 1.539047, 0.045883, 4.009995, 3.962494, 3.937500, 0.363266, 0.875198, 0.710938, 4.000000, 7.000000, 3.937500, 3.937592, 0.199326, 0.458008, 0.625977, 0.544922, 1.046875, 0.586899, 3.367188, 3.804688, 0.312500, 0.062500, 0.562500, 0.375000, 3.367188, 1.046875, 2.796875, 0.054688, 1.562500, 2.562500 ]], dtype=K.floatx()), np.array([[ 0.5 , 0.5 , 0.25 , 1.5 , 0. , 3.875, 3.875, 3.875, 0.25 , 1. , 0.75 , 3.875, 3.875, 3.875, 3.875, 0.25 , 0.5 , 0.75 , 0.5 , 1. , 0.5 , 3.25 , 3.75 , 0.25 , 0. , 0.5 , 0.5 , 3.25 , 1. , 2.75 , 0. 
, 1.5 , 2.5 ]], dtype=K.floatx())), (6, 2, 1, 0.125, np.array([[ -0.458069, -0.573227, -0.194336, -1.539047, -0.045883, -4.009995, -3.962494, -3.937500, -0.363266, -0.875198, -0.710938, -4.000000, -7.000000, -3.937500, -3.937592, -0.199326, -0.458008, -0.625977, -0.544922, -1.046875, -0.586899, -3.367188, -3.804688, -0.312500, -0.062500, -0.562500, -0.375000, -3.367188, -1.046875, -2.796875, -0.054688, -1.562500, -2.562500 ]], dtype=K.floatx()), np.array([[ 0.0, 0.0, 0.0, -0.25, 0.0, -0.5, -0.5, -0.5, 0.0, 0.0, 0.0, -0.5, -0.5, -0.5, -0.5, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, -0.5, -0.5, 0.0, 0.0, 0.0, 0.0, -0.5, -0.25, -0.25, 0.0, -0.25, -0.25 ]], dtype=K.floatx())), ]) def test_quantized_relu(bits, integer, use_sigmoid, negative_slope, test_values, expected_values): """Test quantized_relu function.""" x = K.placeholder(ndim=2) f = K.function([x], [quantized_relu(bits, integer, use_sigmoid, negative_slope)(x)]) result = f([test_values])[0] assert_allclose(result, expected_values, rtol=1e-05) @pytest.mark.parametrize( 'bits, negative_slope, test_values, expected_values', [ ( 8, 2**-4, np.array([[ -1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01, -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01, -2.00000000e-01, -1.00000000e-01, -2.22044605e-16, 1.00000000e-01, 2.00000000e-01, 3.00000000e-01, 4.00000000e-01, 5.00000000e-01, 6.00000000e-01, 7.00000000e-01, 8.00000000e-01, 9.00000000e-01 ]], dtype=K.floatx()), np.array([[ -0.0625 , -0.0625 , -0.0625 , -0.03125 , -0.03125 , -0.03125 , -0.03125 , -0.015625 , -0.015625 , -0.0078125, 0. , 0.125 , 0.25 , 0.25 , 0.5 , 0.5 , 0.5 , 0.5 , 1. , 1. 
]], dtype=K.floatx()) ), ( 3, 2**-4, np.array([[ -1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01, -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01, -2.00000000e-01, -1.00000000e-01, -2.22044605e-16, 1.00000000e-01, 2.00000000e-01, 3.00000000e-01, 4.00000000e-01, 5.00000000e-01, 6.00000000e-01, 7.00000000e-01, 8.00000000e-01, 9.00000000e-01 ]], dtype=K.floatx()), np.array([[ -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, 0.125 , 0.25 , 0.25 , 0.5 , 0.5 , 0.5 , 0.5 , 1. , 1. ]], dtype=K.floatx()) ), ( 6, 2**-3, np.array([[ -3.0, -2.0, -1.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688, 6.0]], dtype=K.floatx()), np.array([[ -5.00000000e-01, -2.50000000e-01, -1.25000000e-01, 2.32830644e-10, 2.00000000e+00, 4.00000000e+00, 2.00000000e+00, 1.00000000e+00, 6.25000000e-02, 8.00000000e+00 ]], dtype=K.floatx()) ) ]) def test_quantized_relu_po2(bits, negative_slope, test_values, expected_values): x = K.placeholder(ndim=2) f = K.function([x], [quantized_relu_po2(bits, negative_slope=negative_slope)(x)]) result = f([test_values])[0] assert_allclose(result, expected_values, rtol=1e-05) if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/min_max_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests min/max values that are used for autorange.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import pytest from qkeras import * from tensorflow.keras import backend as K def test_binary(): q = binary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 q = stochastic_binary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 def test_ternary(): q = ternary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 q = stochastic_ternary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 def test_quantized_bits(): results = { (1,0): [-1.0, 1.0], (2,0): [-1.0, 1.0], (3,0): [-1.0, 1.0], (4,0): [-1.0, 1.0], (5,0): [-1.0, 1.0], (6,0): [-1.0, 1.0], (7,0): [-1.0, 1.0], (8,0): [-1.0, 1.0], (1,1): [-1.0, 1.0], (2,1): [-2.0, 2.0], (3,1): [-2.0, 2.0], (4,1): [-2.0, 2.0], (5,1): [-2.0, 2.0], (6,1): [-2.0, 2.0], (7,1): [-2.0, 2.0], (8,1): [-2.0, 2.0], (3,2): [-4.0, 4.0], (4,2): [-4.0, 4.0], (5,2): [-4.0, 4.0], (6,2): [-4.0, 4.0], (7,2): [-4.0, 4.0], (8,2): [-4.0, 4.0], } for i in range(3): for b in range(1,9): if b <= i: continue q = quantized_bits(b,i,1) expected = results[(b,i)] assert expected[0] == q.min() assert expected[1] == q.max() def test_po2(): po2 = { 3: [-2, 2], 4: [-8, 8], 5: [-128, 128], 6: [-32768, 32768] } po2_max_value = { (3,1): [-1.0, 1.0], (3,2): [-2, 2], (3,4): [-4, 4], (4,1): [-1.0, 1.0], (4,2): [-2, 2], (4,4): [-4, 4], (4,8): [-8, 8], (5,1): [-1.0, 1.0], (5,2): [-2, 2], (5,4): [-4, 4], (5,8): [-8, 8], (5,16): [-16, 16], (6,1): [-1.0, 1.0], (6,2): [-2, 2], (6,4): [-4, 4], (6,8): [-8, 8], (6,16): [-16, 16], (6,32): [-32, 32] } po2_quadratic = { 4: [-4, 4], 5: [-64, 64], 6: [-16384, 16384] } relu_po2_quadratic = { 4: [0.00390625, 64], 5: [1.52587890625e-05, 16384], 6: [2.3283064365386963e-10, 1073741824] } for b in range(3,7): q = quantized_po2(b) assert po2[b][0] == q.min() assert po2[b][1] == 
q.max() for i in range(0,b): q = quantized_po2(b,2**i) assert po2_max_value[(b,2**i)][0] == q.min() assert po2_max_value[(b,2**i)][1] == q.max() for b in range(4,7): q = quantized_po2(b,quadratic_approximation=True) assert po2_quadratic[b][0] == q.min() assert po2_quadratic[b][1] == q.max() q = quantized_relu_po2(b,quadratic_approximation=True) assert relu_po2_quadratic[b][0] == q.min() assert relu_po2_quadratic[b][1] == q.max() if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/print_qstats_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import pytest from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Conv2D from tensorflow.keras.layers import DepthwiseConv2D from tensorflow.keras.layers import BatchNormalization from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from qkeras.estimate import print_qstats from qkeras.utils import model_quantize from qkeras import QConv2D from qkeras.quantizers import * def create_network(): xi = Input((28, 28, 1)) x = Conv2D(32, (3, 3))(xi) x = Activation("relu")(x) x = Conv2D(32, (3, 3), activation="relu")(x) x = Activation("softmax")(x) return Model(inputs=xi, outputs=x) def create_mix_network(): xi = Input((28, 28, 1)) x = QConv2D(32, (3, 3), kernel_quantizer=binary())(xi) x = Activation("relu")(x) x = Conv2D(32, (3, 3))(x) x = Activation("softmax")(x) return Model(inputs=xi, outputs=x) def create_network_with_bn(): """Creates a network contains both QConv2D and QDepthwiseConv2D layers.""" xi = Input((28, 28, 1)) x = Conv2D(32, (3, 3))(xi) x = BatchNormalization()(x) x = Activation("relu")(x) x = DepthwiseConv2D((3, 3), activation="relu")(x) x = BatchNormalization()(x) x = Activation("softmax")(x) return Model(inputs=xi, outputs=x) def test_conversion_print_qstats(): # this tests if references in tensorflow are working properly. 
m = create_network() d = { "QConv2D": { "kernel_quantizer": "binary", "bias_quantizer": "binary" }, "QActivation": { "relu": "ternary" } } qq = model_quantize(m, d, 4) qq.summary() print_qstats(qq) # test if print_qstats works with unquantized layers print_qstats(m) # test if print_qstats works with mixture of quantized and unquantized layers m1 = create_mix_network() print_qstats(m1) m2 = create_network_with_bn() d2 = { "QConv2D": { "kernel_quantizer": "binary", "bias_quantizer": "binary" }, "QActivation": { "relu": "ternary" }, "QConv2DBatchnorm": { "kernel_quantizer": "ternary", "bias_quantizer": "ternary", }, "QDepthwiseConv2DBatchnorm": { "depthwise_quantizer": "ternary", "bias_quantizer": "ternary", }, } m2 = model_quantize(m2, d2, 4, enable_bn_folding=True) m2.summary() print_qstats(m2) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/qactivation_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Test activation quantizers from qlayers.py / quantizers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose, assert_array_equal
import pytest
from tensorflow import keras
from tensorflow.keras import backend as K
import tempfile

from qkeras import set_internal_sigmoid
from qkeras import binary
from qkeras import hard_sigmoid
from qkeras import quantized_bits
from qkeras import quantized_hswish
from qkeras import quantized_po2
from qkeras import quantized_relu
from qkeras import quantized_relu_po2
from qkeras import quantized_sigmoid
from qkeras import quantized_tanh
from qkeras import smooth_sigmoid
from qkeras import stochastic_binary
from qkeras import stochastic_ternary
from qkeras import ternary
from qkeras.quantizers import _default_sigmoid_type


# NOTE(review): the "disable_" prefix on the two quantized_po2 tests below
# keeps pytest from collecting them; the parametrized golden values are
# retained for reference.
@pytest.mark.parametrize(
    'bits, max_value, use_stochastic_rounding, quadratic_approximation, '
    'log2_rounding, test_values, expected_values',
    [
        # bits=4 without max_value. Therefore the max exponent is 4 when
        # quadratic approximation is enabled. The max and min values from this
        # quantization function are 16 and -16 respectively.
        (
            4, None, 0, 1, "floor",
            np.array(
                [[-10.0, -0.25, 0.25, 1.0, 1.99, 2.0, 5.0, 10.0, 16.0, 32.0]],
                dtype=K.floatx()),
            np.array(
                [[-4.0, -0.25, 0.25, 1.0, 1.0, 1.0, 4.0, 4.0, 16.0, 16.0]],
                dtype=K.floatx()),
        ),
        # bits=3. The minimum exponent is -4. Therefore, the smallest absolute
        # value is 0.0625 in this quantization. The max absolute value is 0.5,
        # which is specified by the second input argument.
        (
            3, 0.5, 0, 0, "floor",
            np.array([[-7, -0.12, -0.03, 0.01, 5]], dtype=K.floatx()),
            np.array([[-0.5, -0.0625, -0.0625, 0.0625, 0.5]],
                     dtype=K.floatx()),
        ),
        (8, None, 0, 0, "floor",
         np.array(
             [[-3, -2, -1.5, -0.5, -0.033, 0.5, 0.667, 1, 1.5, 4, 10]],
             dtype=K.floatx()),
         np.array(
             [[-2, -2, -1, -0.5, -0.03125, 0.5, 0.5, 1, 1, 4, 8]],
             dtype=K.floatx()),
        ),
        (4, None, 0, 0, "floor",
         np.array(
             [[-16, -7, -0.12, -0.03, 0, 0.01, 5, 10]], dtype=K.floatx()),
         np.array(
             [[-8, -4, -0.0625, -0.0625, 0.0625, 0.0625, 4, 8]],
             dtype=K.floatx()),
        ),
        (3, 0.5, 0, 0, "floor",
         np.array([[-7, -0.12, -0.03, 0.01, 5]], dtype=K.floatx()),
         np.array([[-0.5, -0.0625, -0.0625, 0.0625, 0.5]], dtype=K.floatx()),
        ),
        (4, 4, 0, 0, "floor",
         np.array([[-7, -0.12, -0.03, 0, 0.01, 5]], dtype=K.floatx()),
         np.array([[-4, -0.0625, -0.0625, 0.0625, 0.0625, 4]],
                  dtype=K.floatx()),
        ),
        (4, None, 0, 1, "floor",
         np.array(
             [[0.01, 0.03, 0.06, 0.5, 1, 2, 5, 10, 16, 32]],
             dtype=K.floatx()),
         np.array(
             [[0.00390625, 0.015625, 0.015625, 0.25, 1, 1, 4, 4, 16, 16]],
             dtype=K.floatx()),
        ),
        (4, None, 0, 1, "floor",
         np.array(
             [[-32, -16, -10, -5, -2, -1, -0.5, -0.03, -0.01]],
             dtype=K.floatx()),
         np.array(
             [[-16, -16, -4, -4, -1, -1, -0.25, -0.015625, -0.00390625]],
             dtype=K.floatx()),
        ),
        # NOTE(review): this case duplicates the previous one — presumably a
        # copy/paste leftover; harmless but could be removed.
        (4, None, 0, 1, "floor",
         np.array(
             [[-32, -16, -10, -5, -2, -1, -0.5, -0.03, -0.01]],
             dtype=K.floatx()),
         np.array(
             [[-16, -16, -4, -4, -1, -1, -0.25, -0.015625, -0.00390625]],
             dtype=K.floatx()),
        ),
    ])
def disable_test_quantized_po2(
    bits, max_value, use_stochastic_rounding, quadratic_approximation,
    log2_rounding, test_values, expected_values):
  """Test quantized_po2 function."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_po2(
      bits, max_value, use_stochastic_rounding, quadratic_approximation,
      log2_rounding)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05, atol=1e-05)


@pytest.mark.parametrize(
    'bits, max_value, use_stochastic_rounding, quadratic_approximation, ' +
    'log2_rounding, test_values, expected_values',
    [
        # bits=3 without max_value. Therefore the max exponent is 4 when
        # quadratic approximation is enabled. The max value from this
        # quantization function is 16. For the negative value, relu enforce it
        # to be the minimum value of this quantization function, which is 2**-4.
        (
            3, None, 0, 1, "floor",
            np.array(
                [[-10.0, -0.25, 0.25, 1.0, 1.99, 2.01, 5.0, 10.0, 16.0, 32.0]],
                dtype=K.floatx()),
            np.array(
                [[0.0625, 0.0625, 0.25, 1.0, 1.0, 1.0, 4.0, 4.0, 16.0, 16.0]],
                dtype=K.floatx()),
        ),
        # bits=3. The minimum exponent is -4. Therefore, the smallest absolute
        # value is 0.0625 in this quantization. The max absolute value is 4,
        # which is specified by the second input argument.
        (3, 4, 0, 0, "floor",
         np.array([[-7.0, -0.12, -0.03, 0, 0.01, 5.0]], dtype=K.floatx()),
         np.array([[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0]],
                  dtype=K.floatx())
        ),
        (8, None, 0, 0, "floor",
         np.array([[-0.033, 0.5, 0.667, 1, 1.5, 4, 10]], dtype=K.floatx()),
         np.array([[0, 0.5, 0.5, 1, 1, 4, 8]], dtype=K.floatx()),
        ),
        (3, None, 0, 0, "floor",
         np.array(
             [[-16.0, -7.0, -0.12, -0.03, 0, 0.01, 5.0, 10.0]],
             dtype=K.floatx()),
         np.array(
             [[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0, 8.0]],
             dtype=K.floatx()),
        ),
        (2, 0.5, 0, 0, "floor",
         np.array([[-7.0, -0.12, -0.03, 0.01, 5.0]], dtype=K.floatx()),
         np.array([[0.0625, 0.0625, 0.0625, 0.0625, 0.5]], dtype=K.floatx()),
        ),
        (3, 4, 0, 0, "floor",
         np.array(
             [[-7.0, -0.12, -0.03, 0, 0.01, 5.0]], dtype=K.floatx()),
         np.array(
             [[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0]],
             dtype=K.floatx()),
        ),
        (3, None, 0, 1, "floor",
         np.array(
             [[0.01, 0.03, 0.06, 0.5, 1, 2, 5, 10, 16, 32]],
             dtype=K.floatx()),
         np.array(
             [[0.00390625, 0.015625, 0.015625, 0.25, 1, 1, 4, 4, 16, 16]],
             dtype=K.floatx()),
        ),
    ])
def disable_test_quantized_relu_po2(bits, max_value, use_stochastic_rounding,
                                    quadratic_approximation, log2_rounding,
                                    test_values, expected_values):
  """Test quantized_relu_po2 function (non-negative po2 quantizer)."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_relu_po2(bits, max_value, 0,
                                          use_stochastic_rounding,
                                          quadratic_approximation,
                                          log2_rounding)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05, atol=1e-05)


def test_smooth_sigmoid():
  """Test smooth_sigmoid function."""
  test_values = np.array(
      [[-3.0, -2.0, -1.0, -0.5, 0.005, 0.0, 0.005, 0.5, 1, 4, 10]],
      dtype=K.floatx())

  def ref_smooth_sigmoid(y):
    # Reference: piecewise-linear sigmoid with slope 0.1875, clipped to [0, 1].
    x = 0.1875 * y + 0.5
    z = 0.0 if x <= 0.0 else (1.0 if x >= 1.0 else x)
    return z

  sigmoid = np.vectorize(ref_smooth_sigmoid)
  x = K.placeholder(ndim=2)
  f = K.function([x], [smooth_sigmoid(x)])
  result = f([test_values])[0]
  expected = sigmoid(test_values)
  assert_allclose(result, expected, rtol=1e-05)


def test_hard_sigmoid():
  """Test hard_sigmoid function."""
  test_values = np.array(
      [[-3.0, -2.0, -1.0, -0.5, 0.005, 0.0, 0.005, 0.5, 1, 4, 10]],
      dtype=K.floatx())

  def ref_hard_sigmoid(y):
    # Reference: piecewise-linear sigmoid with slope 0.5, clipped to [0, 1].
    x = 0.5 * y + 0.5
    z = 0.0 if x <= 0.0 else (1.0 if x >= 1.0 else x)
    return z

  sigmoid = np.vectorize(ref_hard_sigmoid)
  x = K.placeholder(ndim=2)
  f = K.function([x], [hard_sigmoid(x)])
  result = f([test_values])[0]
  expected = sigmoid(test_values)
  assert_allclose(result, expected, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, sigmoid_type, use_real_sigmoid, test_values, expected_values',
    [
        (
            6, "hard", False,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[0.015625, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875]],
                     dtype=K.floatx()),
        ),
        (
            6, "smooth", False,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[0.3125, 0.359375, 0.40625, 0.453125, 0.5, 0.546875,
                       0.59375, 0.640625]],
                     dtype=K.floatx()),
        ),
        (
            6, "real", True,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[0.265625, 0.328125, 0.375, 0.4375, 0.5, 0.5625, 0.625,
                       0.671875]],
                     dtype=K.floatx()),
        ),
    ])
def test_quantized_sigmoid(bits, sigmoid_type, use_real_sigmoid, test_values,
                           expected_values):
  """Test quantized_sigmoid function with three different sigmoid variants."""
  # set_internal_sigmoid mutates module-global state; restore the default
  # afterwards so other tests are not affected.
  set_internal_sigmoid(sigmoid_type)
  x = K.placeholder(ndim=2)
  f = K.function([x],
                 [quantized_sigmoid(bits, symmetric=True,
                                    use_real_sigmoid=use_real_sigmoid)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, sigmoid_type, use_real_sigmoid, test_values, expected_values',
    [
        (
            4, "hard", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([0.0625, 0.9375], dtype=K.floatx()),
        ),
        (
            4, "smooth", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([0.0625, 0.9375], dtype=K.floatx()),
        ),
        (
            4, "real", True,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([0.0625, 0.9375], dtype=K.floatx()),
        ),
    ])
def test_quantized_sigmoid_limits(
    bits, sigmoid_type, use_real_sigmoid, test_values, expected_values):
  """Test the min and max values of quantized_sigmoid function with three
     different sigmoid variants."""
  set_internal_sigmoid(sigmoid_type)
  x = K.placeholder(ndim=2)
  f = K.function([x],
                 [quantized_sigmoid(bits, symmetric=True,
                                    use_real_sigmoid=use_real_sigmoid)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  # Saturated inputs must also agree with the quantizer's declared min()/max().
  min_max = np.array(
      [quantized_sigmoid(bits, symmetric=True,
                         use_real_sigmoid=use_real_sigmoid).min(),
       quantized_sigmoid(bits, symmetric=True,
                         use_real_sigmoid=use_real_sigmoid).max()])
  assert_allclose(result, expected_values, rtol=1e-05)
  assert_allclose(result, min_max, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, use_real_tanh, test_values, expected_values',
    [
        (
            4, False,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[-0.875, -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                     dtype=K.floatx()),
        ),
        (
            4, True,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[-0.75, -0.625, -0.5, -0.25, 0., 0.25, 0.5, 0.625]],
                     dtype=K.floatx()),
        )
    ])
def test_quantized_tanh(bits, use_real_tanh, test_values, expected_values):
  """Test quantized_tanh function with three different sigmoid variants."""
  # store previous sigmoid type
  set_internal_sigmoid('hard')
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_tanh(
      bits, symmetric=True, use_real_tanh=use_real_tanh)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, sigmoid_type, use_real_tanh, test_values, expected_values',
    [
        (
            4, "hard", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([-0.875, 0.875], dtype=K.floatx()),
        ),
        (
            4, "smooth", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([-0.875, 0.875], dtype=K.floatx()),
        ),
        (
            4, "real", True,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([-0.875, 0.875], dtype=K.floatx()),
        ),
    ])
def test_quantized_tanh_limits(bits, sigmoid_type, use_real_tanh, test_values,
                               expected_values):
  """Test the min and max values of quantized_tanh function with three
     different sigmoid variants."""
  set_internal_sigmoid(sigmoid_type)
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_tanh(
      bits, symmetric=True, use_real_tanh=use_real_tanh)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  min_max = np.array(
      [quantized_tanh(bits, symmetric=True,
                      use_real_tanh=use_real_tanh).min(),
       quantized_tanh(bits, symmetric=True,
                      use_real_tanh=use_real_tanh).max()])
  assert_allclose(result, expected_values, rtol=1e-05)
  assert_allclose(result, min_max, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, integer, use_sigmoid, test_values, expected_values',
    [
        (
            6, 2, 0,
            np.array(
                [[-3.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688]],
                dtype=K.floatx()),
            np.array([[0.0, 0.0, 2.5625, 3.375, 1.5625, 1.0625, 0.0625]],
                     dtype=K.floatx()),
        ),
        (6, 2, 1,
         np.array([[
             0.458069, 0.573227, 0.194336, 1.539047, 0.045883, 4.009995,
             3.962494, 3.937500, 0.363266, 0.875198, 0.710938, 4.000000,
             7.000000, 3.937500, 3.937592, 0.199326, 0.458008, 0.625977,
             0.544922, 1.046875, 0.586899, 3.367188, 3.804688, 0.312500,
             0.062500, 0.562500, 0.375000, 3.367188, 1.046875, 2.796875,
             0.054688, 1.562500, 2.562500
         ]], dtype=K.floatx()),
         np.array([[
             0.500000, 0.625000, 0.250000, 1.500000, 0.000000, 3.937500,
             3.937500, 3.937500, 0.375000, 0.875000, 0.750000, 3.937500,
             3.937500, 3.937500, 3.937500, 0.250000, 0.500000, 0.625000,
             0.500000, 1.000000, 0.625000, 3.375000, 3.750000, 0.250000,
             0.000000, 0.500000, 0.375000, 3.375000, 1.000000, 2.750000,
             0.000000, 1.500000, 2.500000
         ]], dtype=K.floatx())),
    ])
def test_quantized_relu(bits, integer, use_sigmoid, test_values,
                        expected_values):
  """Test quantized_relu function."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_relu(bits, integer, use_sigmoid)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    (
        "bits, integer, symmetric, keep_negative, test_values, expected_values,"
        " rtol"
    ),
    [
        (
            8, 100, 1, True,
            np.array([[1.25e+29, 3, -1.1e+30, 4.0e+32]], dtype=K.floatx()),
            np.array([[1.23794004e+29, 0.0, -1.09929075e+30, 1.26269884e+30]],
                     dtype=K.floatx()),
            5.0e+27,  # Effective quantization step size
        ),
        (
            6, 2, 0, True,
            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                     dtype=K.floatx()),
            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 3.875, 3.875]],
                     dtype=K.floatx()),
            1e-05,
        ),
        (
            6, 2, 0, False,
            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                     dtype=K.floatx()),
            np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1, 3.9375, 3.9375]],
                     dtype=K.floatx()),
            1e-05,
        ),
        (
            6, 2, 1, True,
            np.array([[-10, -4, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                     dtype=K.floatx()),
            np.array([[-3.875, -3.875, -1.0, -0.5, 0.0, 0.5, 1, 3.875, 3.875]],
                     dtype=K.floatx()),
            1e-05,
        )
    ])
def test_quantized_bits(bits, integer, symmetric, keep_negative, test_values,
                        expected_values, rtol):
  """Test quantized_bits with symmetric/asymmetric and signed/unsigned modes."""
  x = K.placeholder(ndim=2)
  f = K.function([x],
                 [quantized_bits(bits, integer, symmetric, keep_negative)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=rtol)


@pytest.mark.parametrize(
    "bits, integer, expected_output, expected_scale",
    [(4, 2,
      [[0.25, 3.0, 0.09375, 0.25], [0.4375, 0.0, 0.21875, 1.5]],
      [[0.125, 1., 0.0625, 0.5]]),
     (4, 1,
      [[0.25, 3., 0.09375, 0.25], [0.4375, 0., 0.21875, 1.5]],
      [[0.25, 2., 0.125, 1.]]),
     (5, 2,
      [[0.21875, 2.75, 0.09375, 0.375], [0.46875, 0.25, 0.234375, 1.375]],
      [[0.125, 1, 0.0625, 0.5]]),
    ])
def test_quantized_bits_with_auto_po2_scale(
    bits, integer, expected_output, expected_scale):
  # Check that alpha="auto_po2" produces the expected per-channel
  # power-of-two scale and quantized values.
  x = np.array([[0.23, 2.76, 0.1, 0.33], [0.53, 0.16, 0.3, 1.43]])
  q = quantized_bits(
      bits=bits, integer=integer, alpha="auto_po2")
  q_out = q(x).numpy()
  scale = q.scale.numpy()
  np.testing.assert_array_equal(q_out, expected_output)
  np.testing.assert_array_equal(scale, expected_scale)


def test_quantized_bits_with_post_training_scale():
  # Test if quantizer with the fixed (post-training) scale reproduces the
  # auto_po2 quantizer exactly.
  np.random.seed(42)
  array = np.random.uniform(low=0, high=10, size=(7, 64, 64, 3))
  auto_po2_quantizer = quantized_bits(
      bits=8, integer=3, alpha="auto_po2")
  qw = auto_po2_quantizer(array)
  auto_po2_scale = auto_po2_quantizer.scale.numpy()

  alpha_ndarray_quantizer = quantized_bits(
      bits=8, integer=3, alpha="auto_po2",
      post_training_scale=auto_po2_scale)
  # Check if the scale is the same as auto_po2 quantizer.
  np.testing.assert_array_equal(auto_po2_scale, alpha_ndarray_quantizer.scale)
  qw_ndarray = alpha_ndarray_quantizer(array)
  # Check if the quantized values are the same as auto_po2 quantizer.
  np.testing.assert_array_equal(qw.numpy(), qw_ndarray.numpy())


@pytest.mark.parametrize('alpha, threshold, test_values, expected_values', [
    (1.0, 0.33,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, 0, 0.0, 0.0, 1, 1, 1]], dtype=K.floatx())),
    (10.0, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-10.0, -10.0, 0.0, 0, 0.0, 0.0, 0, 0, 10]],
              dtype=K.floatx())),
])
def test_ternary(alpha, threshold, test_values, expected_values):
  """Deterministic ternary quantizer: {-alpha, 0, +alpha} by threshold."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [ternary(alpha, threshold)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize('use_01, alpha, test_values, expected_values', [
    (False, 1.0,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, -1.0, 1, 1, 1, 1, 1]], dtype=K.floatx())),
    (False, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-5.0, -5.0, -5.0, -5, 5.0, 5.0, 5, 5, 5]], dtype=K.floatx())),
    (True, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[0, 0, 0, 0, 5, 5, 5, 5, 5]], dtype=K.floatx())),
])
def test_binary(use_01, alpha, test_values, expected_values):
  """Deterministic binary quantizer, optionally mapping to {0, alpha}."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [binary(use_01, alpha)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize('test_values, expected_values', [
    (np.array([[42.0] * 100000], dtype=K.floatx()), 42.0),
    (np.array([[100.0] * 100000], dtype=K.floatx()), 100.0),
    (np.array([[48.0] * 100000], dtype=K.floatx()), 48.0),
    (np.array([[-141.0] * 100000], dtype=K.floatx()), -141.0),
    (np.array([[-32.0] * 100000], dtype=K.floatx()), -32.0),
    (np.array([[32.0] * 100000], dtype=K.floatx()), 32.0),
    (np.array([[10031.0] * 100000], dtype=K.floatx()), 10031.0),
    (np.array([[0.0] * 100000], dtype=K.floatx()), 0.0),
])
def test_stochastic_round_quantized_po2(test_values, expected_values):
  """Stochastic po2 rounding should be unbiased: the mean over many samples
  of a constant input approximates the input itself."""
  K.set_learning_phase(1)
  np.random.seed(666)
  x = K.placeholder(ndim=2)
  q = quantized_po2(use_stochastic_rounding=True)
  f = K.function([x], [q(x)])
  res = f([test_values])[0]
  res = np.average(res)
  assert_allclose(res, expected_values, rtol=1e-01, atol=1e-6)


@pytest.mark.parametrize('test_values, expected_values', [
    (np.array([[42.0] * 100000], dtype=K.floatx()), 42.0),
    (np.array([[-42.0] * 100000], dtype=K.floatx()), 0.0),
    (np.array([[0.0] * 100000], dtype=K.floatx()), 0.0),
    (np.array([[100.0] * 100000], dtype=K.floatx()), 100.0),
    (np.array([[48.0] * 100000], dtype=K.floatx()), 48.0),
])
def test_stochastic_round_quantized_relu_po2(test_values, expected_values):
  """Same unbiasedness check as above for the relu_po2 variant (negatives
  are clamped to 0)."""
  K.set_learning_phase(1)
  np.random.seed(666)
  x = K.placeholder(ndim=2)
  q = quantized_relu_po2(use_stochastic_rounding=True)
  f = K.function([x], [q(x)])
  res = f([test_values])[0]
  res = np.average(res)
  assert_allclose(res, expected_values, rtol=1e-01, atol=1e-6)


def test_stochastic_binary():
  """Average of many stochastic_binary samples should track sign(x)."""
  np.random.seed(42)
  K.set_learning_phase(1)

  x = np.random.uniform(-0.01, 0.01, size=10)
  x = np.sort(x)
  # Adding a dimension to have a common channel axis for quantization. This is
  # to cope with a bug fix in "_get_scale" without changing the test cases.
  x = np.expand_dims(x, axis=1)

  s = stochastic_binary(alpha="auto_po2")
  # NOTE(review): zeros_like(s) on the quantizer object produces a 0-d array;
  # presumably zeros_like(x) was intended — broadcasting in the accumulation
  # below makes the result equivalent. TODO confirm.
  ty = np.zeros_like(s)
  ts = 0.0

  n = 1000
  for _ in range(n):
    y = K.eval(s(K.constant(x)))
    scale = K.eval(s.scale)[0]
    ts = ts + scale
    ty = ty + (y / scale)

  # Perform squeezing to remove the common channel axis.
  result = (ty/n).astype(np.float32)
  result = np.squeeze(result)
  scale = np.array([ts/n])
  scale = np.squeeze(scale)

  expected = np.array(
      [-1., -1., -1., -0.852, 0.782, 0.768, 0.97, 0.978, 1.0, 1.0]
  ).astype(np.float32)
  expected_scale = np.array([0.003906])
  assert_allclose(result, expected, atol=0.1)
  assert_allclose(scale, expected_scale, rtol=0.1)


@pytest.mark.parametrize('alpha, test_values, expected_values', [
    (1.0,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, -1.0, 1, 1, 1, 1, 1]], dtype=K.floatx())),
    (5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-5.0, -5.0, -5.0, -5, 5.0, 5.0, 5, 5, 5]], dtype=K.floatx()))
])
def test_stochastic_binary_inference_mode(alpha, test_values, expected_values):
  """In inference mode (learning_phase=0) stochastic_binary is deterministic."""
  K.set_learning_phase(0)
  x = K.placeholder(ndim=2)
  q = stochastic_binary(alpha)
  f = K.function([x], [q(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bound, alpha, temperature, expected_values, expected_scale',
    [
        (
            0.01, "auto", 8,
            np.array([-0.973, -0.903, -0.759, -0.574, -0.242, 0.161, 0.508,
                      0.723, 0.874, 0.975]).astype(np.float32),
            np.array([0.008427, 0.007001, 0.0057, 0.004457, 0.003537,
                      0.003416, 0.004507, 0.005536, 0.006853, 0.008282]
                     ).astype(np.float32)
        ),
        (
            0.01, "auto_po2", 8,
            np.array([-0.979, -0.877, -0.639, -0.586, -0.23, 0.154, 0.327,
                      0.603, 0.83, 0.986]).astype(np.float32),
            np.array([0.007812, 0.007812, 0.007812, 0.003906, 0.003906,
                      0.003906, 0.007812, 0.007812, 0.007812, 0.007812]
                     ).astype(np.float32)
        )
    ])
def test_stochastic_ternary(bound, alpha, temperature, expected_values,
                            expected_scale):
  """Average of many stochastic_ternary samples should track the input sign
  with the expected per-channel scale."""
  np.random.seed(42)
  K.set_learning_phase(1)

  n = 1000
  x = np.random.uniform(-bound, bound, size=(n, 10))
  x = np.sort(x, axis=1)

  s = stochastic_ternary(alpha=alpha, temperature=temperature)

  y = K.eval(s(K.constant(x)))
  scale = K.eval(s.scale).astype(np.float32)[0]

  # NOTE(review): zeros_like(s) yields a 0-d array (see test_stochastic_binary)
  # — broadcasting makes the accumulation below still work. TODO confirm.
  ty = np.zeros_like(s)
  for i in range(n):
    ty = ty + (y[i] / scale)

  result = (ty/n).astype(np.float32)

  assert_allclose(result, expected_values, atol=0.1)
  assert_allclose(scale, expected_scale, rtol=0.1)


@pytest.mark.parametrize('alpha, threshold, test_values, expected_values', [
    (1.0, 0.33,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, 0, 0.0, 0.0, 1, 1, 1]], dtype=K.floatx())),
    (10.0, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-10.0, -10.0, 0.0, 0, 0.0, 0.0, 0, 0, 10]],
              dtype=K.floatx())),
])
def test_stochastic_ternary_inference_mode(alpha, threshold, test_values,
                                           expected_values):
  """In inference mode stochastic_ternary behaves like plain ternary."""
  K.set_learning_phase(0)
  x = K.placeholder(ndim=2)
  q = stochastic_ternary(alpha, threshold)
  f = K.function([x], [q(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    # y = x * relu6(x+3)/6, the total word length is 6 bits with 2 integer
    # bits. The quantization is in asymmetric mode.
    ('bits, integer, symmetric, relu_shift, relu_upper_bound,'
     'test_values, expected_values'),
    [
        (6, 2, 0, 3, 6,
         np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                  dtype=K.floatx()),
         np.array([[0., -0.375, -0.375, -0.25, 0., 0.25, 0.625, 3.875,
                    3.875]],
                  dtype=K.floatx()),
        ),
        (6, 4, 1, 3, 6,
         np.array([[-10.0, -2.0, -2.3, -0.25, 0.0, 0.5, 1, 4, 10]],
                  dtype=K.floatx()),
         np.array([[0., -0.5, -0.5, 0., 0., 0.5, 0.5, 4., 10.]],
                  dtype=K.floatx()),
        ),
        (2, 0, 0, 3, 6,
         np.array([[-10.0, -2.0, -2.3, -0.25, 0.0, 0.5, 1, 4, 10]],
                  dtype=K.floatx()),
         np.array([[0., -0.5, -0.5, 0., 0., 0.5, 0.5, 0.5, 0.5]],
                  dtype=K.floatx()),
        ),])
def test_quantized_hswish(bits, integer, symmetric, relu_shift,
                          relu_upper_bound, test_values, expected_values):
  """Test the quantized hard-swish activation."""
  x = K.placeholder(ndim=2)
  f = K.function(
      [x],
      [quantized_hswish(bits, integer, symmetric, relu_shift=relu_shift,
                        relu_upper_bound=relu_upper_bound)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


def test_quantized_relu_fast_inference():
  """Fast-inference mode must produce identical outputs to the default path."""
  q1 = quantized_relu(10, 2, enable_fast_inference=False)
  q2 = quantized_relu(10, 2, enable_fast_inference=True)
  x = np.array([-2.1, 0.73, 2.36, 4.98])
  np.testing.assert_array_equal(q1(x).numpy(), q2(x).numpy())


if __name__ == '__main__':
  pytest.main([__file__])



================================================
FILE: tests/qadaptiveactivation_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the QAdaptiveActivation layer from qlayers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import numpy as np
import pytest
import tensorflow.compat.v2 as tf

from qkeras.qlayers import QAdaptiveActivation
from qkeras.quantizers import _get_integer_bits


def run_qadaptiveactivation_test(input_val, kwargs):
  """Helper function to test QAdaptiveActivation inputs and outputs."""
  err = 'Failed test with {} on input {}'.format(kwargs, input_val)

  # Only test inputs of shape (batch_size, width, height, channels)
  assert len(input_val.shape) == 4, err
  # Only test short term layer usage with ema_decay == 0
  assert kwargs['ema_decay'] == 0, err
  assert kwargs['ema_freeze_delay'] is None, err

  # Prepare layer in a static TF graph
  model = tf.keras.Sequential([QAdaptiveActivation(**kwargs)])
  model.compile()

  # Test input on untrained EMAs: inference output must equal applying the
  # layer's quantizer directly, and the EMA buffers must still be zero.
  qout = model(input_val, training=False).numpy()
  assert np.isclose(model.layers[0].quantizer(input_val), qout).all(), err
  assert np.isclose(model.layers[0].ema_min.numpy().flatten(), 0).all(), err
  assert np.isclose(model.layers[0].ema_max.numpy().flatten(), 0).all(), err

  # Run an unquantized input and train the EMA. Before quantization_delay is
  # reached, the layer should pass values through its float activation.
  unquantized_out = model(input_val, training=True).numpy()
  assert kwargs['current_step'].numpy() == 0, err
  if kwargs['activation'] == 'quantized_relu':
    assert np.isclose(unquantized_out, np.maximum(input_val, 0)).all(), err
  elif kwargs['activation'] == 'quantized_bits':
    assert np.isclose(unquantized_out, input_val).all(), err
  else:
    raise ValueError('Invalid quantizer type ', kwargs['activation'])

  # Check EMAs (per-channel reduces over batch/height/width only)
  if kwargs['per_channel']:
    assert np.isclose(model.layers[0].ema_min.numpy(),
                      np.min(input_val, axis=(0, 1, 2))).all(), err
    assert np.isclose(model.layers[0].ema_max.numpy(),
                      np.max(input_val, axis=(0, 1, 2))).all(), err
  else:
    assert np.isclose(model.layers[0].ema_min.numpy(),
                      np.min(input_val, axis=(0, 1, 2, 3))).all(), err
    assert np.isclose(model.layers[0].ema_max.numpy(),
                      np.max(input_val, axis=(0, 1, 2, 3))).all(), err

  # Check quantizer configuration was propagated from kwargs
  quant = model.layers[0].quantizer
  assert quant.__class__.__name__ == kwargs['activation'], err
  assert quant.bits == kwargs['total_bits'], err
  assert quant.symmetric == kwargs['symmetric'], err
  keep_negative = None
  if kwargs['activation'] == 'quantized_relu':
    assert not quant.is_quantized_clip, err
    assert quant.negative_slope == kwargs['relu_neg_slope'], err
    assert quant.relu_upper_bound is None, err
    # A nonzero leaky slope means negative values survive the relu.
    keep_negative = kwargs['relu_neg_slope'] != 0
  elif kwargs['activation'] == 'quantized_bits':
    assert quant.keep_negative, err
    assert quant.alpha == 1.0, err
    keep_negative = True
  expected_integer_bits = _get_integer_bits(model.layers[0].ema_min.numpy(),
                                            model.layers[0].ema_max.numpy(),
                                            kwargs['total_bits'],
                                            kwargs['symmetric'],
                                            keep_negative,
                                            kwargs['po2_rounding']).numpy()
  assert np.isclose(expected_integer_bits, quant.integer.numpy()).all(), err

  # Skip to a step where the quantization is used
  kwargs['current_step'].assign(tf.constant(kwargs['quantization_delay'],
                                            tf.int64))

  # Check quantized output
  # To set qnoise_factor to 1.0 explicitly.
  qnoise_factor = np.array(quant.qnoise_factor)
  quant.update_qnoise_factor(1.0)
  expected_qout = np.copy(quant(input_val))
  # Revert qnoise_factor to its original value.
  quant.update_qnoise_factor(qnoise_factor)
  qout = model(input_val, training=True).numpy()
  assert np.isclose(expected_qout, qout).all(), err

  # Check testing mode
  qout = model(input_val, training=False).numpy()
  assert np.isclose(quant(input_val), qout).all(), err


@pytest.mark.parametrize(
    'momentum, ema_freeze_delay, total_steps, estimate_step_count',
    [(0.9, 50, 100, False), (0.5, 1000, 1500, False), (0.1, 2, 100, False),
     (0.999, 98, 100, False), (0.9, 50, 100, True), (0.5, 1000, 1500, True),
     (0.1, 2, 100, True), (0.999, 98, 100, True)])
def test_qadaptiveact_ema(momentum, ema_freeze_delay, total_steps,
                          estimate_step_count):
  """Test the exponential moving averages over time for QAdaptiveActivation."""
  # Initialize a QAdaptiveActivation layer just for testing the EMA.
  # current_step=None makes the layer count steps internally.
  if estimate_step_count:
    step = None
  else:
    step = tf.Variable(0, dtype=tf.int64)
  q_act = QAdaptiveActivation(activation='quantized_bits',
                              total_bits=8,
                              current_step=step,
                              quantization_delay=total_steps*2,
                              ema_freeze_delay=ema_freeze_delay,
                              ema_decay=momentum,
                              per_channel=True,
                              po2_rounding=False)
  model = tf.keras.Sequential([q_act])
  model.compile()

  # Simulate a number of training steps and check the EMA values
  exp_ema_max = 0.0
  exp_ema_min = 0.0
  for i in range(0, total_steps):
    vals = np.random.random((1, 2, 1)) * i  # generate random values for update
    model(vals, training=True)  # Simulate training

    # Check the steps match
    if estimate_step_count:
      assert np.equal(q_act.step.numpy(), i)

    # Calculate expected values: EMAs only update until ema_freeze_delay.
    if i <= ema_freeze_delay:
      exp_ema_max = (exp_ema_max * momentum) + (vals.max() * (1.0 - momentum))
      exp_ema_min = (exp_ema_min * momentum) + (vals.min() * (1.0 - momentum))
    # NOTE(review): quantizer.symmetric is passed for both the symmetric and
    # keep_negative arguments of _get_integer_bits — presumably intentional
    # for quantized_bits, but worth confirming.
    exp_int_bits = _get_integer_bits(exp_ema_min, exp_ema_max,
                                     q_act.quantizer.bits,
                                     q_act.quantizer.symmetric,
                                     q_act.quantizer.symmetric, False)

    # Check results
    assert np.abs(exp_ema_max - q_act.ema_max.numpy()[0]) < 0.0001
    assert np.isclose(exp_int_bits.numpy(), q_act.quantizer.integer.numpy())

    if not estimate_step_count:
      step.assign_add(1)


def test_qadaptiveactivation():
  """Test a wide variety of inputs to the QAdaptiveActivation layer."""
  test_options = {
      'activation': ['quantized_bits', 'quantized_relu'],
      'total_bits': [1, 2, 4, 8, 16],
      'symmetric': [True, False],
      'quantization_delay': [1],  # We will only run for one step
      'per_channel': [True, False],
      'po2_rounding': [True, False],
      'relu_neg_slope': [0.0, -0.5]
  }
  # Exercise the full cartesian product of the options above.
  for args in itertools.product(*test_options.values()):
    args = {list(test_options.keys())[i]: args[i] for i in range(len(args))}
    args['ema_freeze_delay'] = None  # This test does not test the EMA freeze
    args['ema_decay'] = 0  # This test does not test the EMA delay
    for img_shape in [(1, 28, 28, 3), (1, 3, 4, 5)]:
      for input_scale in [255, 1]:
        args['current_step'] = tf.Variable(0, dtype=tf.int64)
        img = np.random.random(img_shape) * input_scale
        run_qadaptiveactivation_test(img, args)


if __name__ == '__main__':
  pytest.main([__file__])



================================================
FILE: tests/qalpha_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test get_weight_scale function with auto and auto_po2 modes of
quantizers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import logging
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest
from tensorflow.keras import backend as K

from qkeras import binary
from qkeras import get_weight_scale
from qkeras import ternary
from qkeras.quantizers import _get_integer_bits


# expected value if input is uniform distribution is:
# - alpha = m/2.0 for binary
# - alpha = (m+d)/2.0 for ternary


def test_binary_auto():
  """Test binary auto scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer = binary(alpha="auto")
    q = K.eval(quantizer(x))

    # For uniform input in [-m, m] the learned scale converges to m/2.
    result = get_weight_scale(quantizer, q)
    expected = m / 2.0
    logging.info("expect %s", expected)
    logging.info("result %s", result)
    assert_allclose(result, expected, rtol=0.02)


def test_binary_auto_po2():
  """Test binary auto_po2 scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer_ref = binary(alpha="auto")
    quantizer = binary(alpha="auto_po2")
    q_ref = K.eval(quantizer_ref(x))
    q = K.eval(quantizer(x))

    # auto_po2 should equal the "auto" scale rounded to the nearest
    # power of two (in log2 space).
    ref = get_weight_scale(quantizer_ref, q_ref)
    expected = np.power(2.0, np.round(np.log2(ref)))
    result = get_weight_scale(quantizer, q)
    assert_allclose(result, expected, rtol=0.0001)


def test_ternary_auto():
  """Test ternary auto scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer = ternary(alpha="auto")
    q = K.eval(quantizer(x))

    # For uniform input the ternary threshold d converges to m/3 and the
    # scale to (m + d)/2.
    d = m/3.0
    result = np.mean(get_weight_scale(quantizer, q))
    expected = (m + d) / 2.0
    assert_allclose(result, expected, rtol=0.02)


def test_ternary_auto_po2():
  """Test ternary auto_po2 scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer_ref = ternary(alpha="auto")
    quantizer = ternary(alpha="auto_po2")
    q_ref = K.eval(quantizer_ref(x))
    q = K.eval(quantizer(x))

    # auto_po2 should equal the "auto" scale rounded to the nearest
    # power of two (in log2 space).
    ref = get_weight_scale(quantizer_ref, q_ref)
    expected = np.power(2.0, np.round(np.log2(ref)))
    result = get_weight_scale(quantizer, q)
    assert_allclose(result, expected, rtol=0.0001)


def test_get_integer_bits():
  """Test automated integer bit (po2 scale) estimator."""
  bits = 4
  min_value = np.array([
      -4.0, -4.0, -4.0, -4.0, 1.0, -3.0, -10.0, -16, -25, 0, 0, 0, 0.1, 0.0,
      -1.0, 0.0, 0.0, 0.0, 0, 0, 0
  ])
  max_value = np.array([
      3.5, 3.51, 3.75, 3.751, 2.0, 4.0, 5.0, 8, 0, 0, 0.1, 0.999, 0.5, 0.8751,
      0.9375, 0.93751, 1.875, 1.8751, 9, 11, 12
  ])

  # unsigned number (keep_negative=False) without clipping.
  symmetric = False  # symmetric is irrelevant.
  keep_negative = False
  is_clipping = False
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 3, 2, 3, 3, 4, 0, 0, 0, 1, 0, 0, 0, 1, 1, 2, 4, 4, 4]))

  # unsigned number (keep_negative=False) with clipping.
  symmetric = False  # symmetric is irrelevant.
  keep_negative = False
  is_clipping = True
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 2, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 4]))

  # signed number (keep_negative=True) non-symmetric without clipping
  symmetric = False
  keep_negative = True
  is_clipping = False
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 3, 3, 3, 2, 3, 3, 3, 3, 0, 0, 1, 0, 1, 1, 1, 2, 2, 3, 3, 3]))

  # signed number (keep_negative=True) non-symmetric with clipping
  symmetric = False
  keep_negative = True
  is_clipping = True
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 2, 1, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3]))

  # signed number (keep_negative=True) symmetric without clipping
  symmetric = True
  keep_negative = True
  is_clipping = False
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [3, 3, 3, 3, 2, 3, 3, 3, 3, 0, 0, 1, 0, 1, 1, 1, 2, 2, 3, 3, 3]))

  # signed number (keep_negative=True) symmetric with clipping
  symmetric = True
  keep_negative = True
  is_clipping = True
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 2, 1, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3]))


if __name__ == "__main__":
  pytest.main([__file__])



================================================
FILE: tests/qconvolutional_test.py
================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Test layers from qconvolutional.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import numpy as np from numpy.testing import assert_allclose import pytest import tempfile import tensorflow as tf from tensorflow.keras import backend as K from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.backend import clear_session from qkeras import binary from qkeras import ternary from qkeras import QActivation from qkeras import QDense from qkeras import QConv1D from qkeras import QConv2D from qkeras import QConv2DTranspose from qkeras import QSeparableConv1D from qkeras import QSeparableConv2D from qkeras import quantized_bits from qkeras import quantized_relu from qkeras.utils import model_save_quantized_weights from qkeras.utils import quantized_model_from_json from qkeras.utils import load_qmodel from qkeras import print_qstats from qkeras import extract_model_operations def test_qnetwork(): K.set_learning_phase(1) x = x_in = Input((28, 28, 1), name='input') x = QSeparableConv2D( 32, (2, 2), strides=(2, 2), depthwise_quantizer=binary(alpha=1.0), 
pointwise_quantizer=quantized_bits(4, 0, 1, alpha=1.0), activation=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_0_m')( x) x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x) x = QConv2D( 64, (3, 3), strides=(2, 2), kernel_quantizer=ternary(alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_1_m', activation=quantized_relu(6, 3, 1))( x) x = QConv2D( 64, (2, 2), strides=(2, 2), kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_2_m')( x) x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x) x = Flatten(name='flatten')(x) x = QDense( 10, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='dense')( x) x = Activation('softmax', name='softmax')(x) model = Model(inputs=[x_in], outputs=[x]) # reload the model to ensure saving/loading works json_string = model.to_json() clear_session() model = quantized_model_from_json(json_string) # generate same output for weights np.random.seed(42) for layer in model.layers: all_weights = [] for i, weights in enumerate(layer.get_weights()): input_size = np.prod(layer.input.shape.as_list()[1:]) if (len(layer.get_weights()) == 3 and i > 0): # pointwise kernel and bias input_size = input_size // np.prod(layer.kernel_size) shape = weights.shape print(shape) assert input_size > 0, 'input size for {} {}'.format(layer.name, i) # he normal initialization with a scale factor of 2.0 all_weights.append( 10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape)) if all_weights: layer.set_weights(all_weights) # apply quantizer to weights model_save_quantized_weights(model) all_weights = [] for layer in model.layers: for i, weights in enumerate(layer.get_weights()): w = np.sum(weights) all_weights.append(w) all_weights = np.array(all_weights) # test_qnetwork_weight_quantization all_weights_signature = np.array( [2., -6.75, -0.625, -2., -0.25, -56., 1.125, -1.625, -1.125]) assert 
all_weights.size == all_weights_signature.size assert np.all(all_weights == all_weights_signature) # test_qnetwork_forward: expected_output = np.array( [[0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 7.6e-06], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00]]).astype(np.float16) inputs = 2 * np.random.rand(10, 28, 28, 1) actual_output = model.predict(inputs).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) def test_sequential_qnetwork(): model = tf.keras.Sequential() model.add(Input((28, 28, 1), name='input')) model.add( QConv2D( 32, (2, 2), strides=(2, 2), kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_0_m')) model.add(QActivation(quantized_relu(4, 0), name='act0_m')) model.add( QConv2D( 64, (3, 3), strides=(2, 2), kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_1_m')) model.add(QActivation(quantized_relu(4, 0), name='act1_m')) model.add( QConv2D( 64, (2, 2), strides=(2, 2), kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_2_m')) model.add(QActivation(quantized_relu(4, 0), name='act2_m')) model.add(Flatten()) model.add( QDense( 10, kernel_quantizer=quantized_bits(4, 0, 1), 
bias_quantizer=quantized_bits(4, 0, 1), name='dense')) model.add(Activation('softmax', name='softmax')) # Check that all model operation were found correctly model_ops = extract_model_operations(model) for layer in model_ops.keys(): assert model_ops[layer]['type'][0] != 'null' return model @pytest.mark.parametrize("layer_cls", ["QConv1D", "QSeparableConv1D"]) def test_qconv1d(layer_cls): np.random.seed(33) if layer_cls == "QConv1D": x = Input((4, 4,)) y = QConv1D( 2, 1, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='qconv1d')( x) model = Model(inputs=x, outputs=y) else: x = Input((4, 4,)) y = QSeparableConv1D( 2, 2, depthwise_quantizer=quantized_bits(6, 2, 1, alpha=1.0), pointwise_quantizer=quantized_bits(4, 0, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='qconv1d')( x) model = Model(inputs=x, outputs=y) # Extract model operations model_ops = extract_model_operations(model) # Check the input layer model operation was found correctly assert model_ops['qconv1d']['type'][0] != 'null' # Assertion about the number of operations for this (Separable)Conv1D layer if layer_cls == "QConv1D": assert model_ops['qconv1d']['number_of_operations'] == 32 else: assert model_ops['qconv1d']['number_of_operations'] == 30 # Print qstats to make sure it works with Conv1D layer print_qstats(model) # reload the model to ensure saving/loading works # json_string = model.to_json() # clear_session() # model = quantized_model_from_json(json_string) for layer in model.layers: all_weights = [] for i, weights in enumerate(layer.get_weights()): input_size = np.prod(layer.input.shape.as_list()[1:]) if input_size is None: input_size = 10 * 10 shape = weights.shape assert input_size > 0, 'input size for {} {}'.format(layer.name, i) all_weights.append( 10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape)) if all_weights: layer.set_weights(all_weights) # Save the model as an h5 file using Keras's model.save() fd, fname = 
tempfile.mkstemp('.h5') model.save(fname) del model # Delete the existing model # Return a compiled model identical to the previous one model = load_qmodel(fname) # Clean the created h5 file after loading the model os.close(fd) os.remove(fname) # apply quantizer to weights model_save_quantized_weights(model) inputs = np.random.rand(2, 4, 4) p = model.predict(inputs).astype(np.float16) if layer_cls == "QConv1D": y = np.array([[[-2.441, 3.816], [-3.807, -1.426], [-2.684, -1.317], [-1.659, 0.9834]], [[-4.99, 1.139], [-2.559, -1.216], [-2.285, 1.905], [-2.652, -0.467]]]).astype(np.float16) else: y = np.array([[[-2.275, -3.178], [-0.4358, -3.262], [ 1.987, 0.3987]], [[-0.01251, -0.376], [ 0.3928, -1.328], [-1.243, -2.43 ]]] ).astype(np.float16) assert_allclose(p, y, rtol=1e-4) def test_qconv2dtranspose(): x = Input((4, 4, 1,)) y = QConv2DTranspose( 1, kernel_size=(3, 3), kernel_quantizer=binary(), bias_quantizer=binary(), name='conv2d_tran')(x) model = Model(inputs=x, outputs=y) data = np.ones(shape=(1,4,4,1)) kernel = np.ones(shape=(3,3,1,1)) bias = np.ones(shape=(1,)) model.get_layer('conv2d_tran').set_weights([kernel, bias]) actual_output = model.predict(data).astype(np.float16) expected_output = np.array( [ [2., 3., 4., 4., 3., 2.], [3., 5., 7., 7., 5., 3.], [4., 7., 10., 10., 7., 4.], [4., 7., 10., 10., 7., 4.], [3., 5., 7., 7., 5., 3.], [2., 3., 4., 4., 3., 2.] ]).reshape((1,6,6,1)).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) def test_masked_qconv2d_creates_correct_parameters(): mask = mask = np.ones((5, 5), dtype=np.float32) model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(10, 10, 1))) model.add(QConv2D(mask=mask, filters=1, kernel_size=(5, 5), use_bias=False)) # There should be no non-trainable params. np.testing.assert_equal(len(model.non_trainable_weights), 0) # Validate number of trainable params. This should be equal to one (5,5) # kernel. 
np.testing.assert_equal(len(model.trainable_weights), 1) num_trainable_params = np.prod(model.trainable_weights[0].shape) np.testing.assert_equal(num_trainable_params, 25) def test_qconv2d_masks_weights(): # Create an arbitrary mask. mask = np.array( [ [1.0, 0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0, 0.0], [1.0, 0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0, 0.0], [1.0, 0.0, 1.0, 0.0, 1.0], ], dtype=np.float32, ) model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(5, 5, 1))) model.add(QConv2D(mask=mask, filters=1, kernel_size=(5, 5), use_bias=False)) # Set the weights to be all ones. model.layers[0].set_weights([np.ones((5, 5, 1, 1), dtype=np.float32)]) # Run inference on a all ones input. output = model.predict(np.ones((1, 5, 5, 1), dtype=np.float32)) # Output should just be summation of number of ones in the mask. np.testing.assert_array_equal( output, np.array([[[[11.0]]]], dtype=np.float32) ) def test_masked_qconv2d_load_restore_works(): model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(10, 10, 1))) model.add( QConv2D( mask=np.ones((5, 5), dtype=np.float32), filters=1, kernel_size=(5, 5), use_bias=False, ) ) with tempfile.TemporaryDirectory() as temp_dir: model_path = os.path.join(temp_dir, 'model.keras') # Can save the model. model.save(model_path) # Can load the model. custom_objects = { 'QConv2D': QConv2D, } loaded_model = tf.keras.models.load_model( model_path, custom_objects=custom_objects ) np.testing.assert_array_equal( model.layers[0].weights[0], loaded_model.layers[0].weights[0] ) def test_qconv2d_groups_works(): model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(10, 10, 10))) model.add( QConv2D( filters=6, kernel_size=(1, 1), use_bias=True, groups=2, ) ) # Validate number of trainable params. 
np.testing.assert_equal(len(model.trainable_weights), 2) num_trainable_params = np.prod(model.trainable_weights[0].shape) + np.prod( model.trainable_weights[1].shape ) expected_trainable_params = 36 # (5*3)*2 + 6 np.testing.assert_equal(num_trainable_params, expected_trainable_params) if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/qdepthwise_conv2d_transpose_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Test layers from qconvolutional.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import tempfile import numpy as np from numpy.testing import assert_allclose, assert_equal import pytest import tensorflow as tf from qkeras import QDepthwiseConv2DTranspose from qkeras import quantized_bits # Predicted output from float model. 
# Golden 8x8x3 output of the float model in test_qseparable_conv2d_transpose.
_FLOAT_PREDICTED_OUTPUT = np.array([[
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
     [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
     [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
    [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0],
     [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]],
    [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0],
     [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]],
    [[2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0],
     [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0]],
    [[2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0],
     [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0]],
    [[3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0],
     [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0]],
    [[3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0],
     [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0]],
]])


def create_model(group_size=1):
  """Builds an unquantized QDepthwiseConv2DTranspose model (stride 2)."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QDepthwiseConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(2, 2),
      padding="same",
      name="conv2d_tran",
      depthwise_activation=None,
      depthwise_kernel_quantizer=None,
      bias_quantizer=None,
      group_size=group_size,
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)
  return model


def create_quantized_model(group_size=1):
  """Builds a quantized QDepthwiseConv2DTranspose model (stride 1)."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QDepthwiseConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(1, 1),
      padding="same",
      name="conv2d_tran",
      depthwise_activation="quantized_bits(10, 6, 1)",
      depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
      bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),
      group_size=group_size,
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)
  return model


# NOTE(review): these tests are named test_qseparable_* but exercise
# QDepthwiseConv2DTranspose — presumably a copy-paste from the separable
# transpose tests; confirm before renaming.
def test_qseparable_conv2d_transpose():
  # By setting the weights and input values manually, we can test
  # the correctness of the output.

  # Input is (1, 4, 4, 3), with 3 output channels. For i-th channel,
  # with shape (1, 4, 4, 1), it will convolve with the depthwise kernel at
  # i-th channel. Depthwise outputs are (1, 8, 8, 3).

  # Create model.
  model = create_model()

  output_shape = model.output_shape
  ws = model.layers[1].weights

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # depthwise kernel of shape (2, 2, 3, 1)
  dw_kernel = np.array([
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
  ])

  bias = tf.zeros((2,))
  model.layers[1].set_weights([dw_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  assert_equal(output_shape[1:], (8, 8, 3))
  assert_equal(len(ws), 2)

  # Test if the depthwise conv kernel shape is correct.
  assert_equal(ws[0].shape, (2, 2, 3, 1))

  # Test if the bias shape is correct.
  assert_equal(ws[1].shape, (2,))

  # Test if overall output is correct.
  assert_equal(actual_output, _FLOAT_PREDICTED_OUTPUT)


def test_quantization_in_separable_conv2d_transpose():
  # Test if quantization is applied correctly.

  # Create model with quantization.
  model = create_quantized_model()

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # depthwise kernel of shape (2, 2, 3, 1)
  dw_kernel = np.array([
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
  ])

  bias = tf.ones((2,))
  model.layers[1].set_weights([dw_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  qs = model.layers[1].get_quantizers()
  assert_equal(len(qs), 3)
  assert_equal(str(qs[0]), "quantized_bits(1,0,1,alpha=1.0)")
  assert_equal(str(qs[1]), "quantized_bits(2,2,1,alpha=1.0)")
  assert_equal(str(qs[2]), "quantized_bits(10,6,1)")

  expected = np.array([[
      [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
      [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]],
      [[3.0, 3.0, 3.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0]],
      [
          [5.0, 5.0, 5.0],
          [10.0, 10.0, 10.0],
          [10.0, 10.0, 10.0],
          [10.0, 10.0, 10.0],
      ],
  ]])

  assert_equal(actual_output, expected)


def test_qseparable_conv2d_transpose_with_groups():
  model = create_model(group_size=3)

  output_shape = model.output_shape
  ws = model.layers[1].weights

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # depthwise kernel of shape (2, 2, 3, 3)
  dw_kernel = np.array([
      [
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
      ],
      [
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
      ],
  ])

  bias = tf.zeros((2,))
  model.layers[1].set_weights([dw_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  predicted = _FLOAT_PREDICTED_OUTPUT * 3.0  # kernel values replicated 3 times

  assert_equal(output_shape[1:], (8, 8, 3))
  assert_equal(len(ws), 2)

  # Test if the depthwise conv kernel shape is correct.
  assert_equal(ws[0].shape, (2, 2, 3, 3))

  # Test if the bias shape is correct.
  assert_equal(ws[1].shape, (2,))

  # Test if overall output is correct.
  assert_equal(actual_output, predicted)


def test_save_and_load_model():
  # Test if the model can be loaded from a saved model.
  model = create_quantized_model(group_size=3)

  fd, fname = tempfile.mkstemp(".hdf5")
  model.save(fname)

  custom_object = {
      "QDepthwiseConv2DTranspose": QDepthwiseConv2DTranspose,
  }

  model_loaded = tf.keras.models.load_model(
      fname, custom_objects=custom_object)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  model_weights = model.layers[1].weights
  loaded_model_weights = model_loaded.layers[1].weights

  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qlayers_test.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test layers from qlayers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import tempfile

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest
import tensorflow as tf

from tensorflow.keras import backend as K
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

from qkeras import QActivation
from qkeras import QDense
from qkeras import quantized_bits
from qkeras import quantized_relu
from qkeras.utils import load_qmodel
from qkeras.utils import model_save_quantized_weights
from qkeras.utils import quantized_model_from_json


def qdense_util(layer_cls,
                kwargs=None,
                input_data=None,
                weight_data=None,
                expected_output=None):
  """qlayer test utility: build a one-layer model, set weights, and
  (optionally) compare the prediction against expected_output."""
  input_shape = input_data.shape
  input_dtype = input_data.dtype
  layer = layer_cls(**kwargs)
  x = Input(shape=input_shape[1:], dtype=input_dtype)
  y = layer(x)
  layer.set_weights(weight_data)
  model = Model(x, y)
  actual_output = model.predict(input_data)
  if expected_output is not None:
    assert_allclose(actual_output, expected_output, rtol=1e-4)


@pytest.mark.parametrize(
    'layer_kwargs, input_data, weight_data, bias_data, expected_output',
    [
        (
            {
                'units': 2,
                'use_bias': True,
                'kernel_initializer': 'glorot_uniform',
                'bias_initializer': 'zeros'
            },
            np.array([[1, 1, 1, 1]], dtype=K.floatx()),
            np.array([[10, 20], [10, 20], [10, 20], [10, 20]],
                     dtype=K.floatx()),  # weight_data
            np.array([0, 0], dtype=K.floatx()),  # bias
            np.array([[40, 80]], dtype=K.floatx())),  # expected_output
        (
            {
                'units': 2,
                'use_bias': True,
                'kernel_initializer': 'glorot_uniform',
                'bias_initializer': 'zeros',
                'kernel_quantizer': 'quantized_bits(2,0,alpha=1.0)',
                'bias_quantizer': 'quantized_bits(2,0)',
            },
            np.array([[1, 1, 1, 1]], dtype=K.floatx()),
            np.array([[10, 20], [10, 20], [10, 20], [10, 20]],
                     dtype=K.floatx()),  # weight_data
            np.array([0, 0], dtype=K.floatx()),  # bias
            np.array([[2, 2]], dtype=K.floatx())),  # expected_output
    ])
def test_qdense(layer_kwargs, input_data, weight_data, bias_data,
                expected_output):
  """QDense forward pass, with and without weight/bias quantizers."""
  qdense_util(
      layer_cls=QDense,
      kwargs=layer_kwargs,
      input_data=input_data,
      weight_data=[weight_data, bias_data],
      expected_output=expected_output)


def test_qactivation_loads():
  """A model with a QActivation layer must round-trip through h5 save/load."""
  layer_size = 10

  # Create a small model with QActivation layer.
  x = xin = tf.keras.layers.Input(shape=(layer_size,), name='input')
  x = QDense(
      layer_size,
      name='qdense',
  )(x)
  x = QActivation(activation=quantized_relu(8), name='relu')(x)
  model = tf.keras.Model(inputs=xin, outputs=x)

  # Generate random weights for the model.
  w_k = np.random.rand(layer_size, layer_size)
  w_b = np.random.rand(
      layer_size,
  )
  model.set_weights([w_k, w_b])

  # Save the model as an h5 file.
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)

  # Load the model.
  loaded_model = load_qmodel(fname)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # Compare weights of original and loaded models.
  model_weights = model.weights
  loaded_model_weights = loaded_model.weights
  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())


if __name__ == '__main__':
  pytest.main([__file__])


================================================
FILE: tests/qmac_test.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test layers from qlayers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import tempfile

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

from qkeras import QScaleShift
from qkeras.utils import load_qmodel


def create_qmac_model(layer_cls,
                      kwargs=None,
                      input_data=None,
                      weight_data=None):
  """Create a QMAC model for test purpose."""
  layer = layer_cls(**kwargs)
  x = Input(shape=input_data.shape[1:], dtype=input_data.dtype)
  y = layer(x)
  layer.set_weights(weight_data)
  return Model(x, y)


@pytest.mark.parametrize(
    'layer_kwargs, input_data, weight_data, bias_data, expected_output',
    [
        (
            {
                'weight_quantizer': 'quantized_bits(8,2,alpha=1.0)',
                'bias_quantizer': 'quantized_bits(8,2,alpha=1.0)',
                'activation': 'quantized_bits(8,4,alpha=1.0)'
            },
            np.array([[1, 1], [2, 2]], dtype=K.floatx()),
            np.array([[1.0]]),  # weight (scale)
            np.array([[4.0]]),  # bias (shift)
            np.array([[5, 5], [6, 6]], dtype=K.floatx())),
    ])
def test_qmac(layer_kwargs, input_data, weight_data, bias_data,
              expected_output):
  """QScaleShift forward pass (x*w + b) and h5 save/load round-trip."""
  model = create_qmac_model(
      layer_cls=QScaleShift,
      kwargs=layer_kwargs,
      input_data=input_data,
      weight_data=[weight_data, bias_data])

  actual_output = model.predict(input_data)
  assert_allclose(actual_output, expected_output, rtol=1e-4)

  # Test model loading and saving.
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)

  # Load the model.
  loaded_model = load_qmodel(fname)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # Compare weights of original and loaded models.
  model_weights = model.weights
  loaded_model_weights = loaded_model.weights
  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())

  # Compare if loaded models have the same prediction as original models.
  loaded_model_output = loaded_model.predict(input_data)
  assert_equal(actual_output, loaded_model_output)


if __name__ == '__main__':
  pytest.main([__file__])


================================================
FILE: tests/qnoise_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test gradual quantization noise injection with quantizers of quantizers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import logging
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest
from tensorflow.keras import backend as K

from qkeras.quantizers import quantized_bits
from qkeras.quantizers import quantized_relu


def test_qnoise_quantized_bits():
  """qnoise_factor must blend float (0.0) and quantized (1.0) outputs."""
  # 1 sign bit, 1 integer bit, and 2 fractional bits.
  bits = 4
  integer = 1
  symmetric = True
  keep_negative = True
  alpha = 1
  use_stochastic_rounding = False

  qb = quantized_bits(
      bits=bits,
      integer=integer,
      symmetric=symmetric,
      keep_negative=keep_negative,
      alpha=alpha,
      use_stochastic_rounding=use_stochastic_rounding,
      use_variables=True)

  inputs = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
  # x: unquantized pass-through; xq: fully quantized values.
  x = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
  xq = np.array([0.0, 0.5, -0.5, 0.5, -0.5, 1.75, -1.75], dtype=np.float32)
  x_xq = 0.5 * (x + xq)

  # no quantization
  qb.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qb(inputs)
  assert_equal(x_q_0, x)

  # full quantization
  qb.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qb(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half of x and xq
  qb.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qb(inputs)
  assert_equal(x_q_05, x_xq)


def test_qnoise_quantized_relu():
  """qnoise blending for quantized_relu across upper-bound/clip variants."""
  # 0 sign bit, 1 integer bit, and 3 fractional bits.
  bits = 4
  integer = 1
  use_sigmoid = False
  negative_slope = 0
  use_stochastic_rounding = False

  # input to quantized relu
  inputs = np.array([0.0, 0.5, -0.5, 0.6, 2.0, 3.0], dtype=np.float32)
  # float relu
  x = np.array([0.0, 0.5, 0.0, 0.6, 2.0, 3.0], dtype=np.float32)
  # float relu with upper bound 1.5
  x_ub = np.array([0.0, 0.5, 0.0, 0.6, 1.5, 1.5], dtype=np.float32)
  # float relu with quantized clipping
  x_clipped = np.array([0.0, 0.5, 0.0, 0.6, 1.875, 1.875], dtype=np.float32)
  # quantized relu
  xq = np.array([0.0, 0.5, 0.0, 0.625, 1.875, 1.875], dtype=np.float32)
  # mixing half and half
  x_xq = 0.5 * (x + xq)
  x_clipped_xq = 0.5 * (x_clipped + xq)
  x_ub_xq = 0.5 * (x_ub + xq)

  #########################################
  # No relu upper bound
  # No quantized clip for float relu
  #########################################
  qr_qc_false = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=None,
      is_quantized_clip=False,
      use_variables=True)

  # no quantization
  qr_qc_false.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_qc_false(inputs)
  assert_equal(x_q_0, x)

  # full quantization
  qr_qc_false.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_qc_false(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half
  qr_qc_false.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_qc_false(inputs)
  assert_equal(x_q_05, x_xq)

  #########################################
  # No relu upper bound
  # Quantized clip for float relu
  #########################################
  qr_qc_true = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=None,
      is_quantized_clip=True,
      use_variables=True)

  # no quantization
  qr_qc_true.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_qc_true(inputs)
  assert_equal(x_q_0, x_clipped)

  # full quantization
  qr_qc_true.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_qc_true(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half
  qr_qc_true.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_qc_true(inputs)
  assert_equal(x_q_05, x_clipped_xq)

  #########################################
  # Relu upper bound
  # No quantized clip for float relu
  #########################################
  qr_ub_qc_false = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=1.5,
      is_quantized_clip=False,
      use_variables=True)

  # no quantization
  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_ub_qc_false(inputs)
  assert_equal(x_q_0, np.clip(x_ub, a_min=None, a_max=1.5))

  # full quantization
  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_ub_qc_false(inputs)
  assert_equal(x_q_1, np.clip(xq, a_min=None, a_max=1.5))

  # mixing half and half
  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_ub_qc_false(inputs)
  assert_equal(x_q_05, np.clip(x_ub_xq, a_min=None, a_max=1.5))

  #########################################
  # Relu upper bound
  # Quantized clip for float relu
  # (The quantized clip has precedence over the relu upper bound.)
  #########################################
  qr_ub_qc_true = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=1.5,
      is_quantized_clip=True,
      use_variables=True)

  # no quantization
  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_ub_qc_true(inputs)
  assert_equal(x_q_0, x_clipped)

  # full quantization
  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_ub_qc_true(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half
  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_ub_qc_true(inputs)
  assert_equal(x_q_05, x_clipped_xq)


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qpooling_test.py
================================================
# Copyright 2021 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test layers from qpooling.py."""
import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_raises
from numpy.testing import assert_equal
import pytest
import logging
import tempfile
import os

import tensorflow.compat.v2 as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.backend import clear_session

from qkeras import QAveragePooling2D
from qkeras import QGlobalAveragePooling2D
from qkeras import quantized_bits
from qkeras import binary
from qkeras import ternary
from qkeras.utils import model_save_quantized_weights
from qkeras.utils import quantized_model_from_json
from qkeras.utils import load_qmodel
from qkeras.utils import model_quantize
from qkeras import print_qstats
from qkeras.qtools import qgraph
from qkeras.qtools import generate_layer_data_type_map
from qkeras.qtools import interface


@pytest.mark.parametrize(
    ('pooling, input_size, pool_size, strides, padding, data_format,'
     'average_quantizer, activation_quantizer, y'),
    [
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375], [0.25, 0.75, 0.5]],
                    [[0.375, 0.25, 0.625], [0.625, 0.5, 0.375]]],
                   [[[0.375, 0.375, 0.5], [0.375, 0.5, 0.625]],
                    [[0.75, 0.625, 0.5], [0.5, 0.5, 0.75]]]]).astype(
                        np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (3, 3), (3, 3), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.625]]],
                   [[[0.625, 0.5, 0.625]]]]).astype(np.float16)),
        ('QGlobalAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_last', quantized_bits(10, 0, 1), quantized_bits(4, 0, 1),
         np.array([[0.5, 0.5, 0.375], [0.5, 0.5, 0.625]]).astype(np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375]]],
                   [[[0.375, 0.375, 0.5]]]]).astype(np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'same',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375], [0.375, 0.75, 0.25]],
                    [[0.75, 0.25, 0.375], [0.75, 0.75, 0.25]]],
                   [[[0.375, 0.375, 0.5], [0.25, 0.625, 0.5]],
                    [[0.625, 0.625, 0.5], [0.625, 0.625, 0.875]]]]).astype(
                        np.float16)),
        # channels_first is expected to fail on CPU (y is None).
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_first', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         None),
    ])
def test_q_average_pooling(pooling, input_size, pool_size, strides, padding,
                           data_format, average_quantizer,
                           activation_quantizer, y):
  """q_average_pooling test utility.

  Builds a one-layer pooling model, checks its prediction against the
  expected output y, and exercises json reload, h5 save/load, and
  quantized-weight saving round trips.
  """
  np.random.seed(33)

  x = Input(input_size)
  xin = x
  if pooling == 'QAveragePooling2D':
    x = QAveragePooling2D(
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        average_quantizer=average_quantizer,
        activation=activation_quantizer,
        name='qpooling')(x)
  else:
    x = QGlobalAveragePooling2D(
        data_format=data_format,
        average_quantizer=average_quantizer,
        activation=activation_quantizer,
        name='qpooling')(
            x)
  model = Model(inputs=xin, outputs=x)

  # Prints qstats to make sure it works with pooling layers
  print_qstats(model)

  size = (2,) + input_size
  inputs = np.random.rand(size[0], size[1], size[2], size[3])

  if data_format == 'channels_first':
    # channels_first average pooling is not supported here, so predict
    # must raise.
    assert_raises(tf.errors.InvalidArgumentError, model.predict, inputs)
  else:
    p = model.predict(inputs).astype(np.float16)
    assert_allclose(p, y, rtol=1e-4)

    # Reloads the model to ensure saving/loading works
    json_string = model.to_json()
    clear_session()
    reload_model = quantized_model_from_json(json_string)
    p = reload_model.predict(inputs).astype(np.float16)
    assert_allclose(p, y, rtol=1e-4)

    # Saves the model as an h5 file using Keras's model.save()
    fd, fname = tempfile.mkstemp(".h5")
    model.save(fname)
    del model  # Delete the existing model

    # Returns a compiled model identical to the previous one
    loaded_model = load_qmodel(fname)

    # Cleans the created h5 file after loading the model
    os.close(fd)
    os.remove(fname)

    # Applies quantizer to weights
    model_save_quantized_weights(loaded_model)

    p = loaded_model.predict(inputs).astype(np.float16)
    assert_allclose(p, y, rtol=1e-4)


def test_qpooling_in_model_quantize():
  """Checks model_quantize converts pooling layers with the given config."""
  input_size = (16, 16, 3)
  pool_size = (2, 2)

  x = Input(input_size)
  xin = x
  x = AveragePooling2D(pool_size=pool_size, name="pooling")(x)
  x = GlobalAveragePooling2D(name="global_pooling")(x)
  model = Model(inputs=xin, outputs=x)

  quantize_config = {
      "QAveragePooling2D": {
          "average_quantizer": "binary",
          "activation_quantizer": "binary"
      },
      "QGlobalAveragePooling2D": {
          "average_quantizer": "quantized_bits(4, 0, 1)",
          "activation_quantizer": "ternary"
      }
  }
  qmodel = model_quantize(model, quantize_config, 4)

  print_qstats(qmodel)
  assert_equal(str(qmodel.layers[1].average_quantizer_internal), "binary()")
  assert_equal(str(qmodel.layers[1].activation), "binary()")
  assert_equal(
      str(qmodel.layers[2].average_quantizer_internal),
      "quantized_bits(4,0,1)")
  assert_equal(str(qmodel.layers[2].activation), "ternary()")


def test_qpooling_in_qtools():
  """Checks qtools-derived datatypes for QAveragePooling2D and
  QGlobalAveragePooling2D (multiplier, accumulator, average quantizer and
  output quantizer bit widths)."""
  input_size = (16, 16, 3)
  pool_size = (2, 2)
  input_quantizers = [quantized_bits(8, 0, 1)]
  is_inference = False

  x = Input(input_size)
  xin = x
  x = QAveragePooling2D(
      pool_size=pool_size,
      average_quantizer=binary(),
      activation=quantized_bits(4, 0, 1),
      name="pooling")(
          x)
  x = QGlobalAveragePooling2D(
      average_quantizer=quantized_bits(4, 0, 1),
      activation=ternary(),
      name="global_pooling")(
          x)
  model = Model(inputs=xin, outputs=x)

  (graph,
   source_quantizer_list) = qgraph.CreateGraph(model, input_quantizers)
  qgraph.GraphPropagateActivationsToEdges(graph)
  layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
      graph, source_quantizer_list, is_inference)
  dtype_dict = interface.map_to_json(layer_map)

  # Checks the QAveragePooling layer datatype
  multiplier = dtype_dict["pooling"]["pool_avg_multiplier"]
  accumulator = dtype_dict["pooling"]["pool_sum_accumulator"]
  average_quantizer = dtype_dict["pooling"]["average_quantizer"]
  output = dtype_dict["pooling"]["output_quantizer"]

  assert_equal(multiplier["quantizer_type"], "quantized_bits")
  assert_equal(multiplier["bits"], 10)
  assert_equal(multiplier["int_bits"], 3)
  assert_equal(multiplier["is_signed"], 1)
  assert_equal(multiplier["op_type"], "mux")
  assert_equal(accumulator["quantizer_type"], "quantized_bits")
  assert_equal(accumulator["bits"], 10)
  assert_equal(accumulator["int_bits"], 3)
  assert_equal(accumulator["is_signed"], 1)
  assert_equal(accumulator["op_type"], "add")
  assert_equal(output["quantizer_type"], "quantized_bits")
  assert_equal(output["bits"], 4)
  assert_equal(output["int_bits"], 1)
  assert_equal(output["is_signed"], 1)
  assert_equal(average_quantizer["quantizer_type"], "binary")
  assert_equal(average_quantizer["bits"], 1)
  assert_equal(average_quantizer["int_bits"], 1)
  assert_equal(average_quantizer["is_signed"], 1)

  # Checks the QGlobalAveragePooling layer datatype
  multiplier = dtype_dict["global_pooling"]["pool_avg_multiplier"]
  accumulator = dtype_dict["global_pooling"]["pool_sum_accumulator"]
  average_quantizer = dtype_dict["global_pooling"]["average_quantizer"]
  output = dtype_dict["global_pooling"]["output_quantizer"]

  assert_equal(multiplier["quantizer_type"], "quantized_bits")
  assert_equal(multiplier["bits"], 13)
  assert_equal(multiplier["int_bits"], 7)
  assert_equal(multiplier["is_signed"], 1)
  assert_equal(multiplier["op_type"], "mul")
  assert_equal(accumulator["quantizer_type"], "quantized_bits")
  assert_equal(accumulator["bits"], 10)
  assert_equal(accumulator["int_bits"], 7)
  assert_equal(accumulator["is_signed"], 1)
  assert_equal(accumulator["op_type"], "add")
  assert_equal(output["quantizer_type"], "ternary")
  assert_equal(output["bits"], 2)
  assert_equal(output["int_bits"], 2)
  assert_equal(output["is_signed"], 1)
  assert_equal(average_quantizer["quantizer_type"], "quantized_bits")
  assert_equal(average_quantizer["bits"], 4)
  assert_equal(average_quantizer["int_bits"], 1)
  assert_equal(average_quantizer["is_signed"], 1)


def test_QAveragePooling_output():
  # Checks if the output of QAveragePooling layer with average_quantizer
  # is correct.
  x = np.ones(shape=(2, 6, 6, 1))
  x[0, 0, :, :] = 0
  x = tf.constant(x)

  y = QAveragePooling2D(
      pool_size=(3, 3),
      strides=3,
      padding="valid",
      average_quantizer="quantized_bits(8, 1, 1)")(x)
  y = y.numpy()
  assert np.all(y == [[[[0.65625], [0.65625]], [[0.984375], [0.984375]]],
                      [[[0.984375], [0.984375]], [[0.984375], [0.984375]]]])


def test_QGlobalAveragePooling_output():
  # Checks if the output of QGlobalAveragePooling layer with average_quantizer
  # is correct.
  x = np.ones(shape=(2, 3, 3, 2))
  x[0, 0, 1, :] = 0
  x = tf.constant(x)

  y = QGlobalAveragePooling2D(average_quantizer="quantized_bits(8, 1, 1)")(x)
  y = y.numpy()
  assert np.all(y == np.array([[0.875, 0.875], [0.984375, 0.984375]]))


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qrecurrent_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests for qrecurrent.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import json import os import tempfile import numpy as np from numpy.testing import assert_allclose import pytest import tensorflow as tf from tensorflow.keras import backend as K from tensorflow.keras.backend import clear_session from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Bidirectional from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import GRU from tensorflow.keras.layers import Input from tensorflow.keras.layers import LSTM from tensorflow.keras.layers import SimpleRNN from tensorflow.keras.models import Model from tensorflow.keras.models import Sequential from qkeras import QActivation from qkeras import QBidirectional from qkeras import QDense from qkeras import QGRU from qkeras import QLSTM from qkeras import QSimpleRNN from qkeras import quantized_bits from qkeras import quantized_tanh from qkeras.utils import load_qmodel from qkeras.utils import model_quantize from qkeras.utils import model_save_quantized_weights from qkeras.utils import quantized_model_from_json @pytest.mark.skip(reason="Test failing due to random weight initializaiton") @pytest.mark.parametrize('rnn, all_weights_signature, expected_output', [ (QSimpleRNN, np.array([5.109375, -1.8828125, 0.0, -0.5, 0.0], dtype=np.float32), np.array( [[0.281, 0.4956, 0.1047, 0.1188], [0.185, 0.6016, 0.0977, 0.1157], [0.3892, 0.483, 0.03528, 0.0926], [0.4038, 0.511, 0.01686, 0.06824], [0.3354, 0.5376, 0.02602, 0.101], [0.2043, 0.587, 0.04147, 0.1675], [0.2297, 0.6455, 0.0456, 0.0789], [0.4512, 0.4326, 0.01938, 0.0968], [0.6304, 0.2498, 0.03345, 0.0866], [0.4924, 0.3735, 0.011925, 0.1222]], dtype=np.float16)), (QLSTM, np.array([3.7421875, 2.1328125, 15.875, -0.5, 0.0], 
dtype=np.float32), np.array( [[0.27, 0.1814, 0.3108, 0.2378], [0.2976, 0.2424, 0.248, 0.2119], [0.3054, 0.2004, 0.2705, 0.2238], [0.325, 0.1656, 0.269, 0.2401], [0.271, 0.1796, 0.3, 0.2493], [0.3066, 0.1873, 0.2477, 0.2583], [0.2798, 0.1757, 0.2944, 0.25], [0.2693, 0.2335, 0.2534, 0.2437], [0.2808, 0.2057, 0.2712, 0.2422], [0.2732, 0.2336, 0.2491, 0.244]], dtype=np.float16)), (QGRU, np.array([4.6875, 4.3984375, 0.0, -0.5, 0.0], dtype=np.float32), np.array( [[0.2025, 0.3467, 0.2952, 0.1556], [0.2935, 0.3313, 0.2058, 0.1694], [0.2046, 0.4465, 0.1827, 0.1661], [0.1913, 0.498, 0.1583, 0.1525], [0.1578, 0.477, 0.1973, 0.1677], [0.2018, 0.44, 0.1714, 0.1869], [0.157, 0.551, 0.1709, 0.12115], [0.1973, 0.4353, 0.1672, 0.2001], [0.1622, 0.5146, 0.1741, 0.149], [0.2101, 0.3855, 0.2069, 0.1976]], dtype=np.float16)), ]) def test_qrnn(rnn, all_weights_signature, expected_output): K.set_learning_phase(0) np.random.seed(22) tf.random.set_seed(22) x = x_in = Input((2, 4), name='input') x = rnn( 16, activation=quantized_tanh(bits=8, symmetric=True), kernel_quantizer=quantized_bits(8, 0, 1, alpha=1.0), recurrent_quantizer=quantized_bits(8, 0, 1, alpha=1.0), bias_quantizer=quantized_bits(8, 0, 1, alpha=1.0), state_quantizer=quantized_bits(4, 0, 1, alpha=1.0), name='qrnn_0')( x) x = QDense( 4, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='dense')( x) x = Activation('softmax', name='softmax')(x) model = Model(inputs=[x_in], outputs=[x]) # reload the model to ensure saving/loading works json_string = model.to_json() clear_session() model = quantized_model_from_json(json_string) # Save the model as an h5 file using Keras's model.save() fd, fname = tempfile.mkstemp('.h5') model.save(fname) del model # Delete the existing model # Return a compiled model identical to the previous one model = load_qmodel(fname) # Clean the created h5 file after loading the model os.close(fd) os.remove(fname) # apply quantizer to weights 
model_save_quantized_weights(model) all_weights = [] for layer in model.layers: for i, weights in enumerate(layer.get_weights()): w = np.sum(weights) all_weights.append(w) all_weights = np.array(all_weights) assert all_weights.size == all_weights_signature.size assert np.all(all_weights == all_weights_signature) # test forward: inputs = 2 * np.random.rand(10, 2, 4) actual_output = model.predict(inputs).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) @pytest.mark.skip(reason="Test failing due to random weight initializaiton") @pytest.mark.parametrize('rnn, all_weights_signature, expected_output', [ (QSimpleRNN, np.array([ -2.6562500e+00, -4.3466797e+00, 8.6736174e-19, 6.2548828e-01, -6.0751953e+00, 8.6736174e-19, -7.5000000e-01, 0.0 ], dtype=np.float32), np.array( [[0.0851, 0.1288, 0.586, 0.2002], [0.1044, 0.1643, 0.7217, 0.00978], [0.04135, 0.0537, 0.8706, 0.03455], [0.03354, 0.0489, 0.889, 0.02852], [0.04358, 0.05246, 0.7563, 0.1478], [0.03403, 0.0743, 0.4177, 0.4739], [0.0859, 0.1567, 0.3972, 0.36], [0.27, 0.1945, 0.4841, 0.05124], [0.12115, 0.05722, 0.728, 0.0938], [0.2864, 0.1262, 0.339, 0.2484]], dtype=np.float16)), (QLSTM, np.array( [-4.1406555, 3.2921143, 16., 7.0236816, 4.1237793, 16., -0.75, 0.], dtype=np.float32), np.array( [[0.301, 0.2236, 0.2275, 0.2478], [0.2135, 0.2627, 0.2439, 0.2798], [0.1671, 0.2252, 0.2844, 0.3232], [0.2211, 0.2178, 0.2817, 0.2795], [0.3384, 0.1732, 0.2451, 0.2434], [0.296, 0.1979, 0.2468, 0.2593], [0.2698, 0.1753, 0.288, 0.267], [0.258, 0.1888, 0.3228, 0.2301], [0.2169, 0.1578, 0.3699, 0.2554], [0.2783, 0.1816, 0.2986, 0.2415]], dtype=np.float16)), (QGRU, np.array([ -6.7578125e-01, 3.6837769e-01, 2.6020852e-18, 4.1682129e+00, -7.5769043e-01, 2.6020852e-18, -7.5000000e-01, 0.0 ], dtype=np.float32), np.array( [[0.278, 0.1534, 0.314, 0.2546], [0.1985, 0.1788, 0.3823, 0.2402], [0.1997, 0.1621, 0.3792, 0.259], [0.2534, 0.1605, 0.281, 0.3052], [0.3794, 0.1266, 0.2296, 0.2642], [0.285, 0.1754, 0.2847, 
0.255], [0.2878, 0.1339, 0.3042, 0.274], [0.2874, 0.1475, 0.279, 0.2861], [0.2379, 0.1356, 0.3186, 0.3079], [0.2234, 0.1476, 0.3274, 0.3015]], dtype=np.float16)) ]) def test_qbidirectional(rnn, all_weights_signature, expected_output): K.set_learning_phase(0) np.random.seed(22) tf.random.set_seed(22) x = x_in = Input((2, 4), name='input') x = QBidirectional( rnn(16, activation='quantized_po2(8)', kernel_quantizer='quantized_po2(8)', recurrent_quantizer='quantized_po2(8)', bias_quantizer='quantized_po2(8)', state_quantizer='quantized_po2(8)', name='qbirnn_0'))( x) x = QDense( 4, kernel_quantizer=quantized_bits(8, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(8, 0, 1), name='dense')( x) x = Activation('softmax', name='softmax')(x) model = Model(inputs=[x_in], outputs=[x]) # reload the model to ensure saving/loading works json_string = model.to_json() clear_session() model = quantized_model_from_json(json_string) # Save the model as an h5 file using Keras's model.save() fd, fname = tempfile.mkstemp('.h5') model.save(fname) del model # Delete the existing model # Return a compiled model identical to the previous one model = load_qmodel(fname) # Clean the created h5 file after loading the model os.close(fd) os.remove(fname) # apply quantizer to weights model_save_quantized_weights(model) all_weights = [] for layer in model.layers: for _, weights in enumerate(layer.get_weights()): w = np.sum(weights) all_weights.append(w) all_weights = np.array(all_weights) assert all_weights.size == all_weights_signature.size assert np.all(all_weights == all_weights_signature) # test forward: inputs = 2 * np.random.rand(10, 2, 4) actual_output = model.predict(inputs).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) def create_network_rnn(rnn): xi = Input((16, 1,)) x = rnn(8)(xi) return Model(inputs=xi, outputs=x) @pytest.mark.parametrize('rnn', [SimpleRNN, LSTM, GRU]) def test_rnn_conversion(rnn): m = create_network_rnn(rnn) m.use_legacy_config = True name 
= 'Q' + m.layers[1].__class__.__name__ d = { name: { 'kernel_quantizer': 'binary', 'recurrent_quantizer': 'binary', 'bias_quantizer': 'binary', 'state_quantizer': 'binary', 'activation_quantizer': 'binary', } } if name != 'QSimpleRNN': d[name]['recurrent_activation_quantizer'] = 'binary' qq = model_quantize(m, d, 4) assert str(qq.layers[1].kernel_quantizer) == 'binary' assert str(qq.layers[1].recurrent_quantizer) == 'binary' assert str(qq.layers[1].bias_quantizer) == 'binary' assert str(qq.layers[1].state_quantizer) == 'binary' assert str(qq.layers[1].activation) == 'binary()' if name != 'QSimpleRNN': assert str(qq.layers[1].recurrent_activation) == 'binary()' def create_network_birnn(rnn): xi = Input((16, 1,)) x = Bidirectional(rnn(8))(xi) return Model(inputs=xi, outputs=x) @pytest.mark.parametrize('rnn', [SimpleRNN, LSTM, GRU]) def test_birnn_conversion(rnn): m = create_network_birnn(rnn) m.use_legacy_config = True name = 'Q' + m.layers[1].layer.__class__.__name__ d = { 'QBidirectional': { 'kernel_quantizer': 'binary', 'recurrent_quantizer': 'binary', 'bias_quantizer': 'binary', 'state_quantizer': 'binary', 'activation_quantizer': 'binary', } } if name != 'QSimpleRNN': d['QBidirectional']['recurrent_activation_quantizer'] = 'binary' qq = model_quantize(m, d, 4) layer = qq.layers[1].layer assert str(layer.kernel_quantizer) == 'binary' assert str(layer.recurrent_quantizer) == 'binary' assert str(layer.bias_quantizer) == 'binary' assert str(layer.state_quantizer) == 'binary' assert str(layer.activation) == 'binary()' if name != 'QSimpleRNN': assert str(layer.recurrent_activation) == 'binary()' backward_layer = qq.layers[1].backward_layer # backwards weight quantizers are dict because of contraints.serialize assert str(backward_layer.kernel_quantizer['class_name']) == 'binary' assert str(backward_layer.recurrent_quantizer['class_name']) == 'binary' assert str(backward_layer.bias_quantizer['class_name']) == 'binary' assert 
str(backward_layer.state_quantizer['class_name']) == 'binary' assert str(backward_layer.activation) == 'binary()' if name != 'QSimpleRNN': assert str(backward_layer.recurrent_activation) == 'binary()' def test_birnn_subrnn(): model = Sequential([Bidirectional(LSTM(16)), LSTM(8)]) d = { 'QLSTM': { 'activation_quantizer': 'ternary', 'recurrent_activation_quantizer': 'ternary', 'kernel_quantizer': 'ternary', 'recurrent_quantizer': 'ternary', 'bias_quantizer': 'ternary', 'state_quantizer': 'ternary', }, 'QBidirectional': { 'activation_quantizer': 'binary', 'recurrent_activation_quantizer': 'binary', 'kernel_quantizer': 'binary', 'recurrent_quantizer': 'binary', 'bias_quantizer': 'binary', 'state_quantizer': 'binary', } } qmodel = model_quantize(model, d, 4) layer = qmodel.layers[1] assert str(layer.kernel_quantizer) == 'ternary' assert str(layer.recurrent_quantizer) == 'ternary' assert str(layer.bias_quantizer) == 'ternary' assert str(layer.state_quantizer) == 'ternary' assert str(layer.activation) == 'ternary()' if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/qseparable_conv2d_transpose_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Test layers from qconvolutional.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tempfile

import numpy as np
from numpy.testing import assert_allclose, assert_equal
import pytest
import tensorflow as tf

from qkeras import QSeparableConv2DTranspose
from qkeras import quantized_bits


def create_model():
  """Builds a single-layer QSeparableConv2DTranspose model with no
  quantization (all quantizers None), stride 2, 'same' padding."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QSeparableConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(2, 2),
      padding="same",
      name="conv2d_tran",
      depthwise_activation=None,
      pointwise_activation=None,
      depthwise_kernel_quantizer=None,
      pointwise_kernel_quantizer=None,
      bias_quantizer=None,
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)

  return model


def create_quantized_model():
  """Builds a single-layer QSeparableConv2DTranspose model with kernel,
  bias and activation quantizers set, stride 1, 'same' padding."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QSeparableConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(1, 1),
      padding="same",
      name="conv2d_tran",
      depthwise_activation="quantized_bits(10, 6, 1)",
      pointwise_activation="quantized_bits(5, 3, 1)",
      depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
      pointwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
      bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)
  return model


def test_qseparable_conv2d_transpose():
  # By setting the weights and input values manually, we can test
  # the correctness of the output.

  # Input is (1, 4, 4, 3), with 3 output channels. For i-th channel,
  # with shape (1, 4, 4, 1), it will convolve with the depthwise kernel at
  # i-th channel. Depthwise outputs are (1, 8, 8, 3). DW output is then
  # mapped from input channel(3) to output channel (2) by pointwise conv.
  # Pointwise conv output is (1, 8, 8, 2).

  # Create model.
  model = create_model()

  output_shape = model.output_shape
  ws = model.layers[1].weights

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # Depthwise kernels per channel are all-1s, all-2s, all-3s; the two
  # pointwise output channels are all-1s and all-2s.
  k = tf.ones((2, 2, 1, 1))
  dw_kernel = tf.concat([k, 1+k, 2+k], axis=-1)
  k = tf.ones((1, 1, 1, 3))
  pt_kernel = tf.concat([k, 1+k], axis=-2)
  bias = tf.zeros((2,))

  model.layers[1].set_weights([dw_kernel, pt_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  predicted = np.array(
      [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.],
         [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
        [[0., 0.], [0., 0.], [0., 0.], [0., 0.],
         [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
        [[6., 12.], [6., 12.], [6., 12.], [6., 12.],
         [6., 12.], [6., 12.], [6., 12.], [6., 12.]],
        [[6., 12.], [6., 12.], [6., 12.], [6., 12.],
         [6., 12.], [6., 12.], [6., 12.], [6., 12.]],
        [[12., 24.], [12., 24.], [12., 24.], [12., 24.],
         [12., 24.], [12., 24.], [12., 24.], [12., 24.]],
        [[12., 24.], [12., 24.], [12., 24.], [12., 24.],
         [12., 24.], [12., 24.], [12., 24.], [12., 24.]],
        [[18., 36.], [18., 36.], [18., 36.], [18., 36.],
         [18., 36.], [18., 36.], [18., 36.], [18., 36.]],
        [[18., 36.], [18., 36.], [18., 36.], [18., 36.],
         [18., 36.], [18., 36.], [18., 36.], [18., 36.]]]])

  assert_equal(output_shape[1:], (8, 8, 2))
  assert_equal(len(ws), 3)

  # Test if the depthwise conv kernel shape is correct.
  assert_equal(ws[0].shape, (2, 2, 1, 3))

  # Test if the pointwise conv kernel shape is correct.
  assert_equal(ws[1].shape, (1, 1, 2, 3))

  # Test if the bias shape is correct.
  assert_equal(ws[2].shape, (2,))

  # Test if overall output is correct.
  assert_equal(actual_output, predicted)


def test_quantization_in_separable_conv2d_transpose():
  # Test if quantization is applied correctly.

  # Create model with quantization.
  model = create_quantized_model()

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  k = tf.ones((2, 2, 1, 1))
  dw_kernel = tf.concat([k, 1+k, 2+k], axis=-1)
  k = tf.ones((1, 1, 1, 3))
  pt_kernel = tf.concat([k, 1+k], axis=-2)
  bias = tf.ones((2,))

  model.layers[1].set_weights([dw_kernel, pt_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  # Layer exposes its five quantizers in a fixed order: depthwise kernel,
  # pointwise kernel, bias, depthwise activation, pointwise activation.
  qs = model.layers[1].get_quantizers()
  assert_equal(len(qs), 5)
  assert_equal(str(qs[0]), "quantized_bits(1,0,1,alpha=1.0)")
  assert_equal(str(qs[1]), "quantized_bits(1,0,1,alpha=1.0)")
  assert_equal(str(qs[2]), "quantized_bits(2,2,1,alpha=1.0)")
  assert_equal(str(qs[3]), "quantized_bits(10,6,1)")
  assert_equal(str(qs[4]), "quantized_bits(5,3,1)")

  expected = np.array(
      [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
        [[3., 3.], [6., 6.], [6., 6.], [6., 6.]],
        [[7.5, 7.5], [7.5, 7.5], [7.5, 7.5], [7.5, 7.5]],
        [[7.5, 7.5], [7.5, 7.5], [7.5, 7.5], [7.5, 7.5]]]]
  )

  assert_equal(actual_output, expected)


def test_save_and_load_model():
  # Test if the model can be loaded from a saved model.
  model = create_quantized_model()

  fd, fname = tempfile.mkstemp(".hdf5")
  model.save(fname)

  custom_object = {
      "QSeparableConv2DTranspose": QSeparableConv2DTranspose,
  }

  model_loaded = tf.keras.models.load_model(
      fname, custom_objects=custom_object)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  model_weights = model.layers[1].weights
  loaded_model_weights = model_loaded.layers[1].weights

  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qtools_model_test.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for various model architectures."""
import json

import numpy as np
import pytest
import tensorflow.keras as keras
import tensorflow as tf

from qkeras import QActivation
from qkeras import QAdaptiveActivation
from qkeras import QBatchNormalization
from qkeras import QConv2D
from qkeras import QDepthwiseConv2D
from qkeras import QDense
from qkeras import quantizers
from qkeras.qtools import interface
from qkeras.qtools import qgraph
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings
from qkeras.qtools.quantized_operators import divider_factory
from qkeras.qtools import generate_layer_data_type_map
from qkeras.utils import model_save_quantized_weights
from qkeras.qtools.quantized_operators import adder_impl
from qkeras.qtools.quantized_operators import quantizer_impl
from qkeras.qtools.DnC import divide_and_conquer


def qdense_model_fork():
  """Builds a QDense model whose output forks into two heads
  (a QDense branch and a QActivation branch)."""
  x = x_in = keras.layers.Input((23,), name="input")
  x = QDense(
      10,
      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
      bias_quantizer=quantizers.quantized_bits(5, 0, 1),
      activation=quantizers.quantized_po2(3, 1),
      name="qdense_0")(x)
  x = QDense(
      20,
      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
      bias_quantizer=quantizers.quantized_bits(5, 0, 1),
      activation=quantizers.quantized_relu(6, 2),
      name="qdense_1")(x)
  x = QActivation("quantized_relu(4)", name="QA_2")(x)
  x_1 = QDense(
      30,
      kernel_quantizer=quantizers.binary(),
      bias_quantizer=quantizers.binary(),
      name="qdense_3")(x)
  x_2 = QActivation("quantized_relu(6,2)", name="QA_3")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x_1, x_2,])

  return model


def qconv_model():
  """Builds a three-layer QConv2D chain with binary/ternary and
  quantized_bits kernels and a trailing QActivation."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QActivation("quantized_relu(4)", name="QA_0")(x)
  x = QConv2D(
      16, 2, 2,
      kernel_quantizer=quantizers.binary(),
      bias_quantizer=quantizers.ternary(),
      name="qconv2d_1")(x)
  x = QConv2D(
      8, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      activation=quantizers.quantized_relu(6, 2),
      name="qconv2D_2")(x)
  x = QConv2D(
      2, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      activation=quantizers.quantized_relu(6, 2),
      name="qconv2d_3")(x)
  x = QActivation("quantized_bits(6, 0, 1)", name="QA_4")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x])

  return model


def po2_qbits_model():
  """Builds a model with a po2 input activation feeding a quantized_bits
  QConv2D (po2 x qbits multiplier case)."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QActivation("quantized_relu_po2(3, 2)", name="QA_0")(x)
  x = QConv2D(
      16, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      name="qconv2d_1")(x)

  model = keras.Model(inputs=[x_in], outputs=[x])
  return model


def float_po2_model():
  """Builds a model mixing po2-quantized and quantized_bits conv layers;
  prints each layer's output shape as a side effect."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QConv2D(
      16, 2, 2,
      kernel_quantizer=quantizers.quantized_po2(5, 0),
      bias_quantizer=quantizers.quantized_po2(5, 0),
      name="qconv2d_1")(x)
  x = QActivation("quantized_relu_po2(3, 2)", name="QA_0")(x)
  x = QConv2D(
      10, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(5, 2, 1),
      bias_quantizer=quantizers.quantized_bits(5, 2, 1),
      name="qconv2d_0")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x])

  for layer in model.layers:
    print(layer)
    print(layer.output_shape)

  return model


def qbn_model(
    act="binary(use_01=0)",
    gamma=quantizers.quantized_relu_po2(4, 2),
    variance=quantizers.quantized_relu_po2(4, 2),
    beta=None,
    mean=None):
  """Builds a QActivation + QBatchNormalization model with configurable
  activation and batch-norm parameter quantizers."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QActivation(act, name="QA_0")(x)
  x = QBatchNormalization(
      gamma_quantizer=gamma,
      variance_quantizer=variance,
      beta_quantizer=beta,
      mean_quantizer=mean,
      gamma_range=8,
      beta_range=4,
      name="qbn_1")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x])

  return model


def qbn_model_inference():
  """Builds a conv/batchnorm model with fixed weights and returns
  (hw_weight_dict, model), where hw_weight_dict comes from
  model_save_quantized_weights (used to test batchnorm fusing,
  including an auto_po2 inverse_quantizer)."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QConv2D(
      4, 2, 23,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),
      use_bias=False,
      name="qconv2d_1")(x)
  x = QBatchNormalization(
      mean_quantizer=quantizers.quantized_bits(6, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantizers.quantized_bits(6, 0, 1),
      inverse_quantizer=quantizers.quantized_bits(16, 0, 1),
      scale=False,
      center=False,
      gamma_range=8,
      beta_range=4,
      name="qbn_2")(x)
  x = QActivation(activation="quantized_bits(5, 0, 1)", name="act")(x)
  x = QConv2D(
      2, 1, 1,
      kernel_quantizer=quantizers.quantized_bits(3, 0),
      bias_quantizer=quantizers.quantized_bits(3, 2),
      name="qconv2d_3")(x)
  # Add an extra QNormalization here to test auto_po2 type of inverse_quantizer
  # in batchnorm fusing.
  x = QBatchNormalization(
      mean_quantizer=quantizers.quantized_bits(6, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantizers.quantized_bits(6, 0, 1),
      inverse_quantizer=quantizers.quantized_bits(8, 0, 1, alpha="auto_po2"),
      scale=False,
      center=False,
      gamma_range=8,
      beta_range=4,
      name="qbn_4")(x)

  model = keras.Model(inputs=[x_in], outputs=[x])
  model.compile(loss="mse", run_eagerly=True)

  # Deterministic weights so the fused/quantized values are reproducible.
  model.get_layer("qconv2d_1").set_weights([
      np.array([[[[0.11, -0.5, -0.14, -0.41]], [[-0.4, 0.9, 0.6, -1.]]],
                [[[-0.35, 1., 0.54, 0.17]], [[0.39, -0.2, -0.41, -0.7]]]])
  ])
  model.get_layer("qbn_2").set_weights(
      [np.array([0., 0, 0, 0.]), np.array([1, 1, 1, 1])])
  model.get_layer("qconv2d_3").set_weights([
      np.array([[[[1.2, -1.5], [10., 1.3], [-0.7, 1.2], [1.7, 1.5]]]]),
      np.array([0.7, 0.8])
  ])
  model.get_layer("qbn_4").set_weights(
      [np.array([0, 0]), np.array([0.3, 16.8])])
  hw_weight_dict = model_save_quantized_weights(model)

  return (hw_weight_dict, model)


def add_qmodel(quantizer1, quantizer2, quantizer3):
  # Layer that add a list of inputs.
  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).
x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer1, name="dense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer2, name="dense_1")(x2) x3 = input3 = keras.layers.Input(shape=(64,), name="input_2") x3 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer3, name="dense_2")(x3) x = keras.layers.add([x1, x2, x3], name="add") model = keras.Model( inputs=[input1, input2, input3], outputs=[x]) return model def multiply_qmodel(): # element-wise multiply a list of inputs. # It takes as input a list of tensors, all of the same shape, # and returns a single tensor (also of the same shape). x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizers.quantized_bits(4, 0, 1), name="dense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizers.quantized_bits(5, 0, 1), name="dense_1")(x2) x3 = input3 = keras.layers.Input(shape=(64,), name="input_2") x3 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizers.quantized_bits(6, 0, 1), name="dense_2")(x3) x = keras.layers.multiply([x1, x2, x3], name="multiply") model = keras.Model( inputs=[input1, input2, input3], outputs=[x]) return model def pooling_qmodel(): # Average pooling and global average pooling operation for spatial data. 
x = input1 = keras.layers.Input((16, 16, 3), name="input") x = keras.layers.AveragePooling2D(pool_size=(2, 2), padding="valid", name="avg_pooling")(x) x = keras.layers.GlobalAveragePooling2D(name="global_avg_pooling")(x) model = keras.Model(inputs=[input1], outputs=[x]) return model def maximum_qmodel(quantizer1, quantizer2, quantizer3): # element-wise maximum/minimum/average of a list of inputs. # It takes as input a list of tensors, all of the same shape, # and returns a single tensor (also of the same shape). x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer1, name="qdense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer2, name="dense_1")(x2) x3 = input3 = keras.layers.Input(shape=(64,), name="input_2") x3 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer3, name="dense_2")(x3) x = keras.layers.maximum([x1, x2, x3], name="maximum") model = keras.Model( inputs=[input1, input2, input3], outputs=[x]) return model def concatenate_qmodel(quantizer1, quantizer2, quantizer3): # Layer that concatenates a list of inputs. # It takes as input a list of tensors, all of the same shape except # for the concatenation axis, and returns a single tensor, # the concatenation of all inputs.. 
x1 = input1 = keras.layers.Input((16, 16, 1), name="input_0") x1 = QConv2D( 16, 2, 2, kernel_quantizer=quantizer1, bias_quantizer=quantizer1, name="conv2d_0")(x1) x2 = input2 = keras.layers.Input((16, 16, 1), name="input_1") x2 = QConv2D( 32, 2, 2, kernel_quantizer=quantizer2, bias_quantizer=quantizer2, name="conv2d_1")(x2) x3 = input3 = keras.layers.Input((16, 16, 1), name="input_2") x3 = QConv2D( 64, 2, 2, kernel_quantizer=quantizer3, bias_quantizer=quantizer3, name="conv2d_2")(x3) x = keras.layers.concatenate([x1, x2, x3], axis=-1, name="concatenate") model = keras.Model(inputs=[input1, input2, input3], outputs=[x]) return model def run(model, input_quantizers, is_inference=False, verbose=False, hw_weight_dict=None): (graph, source_quantizer_list) = qgraph.CreateGraph( model, input_quantizers) # qgraph.PrintGraph(graph) qgraph.GraphPropagateActivationsToEdges(graph) layer_map = generate_layer_data_type_map.generate_layer_data_type_map( graph=graph, source_quantizer_list=source_quantizer_list, is_inference=is_inference, hw_weight_dict=hw_weight_dict) # interface.print_layer_data_type_map(dict) output_dict = interface.map_to_json(layer_map) if verbose: dict_to_json = json.dumps(output_dict, indent=4) print(dict_to_json) return output_dict def test_wrong_input_quantizers(): input_quantizers = [ quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 1), quantizers.quantized_bits(6, 0, 1) ] # INPUT_QUANTIZERS = None x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="dense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="dense_1")(x2) x = keras.layers.add([x1, x2], name="add") model = keras.Model( inputs=[input1, input2], outputs=[x]) with 
pytest.raises(qgraph.WrongInputQuantizerError): run(model, input_quantizers) def test_qbn_inference(): input_quantizers = [quantizers.quantized_bits(4, 0, 1)] (hw_weight_dict, model) = qbn_model_inference() dtype_dict = run(model, input_quantizers, is_inference=True, hw_weight_dict=hw_weight_dict) multiplier = dtype_dict["qconv2d_1"]["multiplier"] accumulator = dtype_dict["qconv2d_1"]["accumulator"] output = dtype_dict["qconv2d_1"]["output_quantizer"] fused_accumulator = dtype_dict["qconv2d_1"]["fused_accumulator"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 7 assert multiplier["int_bits"] == 1 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "mul" assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 9 assert accumulator["int_bits"] == 3 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" assert fused_accumulator["quantizer_type"] == "quantized_bits" assert fused_accumulator["bits"] == 25 assert fused_accumulator["int_bits"] == 4 assert accumulator["is_signed"] == 1 assert fused_accumulator["op_type"] == "add" # Tests auto_po2 type of quantizer in conv2d and batchnorm fusing. Here # we set the layer weights in a way that scale value would be !=1 so that # we need to check bits and int_bits are adjusted properly to incorporate # the scale value. multiplier = dtype_dict["qconv2d_3"]["multiplier"] accumulator = dtype_dict["qconv2d_3"]["accumulator"] output = dtype_dict["qconv2d_3"]["output_quantizer"] fused_accumulator = dtype_dict["qconv2d_3"]["fused_accumulator"] # w_bits = 3, w_intbits =0 # x_bits = 5, x_intbits =0 # weight scale = [[[[16. 2.]]]] # before scale adjustment: m_bits=(3-1)+(5-1)+1=7 m_intbits = 0 # after scale adjustment: m_bits=7+(log16-log2)=10 m_intbits = 0+log16=4 # Note: dict here added sign bit to the intbit to match hardware format. 
assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 10 assert multiplier["int_bits"] == 5 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "mul" assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 13 assert accumulator["int_bits"] == 8 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" # Calculates fused_accumulator according to fused_bn_factory/FusedBNFactory. # For example, wiht inv_quantizer scale:[2. 2.] we have here, # multiplier_x before adjust - bits:19 int_bits:6 # multiplier_x after adjust - bits:19 int_bits:7 assert fused_accumulator["quantizer_type"] == "quantized_bits" assert fused_accumulator["bits"] == 20 assert fused_accumulator["int_bits"] == 9 assert accumulator["is_signed"] == 1 assert fused_accumulator["op_type"] == "add" def test_invalid_denominator_qbn(): input_quantizers = None act = "binary(use_01=0)" gamma = quantizers.ternary() variance = gamma model = qbn_model( act=act, gamma=gamma, variance=variance, beta=None, mean=None) with pytest.raises(divider_factory.UnacceptedQuantizerError): run(model, input_quantizers) def test_conv2d(): input_quantizers = None act = "quantized_bits(6, 0, 1)" weight = quantizers.quantized_relu_po2(4, 2) x = x_in = keras.layers.Input((23, 23, 1), name="input") x = QActivation(act, name="QA_0")(x) x = QConv2D( 16, 2, 2, kernel_quantizer=weight, bias_quantizer=weight, name="qconv2d_1")(x) model = keras.Model(inputs=[x_in], outputs=[x]) dtype_dict = run(model, input_quantizers) multiplier = dtype_dict["qconv2d_1"]["multiplier"] accumulator = dtype_dict["qconv2d_1"]["accumulator"] op_count = dtype_dict["qconv2d_1"]["operation_count"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 15 assert multiplier["int_bits"] == 2 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "shifter" assert accumulator["quantizer_type"] == "quantized_bits" assert 
accumulator["bits"] == 18 assert accumulator["int_bits"] == 5 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" assert op_count == 7744 def test_qdense_model_fork(): input_quantizers = [quantizers.quantized_bits(4, 0, 1)] model = qdense_model_fork() dtype_dict = run(model, input_quantizers) multiplier = dtype_dict["qdense_3"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 5 assert multiplier["int_bits"] == 1 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "mux" accumulator = dtype_dict["qdense_3"]["accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 11 assert accumulator["int_bits"] == 7 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" def test_util_layers(): input_quantizers = None # quantizers.quantized_bits(4, 0, 1) act = "quantized_bits(6, 0, 1)" x = x_in = keras.layers.Input((24, 24, 1), name="input") x = QActivation(act, name="QA_0")(x) x = keras.layers.Reshape((12 * 12, 4, 1), name="reshape_1")(x) x = keras.layers.MaxPooling2D( pool_size=(2, 2), name="maxpooling_2")(x) x = keras.layers.Flatten(name="flatten_3")(x) x = QDense( 30, kernel_quantizer=quantizers.binary(use_01=1), bias_quantizer=quantizers.binary(use_01=1), activation=quantizers.quantized_po2(3, 2), name="qdense_4")(x) model = keras.Model(inputs=[x_in], outputs=[x]) dtype_dict = run(model, input_quantizers) multiplier = dtype_dict["qdense_4"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 6 assert multiplier["int_bits"] == 1 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "and" accumulator = dtype_dict["qdense_4"]["accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 15 assert accumulator["int_bits"] == 10 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" output = 
dtype_dict["qdense_4"]["output_quantizer"] assert output["quantizer_type"] == "quantized_po2" assert output["bits"] == 3 assert output["is_signed"] == 1 assert output["max_value"] == 2 def test_merge_layers(): input_quantizers = [ quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 1), quantizers.quantized_bits(6, 0, 1)] model = add_qmodel( quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0), quantizers.quantized_bits(6, 0, 1)) dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["add"]["Add_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 7 assert merge_quantizer["int_bits"] == 2 assert merge_quantizer["is_signed"] == 1 model = multiply_qmodel() dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["multiply"]["Multiply_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 13 assert merge_quantizer["int_bits"] == 1 assert merge_quantizer["is_signed"] == 1 assert merge_quantizer["op_type"] == "mul" model = maximum_qmodel( quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0), quantizers.quantized_bits(6, 0, 1)) dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["maximum"]["Maximum_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 6 assert merge_quantizer["int_bits"] == 1 assert merge_quantizer["is_signed"] == 1 model = concatenate_qmodel( quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0), quantizers.quantized_bits(6, 0, 1)) dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["concatenate"]["Concatenate_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 14 assert merge_quantizer["int_bits"] == 4 assert merge_quantizer["is_signed"] == 1 def test_pooling(): input_quantizers = 
[quantizers.quantized_bits(8, 0, 1)] model = pooling_qmodel() dtype_dict = run(model, input_quantizers) accumulator = dtype_dict["avg_pooling"]["pool_sum_accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 10 assert accumulator["int_bits"] == 3 accumulator = dtype_dict["global_avg_pooling"]["pool_sum_accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 16 assert accumulator["int_bits"] == 9 def test_qenergy(): x = x_in = keras.layers.Input((784,), name="input") x = QDense( 300, kernel_quantizer=quantizers.binary(), bias_quantizer=quantizers.binary(), name="d0")(x) x = QActivation("quantized_relu(4,0)", name="d0_qr4")(x) x = QDense(100, kernel_quantizer=quantizers.quantized_bits(4, 0, 1), bias_quantizer=quantizers.quantized_bits(4, 0, 1), name="d1")(x) x = QAdaptiveActivation("quantized_relu", 4, name="d1_qr4")(x) x = QDense( 10, kernel_quantizer=quantizers.quantized_bits(4, 0, 1), bias_quantizer=quantizers.quantized_bits(4, 0, 1), name="d2")(x) x = keras.layers.Activation("softmax", name="softmax")(x) model = keras.Model(inputs=[x_in], outputs=[x]) # print(model.summary()) reference_internal = "int8" reference_accumulator = "int32" # get reference energy cost q = run_qtools.QTools( model, process="horowitz", source_quantizers=reference_internal, is_inference=False, weights_path=None, keras_quantizer=reference_internal, keras_accumulator=reference_accumulator, for_reference=True) ref_energy_dict = q.pe( weights_on_memory="sram", activations_on_memory="sram", min_sram_size=8*16*1024*1024, rd_wr_on_io=False) reference_size = q.extract_energy_sum( qtools_settings.cfg.include_energy, ref_energy_dict) # get trial energy cost q = run_qtools.QTools( model, process="horowitz", source_quantizers=reference_internal, is_inference=False, weights_path=None, keras_quantizer=reference_internal, keras_accumulator=reference_accumulator, for_reference=False) trial_energy_dict = q.pe( 
weights_on_memory="sram", activations_on_memory="sram", min_sram_size=8*16*1024*1024, rd_wr_on_io=False) trial_size = q.extract_energy_sum( qtools_settings.cfg.include_energy, trial_energy_dict) # Reference energy number is now updated with keras_accumulator as # output quantizer tmp = ref_energy_dict["d0"]["energy"] assert tmp["inputs"] == pytest.approx(372.77, abs=0.1) assert tmp["outputs"] == pytest.approx(570.57, abs=0.1) assert tmp["parameters"] == pytest.approx(111975.96, abs=0.1) assert tmp["op_cost"] == pytest.approx(70560.0, abs=0.1) tmp = ref_energy_dict["d1"]["energy"] assert tmp["inputs"] == pytest.approx(570.57, abs=0.1) assert tmp["outputs"] == pytest.approx(190.19, abs=0.1) assert tmp["parameters"] == pytest.approx(14313.66, abs=0.1) assert tmp["op_cost"] == pytest.approx(26500.0, abs=0.1) tmp = ref_energy_dict["d2"]["energy"] assert tmp["inputs"] == pytest.approx(190.19, abs=0.1) assert tmp["outputs"] == pytest.approx(19.02, abs=0.1) assert tmp["parameters"] == pytest.approx(483.08, abs=0.1) assert tmp["op_cost"] == pytest.approx(883.33, abs=0.1) # Trial tmp = trial_energy_dict["d0"]["energy"] assert tmp["inputs"] == pytest.approx(372.77, abs=0.1) assert tmp["outputs"] == pytest.approx(342.34, abs=0.1) assert tmp["parameters"] == pytest.approx(13997.95, abs=0.1) assert tmp["op_cost"] == pytest.approx(15729.0, abs=0.1) tmp = trial_energy_dict["d1"]["energy"] assert tmp["inputs"] == pytest.approx(72.27, abs=0.1) assert tmp["outputs"] == pytest.approx(110.31, abs=0.1) assert tmp["parameters"] == pytest.approx(7158.73, abs=0.1) assert tmp["op_cost"] == pytest.approx(3250.0, abs=0.1) tmp = trial_energy_dict["d2"]["energy"] assert tmp["inputs"] == pytest.approx(26.63, abs=0.1) assert tmp["outputs"] == pytest.approx(11.41, abs=0.1) assert tmp["parameters"] == pytest.approx(243.44, abs=0.1) assert tmp["op_cost"] == pytest.approx(102.08, abs=0.1) # print(ref_energy_dict) # print(trial_energy_dict) assert int(reference_size) == 226629 assert int(trial_size) 
== 41070 def test_quntized_reference_energy_same_as_floating_trial(): # Test if reference energy from quantized model and floating model is the # same def get_model(quantize=False): x1 = input1 = keras.layers.Input((16, 16, 3), name="input_0") if quantize: x1 = QConv2D( 16, 2, 2, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="conv_0")(x1) else: x1 = keras.layers.Conv2D(16, 2, 2, name="conv_0")(x1) x2 = input2 = keras.layers.Input(shape=(16, 16, 3), name="input_1") if quantize: x2 = QConv2D( 16, 2, 2, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="conv_1")(x2) else: x2 = keras.layers.Conv2D(16, 2, 2, name="conv_1")(x2) x = keras.layers.add([x1, x2], name="add") if quantize: x = QActivation(activation="quantized_relu(8, 2)", name="relu")(x) else: x = keras.layers.Activation("relu", name="relu")(x) if quantize: x = QConv2D( 2, 2, 2, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="conv_2")(x) else: x = keras.layers.Conv2D(2, 2, 2, name="conv_2")(x) model = keras.Model(inputs=[input1, input2], outputs=[x]) return model def get_qenergy(model, qenergy_config, for_reference): q = run_qtools.QTools( model, process=qenergy_config["process"], source_quantizers=qenergy_config["reference_internal"], is_inference=qenergy_config["trained_model"], weights_path=None, keras_quantizer=qenergy_config["reference_internal"], keras_accumulator=qenergy_config["reference_accumulator"], for_reference=for_reference) # caculate energy of the derived data type map. 
energy_dict = q.pe( weights_on_memory=qenergy_config["parameters_on_memory"], activations_on_memory=qenergy_config["activations_on_memory"], min_sram_size=qenergy_config["min_sram_size"], rd_wr_on_io=qenergy_config["rd_wr_on_io"]) total_energy = q.extract_energy_sum(qtools_settings.cfg.include_energy, energy_dict) return q, total_energy qenergy_config = { "trained_model": True, "delta_p": 8.0, "delta_n": 8.0, "rate": 2.0, "stress": 1.0, "process": "horowitz", "parameters_on_memory": "sram", "activations_on_memory": "sram", "rd_wr_on_io": False, "min_sram_size": 0, "source_quantizers": ["quantizers.quantized_bits(8, 0, 1)"], "reference_internal": "int8", "reference_accumulator": "int32" } float_model = get_model(quantize=False) quantized_model = get_model(quantize=True) _, float_reference_energy = get_qenergy( float_model, qenergy_config, for_reference=False) _, float_trial_energy = get_qenergy( float_model, qenergy_config, for_reference=True) _, quantized_reference_energy = get_qenergy( quantized_model, qenergy_config, for_reference=True) assert float_reference_energy == quantized_reference_energy assert float_reference_energy == float_trial_energy def test_auto_po2(): def gen_model(img_shape): img_input = x = keras.Input(shape=img_shape) x = QConv2D( filters=5, kernel_size=4, strides=4, kernel_quantizer=quantizers.quantized_bits(8, 3, alpha="auto_po2"), bias_quantizer=quantizers.quantized_bits(8, 3), name="conv")(x) x = QActivation(activation=quantizers.quantized_relu(4, 0), name="act")(x) x = keras.layers.Flatten(name="flatten")(x) x = QDense(5, kernel_quantizer=quantizers.quantized_bits( 8, 0, alpha="auto_po2"), bias_quantizer=quantizers.quantized_bits(8, 3), name="dense")(x) model = keras.Model(inputs=img_input, outputs=[x]) return model model = gen_model((32, 32, 3,)) model.compile(loss="mse", run_eagerly=True) model.layers[1].quantizers[0].scale = tf.constant( [[[[0.0625, 0.0625, 0.0625, 0.0625, 0.03125]]]]) model.layers[4].quantizers[0].scale = 
tf.constant([[0.5, 0.5, 1, 0.5, 0.25]]) input_quantizers = [ quantizers.quantized_bits(bits=8, integer=0, keep_negative=False) ] dtype_dict = run(model, input_quantizers) # Original multiplier has 16 bits(16=8+8) and 3 int_bits multiplier = dtype_dict["conv"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 16 assert multiplier["int_bits"] == 4 # Original accumulator has 16+log2(4*4*3)+1 bits, # and 4+log2(4*4*3)+1 int_bits accumulator = dtype_dict["conv"]["accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 23 assert accumulator["int_bits"] == 11 # adjusting multiplier with auto_po2: # bits = max_fractional_bits + max_int_bits = bits + max_shift - min_shift # max_shift = log2(0.0625) = -4 # min_shift=log2(0.03125) = -5 # So adjusted multiplier bits=17, 1 bit bigger than original multiplier. # Modified multiplier int_bits = int_bits + max_shift = 3 - 4 = -1 # Because in datatype map we add int_bits with 1 extra sign bit, # adjusted multiplier int_bits = 0, 4 bit smaller than original multiplier. 
# When we pass the adjusted multiplier to fused_accumulator, we # get bits = 23+1=24, and int_bits = 11-4=7 fused_accumulator = dtype_dict["conv"]["fused_accumulator"] assert fused_accumulator["quantizer_type"] == "quantized_bits" assert fused_accumulator["bits"] == 24 assert fused_accumulator["int_bits"] == 7 multiplier = dtype_dict["dense"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 12 assert multiplier["int_bits"] == 1 def test_big_bias_quantizer(): q1 = quantizer_impl.QuantizedBits() q1.convert_qkeras_quantizer(quantizers.quantized_bits(8, 3)) q2 = quantizer_impl.QuantizedBits() q2.convert_qkeras_quantizer(quantizers.quantized_bits(16, 4)) r = adder_impl.FixedPointAdder(q1, q2) # int_bits = max(q1.int_bits, q2.int_bits) + 1 # bits = int_bits + sign_bit + max(q1_fraction_bit, q2_fraction bit) assert r.output.bits == 17 assert r.output.int_bits == 5 def test_qdepthwiseconv2d(): x = x_in = keras.layers.Input((64, 64, 3), name="input") x = QDepthwiseConv2D( kernel_size=(1, 7), depthwise_quantizer=quantizers.quantized_bits(8, 0, 1, alpha=1.0), bias_quantizer=quantizers.quantized_bits(12, 6, 1, alpha=1.0), name="dw_conv")(x) x = QConv2D( filters=16, kernel_size=(1, 1), bias_quantizer=quantizers.quantized_bits(12, 4, 1, alpha=1.0), kernel_quantizer=quantizers.quantized_bits(4,0, 1, alpha=1.0), name="pw_conv")(x) model = keras.Model(inputs=[x_in], outputs=[x]) input_quantizers = [quantizers.quantized_bits(8, 0, 1)] dtype_dict = run(model, input_quantizers) # multiplier_int_bits = 0(x_int_bits) + 0(w_int_bits) = 0 (excluding sign_bit) # multiplier_fractional_bits = 7(x_fractional) + 7(w_fractional) = 14 # multiplier_bits = 0 + 14 + sign_bit = 15 assert dtype_dict["dw_conv"]["multiplier"]["bits"] == 15 assert dtype_dict["dw_conv"]["multiplier"]["int_bits"] == 1 # accumulator_int_bits = max(bias_int_bits, log7 + 0) + 1 = 7 # accumulator_fractional_bits = max(bias_fractional, 14) = 14 # accumulator_bits = int_bits + 
fractional_bits + sign_bit = 22 assert dtype_dict["dw_conv"]["accumulator"]["bits"] == 22 assert dtype_dict["dw_conv"]["accumulator"]["int_bits"] == 8 assert dtype_dict["pw_conv"]["multiplier"]["bits"] == 25 assert dtype_dict["pw_conv"]["multiplier"]["int_bits"] == 8 assert dtype_dict["pw_conv"]["accumulator"]["bits"] == 28 assert dtype_dict["pw_conv"]["accumulator"]["int_bits"] == 11 def test_divide_and_conquer_sequential_conv2d(): # These following values are verified manually to be globally optimal. # The test has two purposes: # 1) check if the code runs ok; # 2) for a simple conv2d model, the output is as expected. # We will need to add more tests with more complex graph architecture # in the future as our solution grows. xin = x = tf.keras.layers.Input(shape=(16, 16, 1), name="input_layer") x = QConv2D( kernel_size=3, filters=3, use_bias=False, kernel_quantizer=quantizers.quantized_bits(4, 0, alpha=1.0), name="conv_1", )(x) x = QConv2D( kernel_size=3, filters=5, use_bias=False, kernel_quantizer=quantizers.quantized_bits(4, 0, alpha=1.0), name="conv_2", )(x) # Create a model model = tf.keras.Model(inputs=xin, outputs=x) # Test if the flow works perperly. In the future we will construct more # detailed tests regarding cost once the cost design matures. assert divide_and_conquer.estimate_model_cost( model, input_quantizer_bits=8, target_OutElementPerClk=10, target_out_throughput=1.0, compute_to_memory_max_ratio=1, memory_to_unroll_max_ratio=1, mode=divide_and_conquer.CostMode.ACE, ) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/qtools_util_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for qtools_util module.""" import json import numpy as np import pytest import tensorflow.keras as keras import tensorflow as tf from qkeras import quantizers from qkeras.qtools import qtools_util from qkeras.qtools import quantized_operators from qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module @pytest.mark.parametrize( "w_bits, w_int_bits, weight_quantizer_scale_type, " "expected_bits_before_adjustment, expected_int_bits_before_adjustment, " "expected_bits_after_adjustment, expected_int_bits_after_adjustment", [ (8, 0, "1.0", 11, 2, 11, 2), (4, 2, "auto_po2", 7, 4, 10, 5), (4, 0, "post_training_scale", 7, 2, 10, 5), ], ) def test_adjust_multiplier_for_auto_po2( w_bits, w_int_bits, weight_quantizer_scale_type, expected_bits_before_adjustment, expected_int_bits_before_adjustment, expected_bits_after_adjustment, expected_int_bits_after_adjustment): """Test adjust_multiplier_for_auto_po2 with auto_po2 weight quantizer.""" multiplier_factory = quantized_operators.MultiplierFactory() quantizer_factory = quantizer_factory_module.QuantizerFactory() qkeras_input_quantizer = quantizers.quantized_bits(4, 2, 1) # Generate the weight quantizer. if weight_quantizer_scale_type in ["auto_po2", "post_training_scale"]: # Compute the scale for auto_po2 quantizer. 
qkeras_weight_quantizer = quantizers.quantized_bits( bits=w_bits, integer=w_int_bits, keep_negative=True, symmetric=True, alpha="auto_po2") weight_arr = np.array([1.07, -1.7, 3.06, 1.93, 0.37, -2.43, 6.3, -2.9] ).reshape((2, 4)) qkeras_weight_quantizer(weight_arr) if weight_quantizer_scale_type == "post_training_scale": # Set the post_training_scale as fixed scale. auto_po2_scale = qkeras_weight_quantizer.scale.numpy() qkeras_weight_quantizer = quantizers.quantized_bits( bits=w_bits, integer=w_int_bits, alpha="auto_po2", post_training_scale=auto_po2_scale) else: qkeras_weight_quantizer = quantizers.quantized_bits(w_bits, w_int_bits) input_quantizer = quantizer_factory.make_quantizer( qkeras_input_quantizer) weight_quantizer = quantizer_factory.make_quantizer( qkeras_weight_quantizer) multiplier = multiplier_factory.make_multiplier( weight_quantizer, input_quantizer) np.testing.assert_equal(multiplier.output.bits, expected_bits_before_adjustment) np.testing.assert_equal(multiplier.output.int_bits, expected_int_bits_before_adjustment) qtools_util.adjust_multiplier_for_auto_po2( multiplier, qkeras_weight_quantizer) print(f"after adjustment: {multiplier.output.bits}, {multiplier.output.int_bits}") np.testing.assert_equal(multiplier.output.bits, expected_bits_after_adjustment) np.testing.assert_equal(multiplier.output.int_bits, expected_int_bits_after_adjustment) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/quantizer_impl_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for methods in quantizer_impl.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pytest
import tensorflow as tf

from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from qkeras import *
from qkeras.qtools.quantized_operators import quantizer_impl
from qkeras import quantizers
from numpy.testing import assert_equal

# pylint: disable=invalid-name


def test_QuantizedBits():
  """Round-trips quantized_bits through the qtools QuantizedBits wrapper."""
  qkeras_quantizer = quantizers.quantized_bits()
  qtools_quantizer = quantizer_impl.QuantizedBits()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      symmetric=qkeras_quantizer.symmetric,
      alpha=qkeras_quantizer.alpha,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      scale_axis=qkeras_quantizer.scale_axis,
      qnoise_factor=qkeras_quantizer.qnoise_factor)

  # The converted-back quantizer must match the original field by field.
  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_QuantizedBits_ElementsPerScale():
  """Tests quantized_bits with elements_per_scale."""

  def _get_min_max_po2_exponent(x):
    """Get min and max po2 exponent of x."""
    po2_x = K.log(x)/np.log(2.0)
    return (tf.math.reduce_min(po2_x).numpy(),
            tf.math.reduce_max(po2_x).numpy())

  qkeras_quantizer = quantizers.quantized_bits(
      alpha="auto_po2", elements_per_scale=[1, 1], scale_axis=[1, 2],
      min_po2_exponent=-3, max_po2_exponent=3)
  qtools_quantizer = quantizer_impl.QuantizedBits()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      symmetric=qkeras_quantizer.symmetric,
      alpha=qkeras_quantizer.alpha,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      scale_axis=qkeras_quantizer.scale_axis,
      qnoise_factor=qkeras_quantizer.qnoise_factor,
      elements_per_scale=qkeras_quantizer.elements_per_scale,
      min_po2_exponent=qkeras_quantizer.min_po2_exponent,
      max_po2_exponent=qkeras_quantizer.max_po2_exponent,
  )

  # for quantized_bits the scale is multiplied by the integer scale as well
  # the integer scale depends on the sign bit
  integer_po2_scale = new_quantizer.bits - new_quantizer.keep_negative

  # Test for input tensors of rank 3
  x_r3 = tf.random.uniform([1, 8, 8])
  new_quantizer(x_r3)
  x_r3_scale = new_quantizer.scale
  xq_r3_min_exp, xq_r3_max_exp = _get_min_max_po2_exponent(x_r3_scale)

  assert_equal(new_quantizer.scale.shape, [1, 8, 8])
  # Scale exponents must stay inside [min_po2_exponent, max_po2_exponent]
  # widened by the integer scale.
  assert xq_r3_min_exp >= -3*integer_po2_scale
  assert xq_r3_max_exp <= 3*integer_po2_scale

  # Test for input tensors of rank 4
  x_r4 = tf.random.uniform([1, 2, 4, 8])
  new_quantizer(x_r4)
  x_r4_scale = new_quantizer.scale
  xq_r4_min_exp, xq_r4_max_exp = _get_min_max_po2_exponent(x_r4_scale)

  assert_equal(new_quantizer.scale.shape, [1, 2, 4, 1])
  assert xq_r4_min_exp >= -3*integer_po2_scale
  assert xq_r4_max_exp <= 3*integer_po2_scale


def test_QuantizedTanh():
  """Round-trips quantized_tanh through the qtools QuantizedTanh wrapper."""
  qkeras_quantizer = quantizers.quantized_tanh()
  qtools_quantizer = quantizer_impl.QuantizedTanh()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      symmetric=qkeras_quantizer.symmetric)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_QuantizedUlaw():
  """Round-trips quantized_ulaw through the qtools QuantizedUlaw wrapper."""
  qkeras_quantizer = quantizers.quantized_ulaw()
  qtools_quantizer = quantizer_impl.QuantizedUlaw()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      symmetric=qkeras_quantizer.symmetric,
      u=qkeras_quantizer.u)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_Binary():
  """Round-trips binary through the qtools Binary wrapper."""
  qkeras_quantizer = quantizers.binary()
  qtools_quantizer = quantizer_impl.Binary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_StochasticBinary():
  """Round-trips stochastic_binary through the qtools StochasticBinary."""
  qkeras_quantizer = quantizers.stochastic_binary()
  qtools_quantizer = quantizer_impl.StochasticBinary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      temperature=qkeras_quantizer.temperature,
      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_Bernoulli():
  """Round-trips bernoulli through the qtools Bernoulli wrapper."""
  qkeras_quantizer = quantizers.bernoulli()
  qtools_quantizer = quantizer_impl.Bernoulli()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      temperature=qkeras_quantizer.temperature,
      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_QuantizedRelu():
  """Round-trips quantized_relu through the qtools QuantizedRelu wrapper."""
  qkeras_quantizer = quantizers.quantized_relu()
  qtools_quantizer = quantizer_impl.QuantizedRelu()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      use_sigmoid=qkeras_quantizer.use_sigmoid,
      negative_slope=qkeras_quantizer.negative_slope,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      relu_upper_bound=qkeras_quantizer.relu_upper_bound,
      is_quantized_clip=qkeras_quantizer.is_quantized_clip,
      qnoise_factor=qkeras_quantizer.qnoise_factor)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_Ternary():
  """Round-trips ternary through the qtools Ternary wrapper."""
  qkeras_quantizer = quantizers.ternary()
  qtools_quantizer = quantizer_impl.Ternary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      threshold=qkeras_quantizer.threshold,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      number_of_unrolls=qkeras_quantizer.number_of_unrolls)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_StochasticTernary():
  """Round-trips stochastic_ternary through the qtools StochasticTernary."""
  qkeras_quantizer = quantizers.stochastic_ternary()
  qtools_quantizer = quantizer_impl.StochasticTernary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      threshold=qkeras_quantizer.threshold,
      temperature=qkeras_quantizer.temperature,
      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid,
      number_of_unrolls=qkeras_quantizer.number_of_unrolls)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_PowerOfTwo():
  """Round-trips quantized_po2 through the qtools PowerOfTwo wrapper."""
  qkeras_quantizer = quantizers.quantized_po2()
  qtools_quantizer = quantizer_impl.PowerOfTwo(is_signed=True)
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      negative_slope=None,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      quadratic_approximation=qkeras_quantizer.quadratic_approximation)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_ReluPowerOfTwo():
  """Round-trips quantized_relu_po2 through the qtools ReluPowerOfTwo."""
  qkeras_quantizer = quantizers.quantized_relu_po2()
  qtools_quantizer = quantizer_impl.ReluPowerOfTwo()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      negative_slope=qkeras_quantizer.negative_slope,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      quadratic_approximation=qkeras_quantizer.quadratic_approximation)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_GetScale_PerChannelScale():
  """Checks _get_scale output shape with per_channel_scale on/off, rank 1-4."""
  # Rank1 tensors
  x_r1 = tf.ones([4])
  q_r1 = tf.ones([4])
  scale_r1_pcs_true = quantizers._get_scale(
      "auto", x_r1, q_r1, scale_axis=None, per_channel_scale=True)
  scale_r1_pcs_false = quantizers._get_scale(
      "auto", x_r1, q_r1, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r1_pcs_true).numpy(), [4])
  assert_equal(tf.shape(scale_r1_pcs_false).numpy(), [1])

  # Rank2 tensors
  x_r2 = tf.ones([2, 4])
  q_r2 = tf.ones([2, 4])
  scale_r2_pcs_true = quantizers._get_scale(
      "auto", x_r2, q_r2, scale_axis=None, per_channel_scale=True)
  scale_r2_pcs_false = quantizers._get_scale(
      "auto", x_r2, q_r2, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r2_pcs_true).numpy(), [1, 4])
  assert_equal(tf.shape(scale_r2_pcs_false).numpy(), [1, 1])

  # Rank3 tensors
  x_r3 = tf.ones([3, 3, 4])
  q_r3 = tf.ones([3, 3, 4])
  scale_r3_pcs_true = quantizers._get_scale(
      "auto", x_r3, q_r3, scale_axis=None, per_channel_scale=True)
  scale_r3_pcs_false = quantizers._get_scale(
      "auto", x_r3, q_r3, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r3_pcs_true).numpy(), [1, 1, 4])
  assert_equal(tf.shape(scale_r3_pcs_false).numpy(), [1, 1, 1])

  # Rank4 tensors
  x_r4 = tf.ones([1, 1, 3, 4])
  q_r4 = tf.ones([1, 1, 3, 4])
  scale_r4_pcs_true = quantizers._get_scale(
      "auto", x_r4, q_r4, scale_axis=None, per_channel_scale=True)
  scale_r4_pcs_false = quantizers._get_scale(
      "auto", x_r4, q_r4, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r4_pcs_true).numpy(), [1, 1, 1, 4])
  assert_equal(tf.shape(scale_r4_pcs_false).numpy(), [1, 1, 1, 1])


def _get_num_unique_elements(input_tensor):
  # Number of distinct scalar values in the tensor.
  return len(np.unique(input_tensor.numpy()))


def test_GetScale_ElementsPerScale_Scalar_ScaleAxis_EPS():
  """Checks _get_scale with scalar elements_per_scale/scale_axis, rank 2-4."""
  # Test get_scale function when elements_per_scale and scale_axis have scalar
  # values and the input x and q tensors have rank 2
  x_r2 = tf.random.uniform([4, 8])
  q_r2 = tf.random.uniform([4, 8])

  scale_r2_eps_none_ua_none = quantizers._get_scale(
      "auto", x_r2, q_r2, elements_per_scale=None, scale_axis=None)
  scale_r2_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r2, q_r2, elements_per_scale=2, scale_axis=0)
  scale_r2_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r2, q_r2, elements_per_scale=2, scale_axis=1)

  assert_equal(tf.shape(scale_r2_eps_none_ua_none).numpy(), [1, 8])
  assert_equal(_get_num_unique_elements(scale_r2_eps_none_ua_none), 8)

  assert_equal(tf.shape(scale_r2_eps_2_ua_0).numpy(), [4, 1])
  assert_equal(_get_num_unique_elements(scale_r2_eps_2_ua_0), 2)

  assert_equal(tf.shape(scale_r2_eps_2_ua_1).numpy(), [1, 8])
  assert_equal(_get_num_unique_elements(scale_r2_eps_2_ua_1), 4)

  # Test get_scale function when elements_per_scale and scale_axis have scalar
  # values and the input x and q tensors have rank 3
  x_r3 = tf.random.uniform([2, 4, 8])
  q_r3 = tf.random.uniform([2, 4, 8])

  scale_r3_eps_none_ua_none = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=None, scale_axis=None)
  scale_r3_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=2, scale_axis=0)
  scale_r3_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=2, scale_axis=1)
  scale_r3_eps_2_ua_2 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=2, scale_axis=2)

  assert_equal(tf.shape(scale_r3_eps_none_ua_none).numpy(), [1, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_none), 8)

  assert_equal(tf.shape(scale_r3_eps_2_ua_0).numpy(), [2, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_0), 1)

  assert_equal(tf.shape(scale_r3_eps_2_ua_1).numpy(), [1, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_1), 2)

  assert_equal(tf.shape(scale_r3_eps_2_ua_2).numpy(), [1, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_2), 4)

  # Test get_scale function when elements_per_scale and scale_axis have scalar
  # values and the input x and q tensors have rank 4
  x_r4 = tf.random.uniform([2, 4, 8, 16])
  q_r4 = tf.random.uniform([2, 4, 8, 16])

  scale_r4_eps_none_ua_none = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=None, scale_axis=None)
  scale_r4_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=0)
  scale_r4_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=1)
  scale_r4_eps_2_ua_2 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=2)
  scale_r4_eps_2_ua_3 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=3)

  assert_equal(tf.shape(scale_r4_eps_none_ua_none).numpy(), [1, 1, 1, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_none_ua_none), 16)

  assert_equal(tf.shape(scale_r4_eps_2_ua_0).numpy(), [2, 1, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_0), 1)

  assert_equal(tf.shape(scale_r4_eps_2_ua_1).numpy(), [1, 4, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_1), 2)

  assert_equal(tf.shape(scale_r4_eps_2_ua_2).numpy(), [1, 1, 8, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_2), 4)

  assert_equal(tf.shape(scale_r4_eps_2_ua_3).numpy(), [1, 1, 1, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_3), 8)


def test_GetScale_ElementsPerScale_List_ScaleAxis_EPS():
  """Checks _get_scale with list-valued elements_per_scale/scale_axis."""
  # Test get_scale function when elements_per_scale and scale_axis are lists of
  # rank 1 and the input x and q tensors have rank 3
  x_r3 = tf.random.uniform([2, 4, 8])
  q_r3 = tf.random.uniform([2, 4, 8])

  scale_r3_eps_none_ua_0 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=None, scale_axis=[0])
  scale_r3_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2], scale_axis=[0])
  scale_r3_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2], scale_axis=[1])
  scale_r3_eps_2_ua_2 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2], scale_axis=[2])

  assert_equal(tf.shape(scale_r3_eps_none_ua_0).numpy(), [2, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_0), 2)

  assert_equal(tf.shape(scale_r3_eps_2_ua_0).numpy(), [2, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_0), 1)

  assert_equal(tf.shape(scale_r3_eps_2_ua_1).numpy(), [1, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_1), 2)

  assert_equal(tf.shape(scale_r3_eps_2_ua_2).numpy(), [1, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_2), 4)

  # Test get_scale function when elements_per_scale and scale_axis are lists of
  # rank 2 and the input x and q tensors have rank 3
  x_r3 = tf.random.uniform([2, 4, 8])
  q_r3 = tf.random.uniform([2, 4, 8])

  scale_r3_eps_none_ua_01 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=None, scale_axis=[0, 1])
  scale_r3_eps_22_ua_01 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2, 2], scale_axis=[0, 1])
  # NOTE(review): variable name says eps_11 but elements_per_scale=[2, 2]
  # here -- confirm whether the name or the argument is intended.
  scale_r3_eps_11_ua_12 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2, 2], scale_axis=[1, 2])
  scale_r3_eps_11_ua_02 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[1, 1], scale_axis=[0, 2])

  assert_equal(tf.shape(scale_r3_eps_none_ua_01).numpy(), [2, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_01), 8)

  assert_equal(tf.shape(scale_r3_eps_22_ua_01).numpy(), [2, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_22_ua_01), 2)

  assert_equal(tf.shape(scale_r3_eps_11_ua_12).numpy(), [1, 4, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_11_ua_12), 8)

  assert_equal(tf.shape(scale_r3_eps_11_ua_02).numpy(), [2, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_11_ua_02), 16)

  # Test get_scale function when elements_per_scale and scale_axis are lists of
  # rank 3 and the input x and q tensors have rank 4
  x_r4 = tf.random.uniform([2, 4, 8, 16])
  q_r4 = tf.random.uniform([2, 4, 8, 16])

  scale_r4_eps_none_ua_012 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=None, scale_axis=[0, 1, 2])
  scale_r4_eps_221_ua_012 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[0, 1, 2])
  scale_r4_eps_221_ua_123 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[1, 2, 3])
  scale_r4_eps_221_ua_013 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[0, 1, 3])

  assert_equal(tf.shape(scale_r4_eps_none_ua_012).numpy(), [2, 4, 8, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_none_ua_012), 64)

  assert_equal(tf.shape(scale_r4_eps_221_ua_012).numpy(), [2, 4, 8, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_012), 16)

  assert_equal(tf.shape(scale_r4_eps_221_ua_123).numpy(), [1, 4, 8, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_123), 128)

  assert_equal(tf.shape(scale_r4_eps_221_ua_013).numpy(), [2, 4, 1, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_013), 32)


def test_GetScale_MinPO2Exponent_MaxPO2Exponent():
  """Verify get_scale function with min and max po2_exponent clipping."""

  def _get_min_max_po2_exponent(x):
    """Get min and max po2 exponent of x."""
    po2_x = K.log(x)/np.log(2.0)
    return (tf.math.reduce_min(po2_x).numpy(),
            tf.math.reduce_max(po2_x).numpy())

  # generate small decimal numbers to verify that po2 clipping works properly
  x = 2**tf.random.uniform(shape=[2, 4, 8], minval=-50, maxval=0)
  q = 2**tf.random.uniform(shape=[2, 4, 8], minval=-50, maxval=0)

  # set various min and max po2 exponents for the scale
  scale_min_neg3_max_1 = quantizers._get_scale(
      "auto_po2", x, q, elements_per_scale=4, scale_axis=2,
      min_po2_exponent=-3, max_po2_exponent=1)
  scale_min_neg8_max_0 = quantizers._get_scale(
      "auto_po2", x, q, elements_per_scale=4, scale_axis=2,
      min_po2_exponent=-8, max_po2_exponent=0)
  scale_min_neg10_max_1 = quantizers._get_scale(
      "auto_po2", x, q, elements_per_scale=4, scale_axis=2,
      min_po2_exponent=-10, max_po2_exponent=1)

  # verify that the output scales have the correct min and max ranges
  assert_equal(tf.shape(scale_min_neg3_max_1).numpy(), [1, 1, 8])
  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg3_max_1)
  assert min_po2_exp >= -3
  assert max_po2_exp <= 1

  assert_equal(tf.shape(scale_min_neg8_max_0).numpy(), [1, 1, 8])
  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg8_max_0)
  assert min_po2_exp >= -8
  assert max_po2_exp <= 0

  assert_equal(tf.shape(scale_min_neg10_max_1).numpy(), [1, 1, 8])
  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg10_max_1)
  assert min_po2_exp >= -10
  assert max_po2_exp <= 1


def test_GetUnrolledShape_GetRolledBackShape():
  """Checks _get_unrolled_shape/_get_rolled_back_shape are inverses."""
  x_r4 = [4, 4, 8, 16]

  # Scalar unroll_factor and unroll_axis - Test _get_unrolled_shape
  unrolled_x_r4_uf_2_ua_0 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=0)
  unrolled_x_r4_uf_2_ua_1 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=1)
  unrolled_x_r4_uf_2_ua_2 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=2)
  unrolled_x_r4_uf_2_ua_3 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=3)

  assert_equal(unrolled_x_r4_uf_2_ua_0, ([2, 2, 4, 8, 16], 0))
  assert_equal(unrolled_x_r4_uf_2_ua_1, ([4, 2, 2, 8, 16], 1))
  assert_equal(unrolled_x_r4_uf_2_ua_2, ([4, 4, 4, 2, 16], 2))
  assert_equal(unrolled_x_r4_uf_2_ua_3, ([4, 4, 8, 8, 2], 3))

  # Scalar unroll_factor and unroll_axis - Test _get_rolled_back_shape
  rolled_back_x_r4_uf_2_ua_0 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_0[0], roll_axis=unrolled_x_r4_uf_2_ua_0[1])
  rolled_back_x_r4_uf_2_ua_1 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_1[0], roll_axis=unrolled_x_r4_uf_2_ua_1[1])
  rolled_back_x_r4_uf_2_ua_2 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_2[0], roll_axis=unrolled_x_r4_uf_2_ua_2[1])
  rolled_back_x_r4_uf_2_ua_3 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_3[0], roll_axis=unrolled_x_r4_uf_2_ua_3[1])

  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_0)
  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_1)
  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_2)
  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_3)

  # List[2] unroll_factor and unroll_axis - Test _get_unrolled_shape
  unrolled_x_r4_uf_24_ua_01 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[0, 1])
  unrolled_x_r4_uf_24_ua_12 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[1, 2])
  unrolled_x_r4_uf_24_ua_13 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[1, 3])
  unrolled_x_r4_uf_24_ua_34 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[2, 3])

  assert_equal(unrolled_x_r4_uf_24_ua_01, ([2, 2, 1, 4, 8, 16], [0, 2]))
  assert_equal(unrolled_x_r4_uf_24_ua_12, ([4, 2, 2, 2, 4, 16], [1, 3]))
  assert_equal(unrolled_x_r4_uf_24_ua_13, ([4, 2, 2, 8, 4, 4], [1, 4]))
  assert_equal(unrolled_x_r4_uf_24_ua_34, ([4, 4, 4, 2, 4, 4], [2, 4]))

  # List[2] unroll_factor and unroll_axis - Test _get_rolled_back_shape
  rolled_back_x_r4_uf_24_ua_01 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_01[0], roll_axis=unrolled_x_r4_uf_24_ua_01[1])
  rolled_back_x_r4_uf_24_ua_12 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_12[0], roll_axis=unrolled_x_r4_uf_24_ua_12[1])
  rolled_back_x_r4_uf_24_ua_13 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_13[0], roll_axis=unrolled_x_r4_uf_24_ua_13[1])
  rolled_back_x_r4_uf_24_ua_34 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_34[0], roll_axis=unrolled_x_r4_uf_24_ua_34[1])

  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_01)
  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_12)
  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_13)
  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_34)

  # List[3] unroll_factor and unroll_axis - Test _get_unrolled_shape
  unrolled_x_r4_uf_242_ua_012 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4, 2], unroll_axis=[0, 1, 2])
  unrolled_x_r4_uf_242_ua_023 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4, 2], unroll_axis=[0, 2, 3])

  assert_equal(unrolled_x_r4_uf_242_ua_012,
               ([2, 2, 1, 4, 4, 2, 16], [0, 2, 4]))
  assert_equal(unrolled_x_r4_uf_242_ua_023,
               ([2, 2, 4, 2, 4, 8, 2], [0, 3, 5]))

  # List[3] unroll_factor and unroll_axis - Test _get_rolled_back_shape
  rolled_back_x_r4_uf_242_ua_012 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_242_ua_012[0], roll_axis=unrolled_x_r4_uf_242_ua_012[1])
  rolled_back_x_r4_uf_242_ua_023 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_242_ua_023[0], roll_axis=unrolled_x_r4_uf_242_ua_023[1])

  assert_equal(x_r4, rolled_back_x_r4_uf_242_ua_012)
  assert_equal(x_r4, rolled_back_x_r4_uf_242_ua_023)


if __name__ == "__main__":
  pytest.main([__file__])


================================================ FILE: tests/quantizer_registry_test.py ================================================
# Copyright 2024 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Unit tests for QKeras quantizer registry.""" import numpy as np import pytest from qkeras import quantizer_registry from qkeras import quantizers @pytest.mark.parametrize( "quantizer_name", [ "quantized_linear", "quantized_bits", "bernoulli", "ternary", "stochastic_ternary", "binary", "stochastic_binary", "quantized_relu", "quantized_ulaw", "quantized_tanh", "quantized_sigmoid", "quantized_po2", "quantized_relu_po2", "quantized_hswish", ], ) def test_lookup(quantizer_name): quantizer = quantizer_registry.lookup_quantizer(quantizer_name) is_class_instance = isinstance(quantizer, type) np.testing.assert_equal(is_class_instance, True) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/range_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Test range values that are used for codebook computation"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose
import pytest

from tensorflow.keras import backend as K

from qkeras import quantized_relu
from qkeras import quantized_bits


@pytest.mark.parametrize(
    'bits, integer, expected_values',
    [
        (3, 0, np.array([0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875])),
        (3, 1, np.array([0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75])),
        (3, 2, np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5])),
        (3, 3, np.array([0, 1, 2, 3, 4, 5, 6, 7])),
        (6, 1, np.array(
            [0.0, 0.03125, 0.0625, 0.09375, 0.125, 0.15625, 0.1875, 0.21875,
             0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,
             0.5, 0.53125, 0.5625, 0.59375, 0.625, 0.65625, 0.6875, 0.71875,
             0.75, 0.78125, 0.8125, 0.84375, 0.875, 0.90625, 0.9375, 0.96875,
             1.0, 1.03125, 1.0625, 1.09375, 1.125, 1.15625, 1.1875, 1.21875,
             1.25, 1.28125, 1.3125, 1.34375, 1.375, 1.40625, 1.4375, 1.46875,
             1.5, 1.53125, 1.5625, 1.59375, 1.625, 1.65625, 1.6875, 1.71875,
             1.75, 1.78125, 1.8125, 1.84375, 1.875, 1.90625, 1.9375, 1.96875]))
    ])
def test_quantized_relu_range(bits, integer, expected_values):
  """Test quantized_relu range function."""
  q = quantized_relu(bits, integer)
  result = q.range()
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, integer, expected_values',
    [
        # Note: negative representable values follow the positive ones
        # (two's-complement ordering of the codebook).
        (3, 0, np.array([0.0, 0.25, 0.5, 0.75, -1.0, -0.75, -0.5, -0.25])),
        (3, 1, np.array([0.0, 0.5, 1.0, 1.5, -2.0, -1.5, -1.0, -0.5])),
        (3, 2, np.array([0.0, 1.0, 2.0, 3.0, -4.0, -3.0, -2.0, -1.0])),
        (3, 3, np.array([0.0, 2.0, 4.0, 6.0, -8.0, -6.0, -4.0, -2.0])),
        (6, 1, np.array(
            [0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
             0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375,
             1.0, 1.0625, 1.125, 1.1875, 1.25, 1.3125, 1.375, 1.4375,
             1.5, 1.5625, 1.625, 1.6875, 1.75, 1.8125, 1.875, 1.9375,
             -2.0, -1.9375, -1.875, -1.8125, -1.75, -1.6875, -1.625, -1.5625,
             -1.5, -1.4375, -1.375, -1.3125, -1.25, -1.1875, -1.125, -1.0625,
             -1.0, -0.9375, -0.875, -0.8125, -0.75, -0.6875, -0.625, -0.5625,
             -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625]))
    ])
def test_quantized_bits_range(bits, integer, expected_values):
  """Test quantized_bits range function."""
  q = quantized_bits(bits, integer)
  result = q.range()
  assert_allclose(result, expected_values, rtol=1e-05)


if __name__ == "__main__":
  pytest.main([__file__])


================================================ FILE: tests/registry_test.py ================================================
# Copyright 2024 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Unit tests for registry."""

from numpy.testing import assert_equal
from numpy.testing import assert_raises
import pytest

from qkeras import registry


def sample_function(arg):
  """Sample function for testing."""
  return arg


class SampleClass(object):
  """Sample class for testing."""

  def __init__(self, arg):
    self._arg = arg

  def get_arg(self):
    # Accessor used to validate registered instances.
    return self._arg


def test_register_function():
  """A function registers under its own __name__ and can be looked up."""
  reg = registry.Registry()
  reg.register(sample_function)
  registered_function = reg.lookup('sample_function')
  # Lookup must return the exact function object that was registered.
  assert_equal(registered_function, sample_function)


def test_register_class():
  """A class registers under its own __name__ and can be looked up."""
  reg = registry.Registry()
  reg.register(SampleClass)
  registered_class = reg.lookup('SampleClass')
  # Lookup must return the exact class object that was registered.
  assert_equal(SampleClass, registered_class)


def test_register_with_name():
  """An explicit name overrides the default __name__ key."""
  reg = registry.Registry()
  name = 'NewSampleClass'
  reg.register(SampleClass, name=name)
  registered_class = reg.lookup(name)
  # Lookup under the explicit name must return the registered class.
  assert_equal(SampleClass, registered_class)


def test_lookup_missing_item():
  """Looking up an unregistered name raises KeyError."""
  reg = registry.Registry()
  assert_raises(KeyError, reg.lookup, 'foo')


def test_lookup_missing_name():
  """Registering an object without __name__ requires an explicit name."""
  reg = registry.Registry()
  sample_class = SampleClass(arg=1)
  # objects don't have a default __name__ attribute.
  assert_raises(AttributeError, reg.register, sample_class)
  # check that the object can be retrieved with a registered name.
  reg.register(sample_class, 'sample_class')
  assert_equal(sample_class, reg.lookup('sample_class'))


if __name__ == '__main__':
  pytest.main([__file__])


================================================ FILE: tests/safe_eval_test.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Implements a safe evaluation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import operator import pytest from qkeras.safe_eval import GetParams from qkeras.safe_eval import safe_eval add = operator.add def test_get_params1(): s = "(3, 0.3, sep=5 )" args, kwargs = GetParams(s) assert args == [3, 0.3] assert kwargs == {"sep": 5} def test_get_params2(): s = "( )" args, kwargs = GetParams(s) assert not args assert not kwargs def test_get_params3(): s = ("(3, 0.3, -1.0, True, False, 'string1', num1=0.1, num2=-3.0, " "str1='string2', bool1=True, bool2=False)") args, kwargs = GetParams(s) assert args == [3, 0.3, -1.0, True, False, "string1"] assert kwargs == { "num1": 0.1, "num2": -3.0, "str1": "string2", "bool1": True, "bool2": False } def test_safe_eval1(): s = "add(3,3)" assert safe_eval(s, globals()) == 6 def i_func(s): return -s def myadd2(a, b): return i_func(a) + i_func(b) def myadd(a=32, b=10): return a + b class myaddcls(object): def __call__(self, a=32, b=10): return a + b def test_safe_eval2(): s_add = [3, 39] assert safe_eval("add", globals(), *s_add) == 42 def test_safe_eval3(): assert safe_eval("myadd()", globals()) == 42 assert safe_eval("myadd(a=39)", globals(), b=3) == 42 def test_safe_eval4(): assert safe_eval("myadd2(a=39)", globals(), b=3) == -42 assert safe_eval("myadd2(a= 39)", globals(), b=3) == -42 assert safe_eval("myadd2(a= 39, b = 3)", globals()) == -42 def test_safe_eval5(): assert safe_eval("myadd", globals())(3,39) == 42 assert safe_eval("myaddcls", globals())(3,39) == 42 assert safe_eval("myaddcls()", globals())(3,39) == 42 if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/utils_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # 
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for methods in utils.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pytest
import os
import tempfile

from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from qkeras import *
from qkeras.utils import get_model_sparsity
from qkeras.utils import model_quantize
from qkeras.utils import convert_to_folded_model
from qkeras.utils import is_TFOpLambda_layer
from qkeras.utils import find_bn_fusing_layer_pair
from qkeras.utils import add_bn_fusing_weights
from qkeras.utils import clone_model_and_freeze_auto_po2_scale
from qkeras.utils import load_qmodel


def create_quantized_network():
  """Creates a simple quantized conv net model.

  Builds a plain Keras conv net, then runs model_quantize over it so the
  Conv2D/Activation layers become their QKeras equivalents.

  Returns:
    A quantized Keras model with 4-bit kernel/bias quantizers and ternary
    relu activations.
  """
  # Create a simple model
  xi = Input((28, 28, 1))
  x = Conv2D(32, (3, 3))(xi)
  x = Activation("relu")(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  model = Model(inputs=xi, outputs=x)

  # Quantize the model. Keys map layer types (and, for activations, the
  # activation name) to quantizer constructor strings.
  quantizer_config = {
      "QConv2D": {
          "kernel_quantizer": "quantized_bits(4)",
          "bias_quantizer": "quantized_bits(4)"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  activation_bits = 4
  qmodel = model_quantize(model, quantizer_config, activation_bits)
  return qmodel


def create_quantized_po2_network():
  """Creates a simple quantized conv net model with po2 quantizers.

  Returns:
    A QKeras model whose conv kernels use power-of-two quantization.
  """
  xi = Input((28, 28, 1))
  x = QConv2D(32, (3, 3), kernel_quantizer=quantized_po2(4))(xi)
  x = QActivation(quantized_bits(8))(x)
  x = QConv2D(32, (3, 3), kernel_quantizer=quantized_po2(4))(x)
  x = QActivation(quantized_bits(8))(x)
  qmodel = Model(xi, x, name='simple_po2_qmodel')
  return qmodel


def set_network_sparsity(model, sparsity):
  """Set the sparsity of the given model using random weights.

  Args:
    model: Keras model whose weights are overwritten in place.
    sparsity: Fraction (0.0-1.0) of each weight tensor to set to zero.

  Returns:
    The same model, with every weight tensor replaced by random positive
    values of which roughly `sparsity` fraction are exactly zero.
  """
  for layer in model.layers:
    new_weights = []
    for w in layer.get_weights():
      # Create weights with desired sparsity: random values offset by 0.1 so
      # none are accidentally zero, then zero out the first chunk and shuffle
      # so the zeros land in random positions.
      sparse_weights = np.random.rand(w.size) + 0.1
      sparse_weights[:int(w.size * sparsity)] = 0
      np.random.shuffle(sparse_weights)
      new_weights.append(sparse_weights.reshape(w.shape))
    layer.set_weights(new_weights)
  return model


def test_get_model_sparsity():
  """Tests if the method get_model_sparsity in utils.py works correctly."""
  qmodel = create_quantized_network()

  # Generate sparsity levels to test, always including the 0.0 and 1.0
  # boundary cases.
  sparsity_levels = np.concatenate((np.random.rand(10), [1.0, 0.0])).round(2)

  # Test various sparsity levels: the measured sparsity should track the
  # sparsity we injected to within 1%.
  for true_sparsity in sparsity_levels:
    qmodel = set_network_sparsity(qmodel, true_sparsity)
    calc_sparsity = get_model_sparsity(qmodel)
    assert np.abs(calc_sparsity - true_sparsity) < 0.01


def test_get_po2_model_sparsity():
  """Tests get_model_sparsity on a po2-quantized model.

  Models quantized with po2 quantizers should have a sparsity near 0 because
  if the exponent is set to 0, the value of the weight will equal
  2^0 == 1 != 0
  """
  qmodel = create_quantized_po2_network()
  # NOTE(review): presumably forces the legacy quantizer config path in
  # get_model_sparsity — confirm against qkeras.utils.
  qmodel.use_legacy_config = True

  # Generate sparsity levels to test
  sparsity_levels = np.concatenate((np.random.rand(10), [1.0, 0.0])).round(2)

  # Test various sparsity levels: regardless of how many raw weights are
  # zeroed, the po2-quantized values should be (near) all non-zero.
  for set_sparsity in sparsity_levels:
    qmodel = set_network_sparsity(qmodel, set_sparsity)
    calc_sparsity = get_model_sparsity(qmodel)
    assert np.abs(calc_sparsity - 0) < 0.01


def test_convert_to_folded_model():
  """Test convert_to_folded_model to work properly on non-sequential model."""

  def get_add_model():
    # Two parallel conv+bn+relu branches joined by an Add layer, so the
    # model graph is non-sequential.
    x = x_in = Input(shape=(4, 4, 1), name="input")
    x1 = Conv2D(4, kernel_size=(2, 2), padding="valid", strides=(1, 1),
                name="conv2d_1")(x)
    x1 = BatchNormalization(name="bn_1")(x1)
    x1 = Activation("relu", name="relu_1")(x1)
    x2 = Conv2D(4, kernel_size=(2, 2), padding="valid", strides=(1, 1),
                name="conv2d_2")(x)
    x2 = BatchNormalization(name="bn_2")(x2)
    x2 = Activation("relu", name="relu_2")(x2)
    x = Add(name="add")([x1, x2])
    x = Softmax()(x)
    return Model(inputs=[x_in], outputs=[x])

  model = get_add_model()
  fmodel, _ = convert_to_folded_model(model)
  # After folding bn into conv, the add layer should sit at index 5.
  assert fmodel.layers[5].name == "add"

  # test if convert_to_folded_model work with TFOpLambda layers
  def hard_sigmoid(x):
    # `x + 3.` and the `* (1. / 6.)` scale become TFOpLambda layers in the
    # Keras graph.
    return ReLU(6.)(x + 3.) * (1. / 6.)

  def hard_swish(x):
    return Multiply()([hard_sigmoid(x), x])

  def get_lambda_model():
    x = x_in = Input(shape=(4, 4, 1), name="input")
    x = Conv2D(
        4, kernel_size=(2, 2), padding="valid", strides=(1, 1),
        name="conv2d_1")(x)
    x = hard_swish(x)
    return Model(inputs=[x_in], outputs=[x])

  model = get_lambda_model()
  fmodel, _ = convert_to_folded_model(model)
  assert is_TFOpLambda_layer(model.layers[2])
  assert is_TFOpLambda_layer(model.layers[4])
  assert isinstance(fmodel.layers[5], Multiply)


def test_find_bn_fusing_layer_pair():
  """Tests detection of conv/bn pairs and the fused weights they produce."""
  # Two conv branches, each followed by its own QBatchNormalization, merged
  # by an Add layer.
  x = x_in = Input((23, 23, 1), name="input")
  x1 = QConv2D(
      2, 2, 1,
      kernel_quantizer=quantized_bits(4, 0, 1),
      bias_quantizer=quantized_bits(4, 0, 1),
      use_bias=False,
      name="conv1")(x)
  x1 = QBatchNormalization(
      mean_quantizer=quantized_bits(4, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantized_bits(4, 0, 1),
      inverse_quantizer=quantized_bits(8, 0, 1),
      name="bn1")(x1)
  x2 = QConv2D(
      2, 2, 1,
      kernel_quantizer=quantized_bits(3, 0),
      bias_quantizer=quantized_bits(3, 2),
      name="conv2")(x)
  x2 = QBatchNormalization(
      mean_quantizer=quantized_bits(4, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantized_bits(4, 0, 1),
      inverse_quantizer=quantized_bits(8, 0, 1),
      name="bn2")(x2)
  x = Add(name="add")([x1, x2])
  model = Model(inputs=[x_in], outputs=[x])

  # Each conv layer should be paired with the bn layer that follows it.
  (conv_bn_pair_dict, _) = find_bn_fusing_layer_pair(model)
  assert conv_bn_pair_dict["conv1"] == "bn1"
  assert conv_bn_pair_dict["conv2"] == "bn2"

  conv_layer = model.layers[1]
  bn_layer = model.layers[3]

  # Fixed weights so the fused bn_inv / fused_bias values below are exact.
  conv_layer.set_weights([
      np.array([[[[0.5, 0.75]], [[1.5, -0.625]]],
                [[[-0.875, 1.25]], [[-1.25, -2.5]]]])
  ])
  # BN weights in Keras order: gamma, beta, moving_mean, moving_variance.
  bn_layer.set_weights([
      np.array([1., 0.25]),
      np.array([0.5, 1.0]),
      np.array([0.5, 2.5]),
      np.array([1.5, 1.])
  ])
  saved_weights = {}
  saved_weights[conv_layer.name] = {}
  add_bn_fusing_weights(conv_layer, bn_layer, saved_weights)

  d = saved_weights[conv_layer.name]
  assert d["enable_bn_fusing"]
  assert d["fused_bn_layer_name"] == "bn1"
  assert np.all(d["bn_inv"] == np.array([0.8125, 0.25]))
  assert np.all(d["fused_bias"] == np.array([0.09375, 0.65625]))


def create_test_model_for_scale_freezing(bias_quantizer):
  """Builds a small model with auto_po2 quantizers and deterministic weights.

  Args:
    bias_quantizer: Quantizer used for the final QDense bias; tests pass an
      auto_po2 quantizer here to trigger the multiple-auto_po2 error path.

  Returns:
    A compiled-graph Keras model with fixed weights, ready for
    clone_model_and_freeze_auto_po2_scale tests.
  """

  def _create_simple_model(bias_quantizer):
    # conv -> dw_conv -> bn -> relu -> flatten -> dense, with auto_po2
    # quantizers on the conv/dw_conv/dense kernels and the bn inverse.
    x = x_in = tf.keras.Input((4, 4, 1), name="input")
    x = QConv2D(
        filters=4, kernel_size=2, strides=2,
        kernel_quantizer=quantized_bits(4, 2, 1, alpha="auto_po2"),
        bias_quantizer=quantized_bits(4, 2, 1),
        use_bias=False,
        name="conv")(x)
    x = QDepthwiseConv2D(
        kernel_size=2, strides=1,
        depthwise_quantizer=quantized_bits(6, 3, 1, alpha="auto_po2"),
        use_bias=False,
        bias_quantizer=quantized_bits(4, 2, 1),
        name="dw_conv")(x)
    x = QBatchNormalization(
        mean_quantizer=quantized_bits(4, 2, 1),
        gamma_quantizer=None,
        variance_quantizer=None,
        beta_quantizer=quantized_bits(4, 0, 1),
        inverse_quantizer=quantized_bits(8, 0, 1, alpha="auto_po2"),
        name="bn")(x)
    x = QActivation(activation=quantized_bits(4, 0), name="relu")(x)
    x = tf.keras.layers.Flatten(name="flatten")(x)
    x = QDense(
        units=2,
        kernel_quantizer=quantized_bits(4, 2, 1, alpha="auto_po2"),
        bias_quantizer=bias_quantizer,
        name="dense")(x)
    model = tf.keras.Model(inputs=x_in, outputs=x)
    return model

  def _set_weights(model):
    # Deterministic weights so the expected quantized values and scales in
    # the tests below are exact.
    conv_w = [np.array(
        [0.23, 2.76, 0.1, 0.33, 0.53, 0.16, 0.3, 1.7,
         -0.9, 1.43, 2.31, -0.2, -1.7, 0.39, -2.03, 1.79]).reshape(
             2, 2, 1, 4)]
    dw_conv_w = [np.array([
        0.03, 3.6, 2.1, 1.2, 0.13, 1.3, -0.3, 1.2,
        -0.7, -10.3, 11.7, -0.92, -10.7, 0.59, -1.93, 2.8]).reshape(
            (2, 2, 4, 1))]
    bn_w = [np.array([0.28, 1.33, 2.27, 3.36]),
            np.array([0.31, 0.1, 0.03, 4.26]),
            np.array([0.89, -0.21, 1.97, 2.06]),
            np.array([1.2, 0.9, 13.2, 10.9])]
    dense_w = np.array(
        [0.13, 0.66, 0.21, 0.23, 1.07, -0.79, 1.83, 1.81])
    dense_w = [dense_w.reshape((4, 2)), np.array([-1.3, 0.7])]
    model.get_layer("conv").set_weights(conv_w)
    model.get_layer("dw_conv").set_weights(dw_conv_w)
    model.get_layer("bn").set_weights(bn_w)
    model.get_layer("dense").set_weights(dense_w)

  orig_model = _create_simple_model(bias_quantizer)
  _set_weights(orig_model)
  return orig_model


def test_clone_model_and_freeze_auto_po2_scale():
  """Test clone_model_and_freeze_auto_po2_scale to work properly."""
  orig_model = create_test_model_for_scale_freezing(quantized_bits(4, 2, 1))
  _, new_hw = clone_model_and_freeze_auto_po2_scale(
      orig_model, quantize_model_weights=True)

  # Check if the new model's weights and scales are derived properly.
  np.testing.assert_array_equal(
      new_hw["conv"]["weights"][0],
      np.array([[[[0.5, 6, 0, 0.5]], [[1, 0, 0.5, 3.5]]],
                [[[-2., 3., 3.5, -0.5]], [[-3.5, 1., -3.5, 3.5]]]]))
  np.testing.assert_array_equal(
      new_hw["conv"]["scales"][0],
      np.array([[[[0.25, 0.5, 0.25, 0.25]]]]))

  np.testing.assert_array_equal(
      new_hw["dw_conv"]["weights"][0].numpy().flatten(),
      np.array([
          0., 14, 8, 4, 0, 6, -2, 4, -2, -42, 46, -4, -42, 2, -8, 12]))

  np.testing.assert_array_equal(
      new_hw["dense"]["scales"][0], np.array([[0.25, 0.25]]))


def test_clone_model_and_freeze_auto_po2_scale_serialization():
  """Tests that the cloned model can be saved and loaded properly."""
  # Test if the cloned model can be saved and loaded properly.
  orig_model = create_test_model_for_scale_freezing(quantized_bits(4, 2, 1))
  new_model, _ = clone_model_and_freeze_auto_po2_scale(
      orig_model, quantize_model_weights=True)
  fd, fname = tempfile.mkstemp(".hdf5")
  new_model.save(fname)
  _ = load_qmodel(fname)
  # mkstemp opens the file; close our descriptor before deleting.
  os.close(fd)
  os.remove(fname)


def test_clone_model_and_freeze_auto_po2_scale_error():
  """Tests the error path for layers with multiple auto_po2 quantizers."""
  orig_model = create_test_model_for_scale_freezing(
      quantized_bits(4, 2, 1, alpha="auto_po2"))

  # Test if the function raises an error when there are more than one
  # auto_po2 quantizers in a layer.
  with pytest.raises(ValueError):
    clone_model_and_freeze_auto_po2_scale(
        orig_model, quantize_model_weights=False)


if __name__ == "__main__":
  pytest.main([__file__])