Repository: google/qkeras Branch: master Commit: 5e0cd30c20b1 Files: 140 Total size: 1.4 MB Directory structure: gitextract_ead9uto5/ ├── .github/ │ └── workflows/ │ └── ci.yml ├── CHANGELOG ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples/ │ ├── example_act.py │ ├── example_b2t.py │ ├── example_cifar10_po2.py │ ├── example_keras_to_qkeras.py │ ├── example_mnist.py │ ├── example_mnist_ae.py │ ├── example_mnist_b2t.py │ ├── example_mnist_bn.py │ ├── example_mnist_po2.py │ ├── example_mnist_prune.py │ ├── example_qdense.py │ ├── example_qoctave.py │ └── example_ternary.py ├── experimental/ │ └── lo/ │ ├── __init__.py │ ├── compress.py │ ├── conv2d.py │ ├── dense.py │ ├── generate_rf_code.py │ ├── optimizer.py │ ├── random_forest/ │ │ ├── __init__.py │ │ ├── gen_random_tree.py │ │ ├── parser.py │ │ ├── random_forest.py │ │ ├── random_tree.py │ │ └── utils.py │ ├── receptive.py │ ├── table/ │ │ ├── __init__.py │ │ ├── parser.py │ │ └── utils.py │ └── utils.py ├── notebook/ │ ├── AutoQKeras.ipynb │ ├── CodebookQuantization.ipynb │ ├── QKerasTutorial.ipynb │ └── QRNNTutorial.ipynb ├── qkeras/ │ ├── __init__.py │ ├── autoqkeras/ │ │ ├── __init__.py │ │ ├── autoqkeras_internal.py │ │ ├── examples/ │ │ │ └── run/ │ │ │ ├── get_data.py │ │ │ ├── get_model.py │ │ │ ├── networks/ │ │ │ │ ├── __init__.py │ │ │ │ └── conv_block.py │ │ │ └── plot_history.py │ │ ├── forgiving_metrics/ │ │ │ ├── __init__.py │ │ │ ├── forgiving_bits.py │ │ │ ├── forgiving_energy.py │ │ │ └── forgiving_factor.py │ │ ├── quantization_config.py │ │ ├── tests/ │ │ │ └── test_forgiving_factor.py │ │ └── utils.py │ ├── b2t.py │ ├── base_quantizer.py │ ├── bn_folding_utils.py │ ├── callbacks.py │ ├── codebook.py │ ├── estimate.py │ ├── experimental/ │ │ └── quantizers/ │ │ ├── __init__.py │ │ └── quantizers_po2.py │ ├── qconv2d_batchnorm.py │ ├── qconvolutional.py │ ├── qdepthwise_conv2d_transpose.py │ ├── qdepthwiseconv2d_batchnorm.py │ ├── qlayers.py │ ├── qmac.py │ ├── 
qmodel.proto │ ├── qnormalization.py │ ├── qoctave.py │ ├── qpooling.py │ ├── qrecurrent.py │ ├── qseparable_conv2d_transpose.py │ ├── qtools/ │ │ ├── DnC/ │ │ │ ├── divide_and_conquer.py │ │ │ └── dnc_layer_cost_ace.py │ │ ├── __init__.py │ │ ├── config_public.py │ │ ├── examples/ │ │ │ ├── example_generate_json.py │ │ │ └── example_get_energy.py │ │ ├── generate_layer_data_type_map.py │ │ ├── interface.py │ │ ├── qenergy/ │ │ │ ├── __init__.py │ │ │ └── qenergy.py │ │ ├── qgraph.py │ │ ├── qtools_util.py │ │ ├── quantized_operators/ │ │ │ ├── __init__.py │ │ │ ├── accumulator_factory.py │ │ │ ├── accumulator_impl.py │ │ │ ├── adder_factory.py │ │ │ ├── adder_impl.py │ │ │ ├── divider_factory.py │ │ │ ├── divider_impl.py │ │ │ ├── fused_bn_factory.py │ │ │ ├── merge_factory.py │ │ │ ├── multiplier_factory.py │ │ │ ├── multiplier_impl.py │ │ │ ├── qbn_factory.py │ │ │ ├── quantizer_factory.py │ │ │ ├── quantizer_impl.py │ │ │ └── subtractor_factory.py │ │ ├── run_qtools.py │ │ └── settings.py │ ├── quantizer_imports.py │ ├── quantizer_registry.py │ ├── quantizers.py │ ├── registry.py │ ├── safe_eval.py │ └── utils.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests/ ├── automatic_conversion_test.py ├── autoqkeras_test.py ├── bn_folding_test.py ├── callbacks_test.py ├── codebook_test.py ├── leakyrelu_test.py ├── min_max_test.py ├── print_qstats_test.py ├── qactivation_test.py ├── qadaptiveactivation_test.py ├── qalpha_test.py ├── qconvolutional_test.py ├── qdepthwise_conv2d_transpose_test.py ├── qlayers_test.py ├── qmac_test.py ├── qnoise_test.py ├── qpooling_test.py ├── qrecurrent_test.py ├── qseparable_conv2d_transpose_test.py ├── qtools_model_test.py ├── qtools_util_test.py ├── quantizer_impl_test.py ├── quantizer_registry_test.py ├── range_test.py ├── registry_test.py ├── safe_eval_test.py └── utils_test.py ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .github/workflows/ci.yml ================================================ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: CI tests on: push: branches: [ master ] pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python 3.7 uses: actions/setup-python@v2 with: python-version: 3.7 - name: Install dependencies run: | python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi pip install . python setup.py install - name: Test with pytest run: | pytest ================================================ FILE: CHANGELOG ================================================ v0.5, 2019/07 -- Initial release. v0.6, 2020/03 -- Support tensorflow 2.0, tf.keras and python3. v0.7, 2020/03 -- Enhancement of binary and ternary quantization. ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. Head over to https://cla.developers.google.com/ to see your current agreements on file or to sign a new one. You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review.
We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. ## Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). ================================================ FILE: LICENSE ================================================ Copyright 2019 The QKeras Authors. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ include *.txt recursive-include docs *.txt ================================================ FILE: README.md ================================================ # QKeras [github.com/google/qkeras](https://github.com/google/qkeras) ## Introduction QKeras is a quantization extension to Keras that provides a drop-in replacement for some of the Keras layers, especially the ones that create parameters and activation layers, and perform arithmetic operations, so that we can quickly create a deep quantized version of a Keras network. According to Tensorflow documentation, Keras is a high-level API to build and train deep learning models. It's used for fast prototyping, advanced research, and production, with three key advantages: - User friendly Keras has a simple, consistent interface optimized for common use cases. It provides clear and actionable feedback for user errors. - Modular and composable Keras models are made by connecting configurable building blocks together, with few restrictions. - Easy to extend Write custom building blocks to express new ideas for research. Create new layers, loss functions, and develop state-of-the-art models. QKeras is being designed to extend the functionality of Keras using Keras' design principle, i.e. 
being user friendly, modular and extensible, adding to it being "minimally intrusive" of Keras native functionality. In order to successfully quantize a model, users need to replace variable creating layers (Dense, Conv2D, etc) by their counterparts (QDense, QConv2D, etc), and any layers that perform math operations need to be quantized afterwards. ## Publications - Claudionor N. Coelho Jr, Aki Kuusela, Shan Li, Hao Zhuang, Jennifer Ngadiuba, Thea Klaeboe Aarrestad, Vladimir Loncar, Maurizio Pierini, Adrian Alan Pol, Sioni Summers, "Automatic heterogeneous quantization of deep neural networks for low-latency inference on the edge for particle detectors", Nature Machine Intelligence (2021), https://www.nature.com/articles/s42256-021-00356-5 - Claudionor N. Coelho Jr., Aki Kuusela, Hao Zhuang, Thea Aarrestad, Vladimir Loncar, Jennifer Ngadiuba, Maurizio Pierini, Sioni Summers, "Ultra Low-latency, Low-area Inference Accelerators using Heterogeneous Deep Quantization with QKeras and hls4ml", http://arxiv.org/abs/2006.10159v1 - Erwei Wang, James J. Davis, Daniele Moro, Piotr Zielinski, Claudionor Coelho, Satrajit Chatterjee, Peter Y. K. Cheung, George A. 
Constantinides, "Enabling Binary Neural Network Training on the Edge", https://arxiv.org/abs/2102.04270 ## Layers Implemented in QKeras - QDense - QConv1D - QConv2D - QDepthwiseConv2D - QSeparableConv1D (depthwise + pointwise convolution, without quantizing the activation values after the depthwise step) - QSeparableConv2D (depthwise + pointwise convolution, without quantizing the activation values after the depthwise step) - QMobileNetSeparableConv2D (extended from MobileNet SeparableConv2D implementation, quantizes the activation values after the depthwise step) - QConv2DTranspose - QActivation - QAdaptiveActivation - QAveragePooling2D (in fact, an AveragePooling2D stacked with a QActivation layer for quantization of the result) - QBatchNormalization (is still in its experimental stage, as we have not seen the need to use this yet due to the normalization and regularization effects of stochastic activation functions.) - QOctaveConv2D - QSimpleRNN, QSimpleRNNCell - QLSTM, QLSTMCell - QGRU, QGRUCell - QBidirectional It is worth noting that not all functionality is safe at this time to be used with other high-level operations, such as with layer wrappers. For example, Bidirectional layer wrappers are used with RNNs. If this is required, we encourage users to use quantization functions invoked as strings instead of the actual functions as a way through this, but we may change that implementation in the future. A first attempt to create a safe mechanism in QKeras is the adoption of QActivation is a wrap-up that provides an encapsulation around the activation functions so that we can save and restore the network architecture, and duplicate them using Keras interface, but this interface has not been fully tested yet. 
## Activation Layers Implemented in QKeras - smooth_sigmoid(x) - hard_sigmoid(x) - binary_sigmoid(x) - binary_tanh(x) - smooth_tanh(x) - hard_tanh(x) - quantized_bits(bits=8, integer=0, symmetric=0, keep_negative=1)(x) - bernoulli(alpha=1.0)(x) - stochastic_ternary(alpha=1.0, threshold=0.33)(x) - ternary(alpha=1.0, threshold=0.33)(x) - stochastic_binary(alpha=1.0)(x) - binary(alpha=1.0)(x) - quantized_relu(bits=8, integer=0, use_sigmoid=0, negative_slope=0.0)(x) - quantized_ulaw(bits=8, integer=0, symmetric=0, u=255.0)(x) - quantized_tanh(bits=8, integer=0, symmetric=0)(x) - quantized_po2(bits=8, max_value=-1)(x) - quantized_relu_po2(bits=8, max_value=-1)(x) The stochastic_* functions, bernoulli as well as quantized_relu and quantized_tanh rely on stochastic versions of the activation functions. They draw a random number with uniform distribution from _hard_sigmoid of the input x, and result is based on the expected value of the activation function. Please refer to the papers if you want to understand the underlying theory, or the documentation in qkeras/qlayers.py. The parameters "bits" specify the number of bits for the quantization, and "integer" specifies how many bits of "bits" are to the left of the decimal point. Finally, our experience in training networks with QSeparableConv2D, both quantized_bits and quantized_tanh that generates values between [-1, 1), required symmetric versions of the range in order to properly converge and eliminate the bias. Every time we use a quantization for weights and bias that can generate numbers outside the range [-1.0, 1.0], we need to adjust the *_range to the number. For example, if we have a quantized_bits(bits=6, integer=2) in a weight of a layer, we need to set the weight range to 2**2, which is equivalent to Catapult HLS ac_fixed<6, 3, true>. 
Similarly, for quantization functions that accept an alpha parameter, we need to specify a range of alpha, and for po2 type of quantizers, we need to specify the range of max_value. ### Example Suppose you have the following network. An example of a very simple network is given below in Keras. ```python from keras.layers import * x = x_in = Input(shape) x = Conv2D(18, (3, 3), name="first_conv2d")(x) x = Activation("relu")(x) x = SeparableConv2D(32, (3, 3))(x) x = Activation("relu")(x) x = Flatten()(x) x = Dense(NB_CLASSES)(x) x = Activation("softmax")(x) ``` You can easily quantize this network as follows: ```python from keras.layers import * from qkeras import * x = x_in = Input(shape) x = QConv2D(18, (3, 3), kernel_quantizer="stochastic_ternary", bias_quantizer="ternary", name="first_conv2d")(x) x = QActivation("quantized_relu(3)")(x) x = QSeparableConv2D(32, (3, 3), depthwise_quantizer=quantized_bits(4, 0, 1), pointwise_quantizer=quantized_bits(3, 0, 1), bias_quantizer=quantized_bits(3), depthwise_activation=quantized_tanh(6, 2, 1))(x) x = QActivation("quantized_relu(3)")(x) x = Flatten()(x) x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(3), bias_quantizer=quantized_bits(3))(x) x = QActivation("quantized_bits(20, 5)")(x) x = Activation("softmax")(x) ``` The last QActivation is advisable if you want to compare results later on. Please find more cases under the directory examples. ## QTools The purpose of QTools is to assist hardware implementation of the quantized model and model energy consumption estimation. QTools has two functions: data type map generation and energy consumption estimation. - Data Type Map Generation: QTools automatically generate the data type map for weights, bias, multiplier, adder, etc. of each layer. The data type map includes operation type, variable size, quantizer type and bits, etc. Input of the QTools is: 1) a given quantized model; 2) a list of input quantizers for the model. 
Output of QTools is a json file that lists the data type map of each layer (stored in qtools_instance._output_dict) Output methods include: qtools_stats_to_json, which is to output the data type map to a json file; qtools_stats_print, which is to print out the data type map. - Energy Consumption Estimation: Another function of QTools is to estimate the model energy consumption in Pico Joules (pJ). It provides a tool for QKeras users to quickly estimate energy consumption for memory access and MAC operations in a quantized model derived from QKeras, especially when comparing power consumption of two models running on the same device. As with any high-level model, it should be used with caution when attempting to estimate the absolute energy consumption of a model for a given technology, or when attempting to compare different technologies. This tool also provides a measure for model tuning which needs to consider both accuracy and model energy consumption. The energy cost provided by this tool can be integrated into a total loss function which combines energy cost and accuracy. - Energy Model: The best work referenced by the literature on energy consumption was first computed by Horowitz M.: “1.1 computing’s energy problem ( and what we can do about it)”; IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC), 2014 In this work, the author attempted to estimate the energy consumption for accelerators, and for 45 nm process, the data points he presented have since been used whenever someone wants to compare accelerator performance. QTools energy consumption on a 45nm process is based on the data published in this work. - Examples: Example of how to generate data type map can be found in qkeras/qtools/ examples/example_generate_json.py. 
Example of how to generate energy consumption estimation can be found in qkeras/qtools/examples/example_get_energy.py ## AutoQKeras AutoQKeras allows the automatic quantization and rebalancing of deep neural networks by treating quantization and rebalancing of an existing deep neural network as a hyperparameter search in Keras-Tuner using random search, hyperband or gaussian processes. In order to contain the explosion of hyperparameters, users can group tasks by patterns, and perform distributed training using available resources. Extensive documentation is present in notebook/AutoQKeras.ipynb. ## Related Work QKeras has been implemented based on the work of "B.Moons et al. - Minimum Energy Quantized Neural Networks", Asilomar Conference on Signals, Systems and Computers, 2017 and "Zhou, S. et al. - DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients," but the framework should be easily extensible. The original code from QNN can be found below. https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow QKeras extends QNN by providing a richer set of layers (including SeparableConv2D, DepthwiseConv2D, ternary and stochastic ternary quantizations), besides some functions to aid the estimation for the accumulators and conversion from non-quantized to quantized networks. Finally, our main goal is ease of use, so we attempt to make QKeras layers a true drop-in replacement for Keras, so that users can easily exchange non-quantized layers for quantized ones. ### Acknowledgements Portions of QKeras were derived from QNN. 
https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow
Copyright (c) 2017, Bert Moons where it applies

================================================
FILE: examples/example_act.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Example the usage of activation functions in qkeras."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

from qkeras import binary
from qkeras import bernoulli
from qkeras import hard_sigmoid
from qkeras import hard_tanh
from qkeras import quantized_bits
from qkeras import quantized_relu
from qkeras import quantized_tanh
from qkeras import quantized_po2
from qkeras import quantized_relu_po2
from qkeras import set_internal_sigmoid
from qkeras import smooth_sigmoid
from qkeras import smooth_tanh
from qkeras import stochastic_binary
from qkeras import stochastic_ternary
from qkeras import ternary


def main():
  """Prints the outputs of qkeras quantizers on small constant tensors.

  Demonstration script: exercises stochastic-rounding po2 quantizers,
  fixed-point quantizers (quantized_bits / quantized_relu / quantized_tanh),
  stochastic binary/ternary quantizers, and the effect of switching the
  internal sigmoid implementation via set_internal_sigmoid.
  """
  # Check the mean value of samples from stochastic_rounding for po2:
  # averaging many stochastic quantizations of a constant should approach
  # the constant itself.
  np.random.seed(42)
  count = 100000
  val = 42
  a = K.constant([val] * count)
  b = quantized_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be close to ", val)
  b = quantized_relu_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be close to ", val)

  # Negative inputs through a relu-style po2 quantizer should all map to 0.
  a = K.constant([-1] * count)
  b = quantized_relu_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be all ", 0)

  # non-stochastic rounding quantizer.
  # NOTE(review): `a` is immediately reassigned below, so the first constant
  # is unused — kept as in the original example.
  a = K.constant([-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0])
  a = K.constant([0.194336])
  print(" a =", K.eval(a).astype(np.float16))
  print("qa =", K.eval(quantized_relu(6,2)(a)).astype(np.float16))
  print("ss =", K.eval(smooth_sigmoid(a)).astype(np.float16))
  print("hs =", K.eval(hard_sigmoid(a)).astype(np.float16))
  print("ht =", K.eval(hard_tanh(a)).astype(np.float16))
  print("st =", K.eval(smooth_tanh(a)).astype(np.float16))

  # quantized_bits with different (bits, integer, keep_negative) settings.
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print(" c =", K.eval(c).astype(np.float16))
  print("qb_111 =", K.eval(quantized_bits(1,1,1)(c)).astype(np.float16))
  print("qb_210 =", K.eval(quantized_bits(2,1,0)(c)).astype(np.float16))
  print("qb_211 =", K.eval(quantized_bits(2,1,1)(c)).astype(np.float16))
  print("qb_300 =", K.eval(quantized_bits(3,0,0)(c)).astype(np.float16))
  print("qb_301 =", K.eval(quantized_bits(3,0,1)(c)).astype(np.float16))

  # 1000 identical rows: column-wise averages estimate the expected value of
  # the stochastic quantizers below.
  c_1000 = K.constant(np.array([list(K.eval(c))] * 1000))
  b = np.sum(K.eval(bernoulli()(c_1000)).astype(np.int32), axis=0) / 1000.0
  print(" hs =", K.eval(hard_sigmoid(c)).astype(np.float16))
  print(" b_all =", b.astype(np.float16))

  # NOTE(review): T is assigned but never used in the original example.
  T = 0.0
  t = K.eval(stochastic_ternary(alpha="auto")(c_1000))
  for i in range(10):
    print("stochastic_ternary({}) =".format(i), t[i])
  print(" st_all =", np.round(
      np.sum(t.astype(np.float32), axis=0).astype(np.float16) / 1000.0,
      2).astype(np.float16))
  print(" ternary =", K.eval(ternary(threshold=0.5)(c)).astype(np.int32))

  # binary / quantized_relu / quantized_tanh with the default (hard) sigmoid.
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print(" c =", K.eval(c).astype(np.float16))
  print(" b_10 =", K.eval(binary(1)(c)).astype(np.float16))
  print("qr_10 =", K.eval(quantized_relu(1,0)(c)).astype(np.float16))
  print("qr_11 =", K.eval(quantized_relu(1,1)(c)).astype(np.float16))
  print("qr_20 =", K.eval(quantized_relu(2,0)(c)).astype(np.float16))
  print("qr_21 =", K.eval(quantized_relu(2,1)(c)).astype(np.float16))
  print("qr_101 =", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))

  # Same quantizers again, but with the internal sigmoid switched to the
  # smooth approximation.
  set_internal_sigmoid("smooth"); print("with smooth sigmoid")
  print("qr_101 =", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))

  # ... and with the exact sigmoid.
  set_internal_sigmoid("real"); print("with real sigmoid")
  print("qr_101 =", K.eval(quantized_relu(1,0,1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1,1,1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2,0,1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2,1,1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2,0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2,1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2,0,1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2,1,1)(c)).astype(np.float16))
  set_internal_sigmoid("hard")

  # Power-of-two quantizers without stochastic rounding.
  # NOTE(review): the "qr2_44" label does not match the (4,1) arguments it
  # prints — looks like a stale label in the original example.
  print(" c =", K.eval(c).astype(np.float16))
  print("q2_31 =", K.eval(quantized_po2(3,1)(c)).astype(np.float16))
  print("q2_32 =", K.eval(quantized_po2(3,2)(c)).astype(np.float16))
  print("qr2_21 =", K.eval(quantized_relu_po2(2,1)(c)).astype(np.float16))
  print("qr2_22 =", K.eval(quantized_relu_po2(2,2)(c)).astype(np.float16))
  print("qr2_44 =", K.eval(quantized_relu_po2(4,1)(c)).astype(np.float16))

  # stochastic rounding
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print("q2_32_2 =", K.eval(quantized_relu_po2(32,2)(c)).astype(np.float16))
  b = K.eval(stochastic_binary()(c_1000)).astype(np.int32)
  for i in range(5):
    print("sbinary({}) =".format(i), b[i])
  print("sbinary =",
        np.round(np.sum(b, axis=0) / 1000.0, 2).astype(np.float16))
  print(" binary =", K.eval(binary()(c)).astype(np.int32))
  print(" c =", K.eval(c).astype(np.float16))
  for i in range(10):
    print(" s_bin({}) =".format(i),
          K.eval(binary(use_stochastic_rounding=1)(c)).astype(np.int32))
  for i in range(10):
    print(" s_po2({}) =".format(i),
          K.eval(quantized_po2(use_stochastic_rounding=1)(c)).astype(np.int32))
  for i in range(10):
    print(
        " s_relu_po2({}) =".format(i),
        K.eval(quantized_relu_po2(use_stochastic_rounding=1)(c)).astype(
            np.int32))


if __name__ == '__main__':
  main()

================================================
FILE: examples/example_b2t.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Implements total/partial Binary to Thermometer decoder.""" import numpy as np from qkeras import BinaryToThermometer if __name__ == "__main__": np.random.seed(42) x = np.array(range(8)) b = BinaryToThermometer(x, 2, 8) print(b) b = BinaryToThermometer(x, 2, 8, 1) print(b) b = BinaryToThermometer(x, 2, 8, 1, use_two_hot_encoding=1) print(b) b = BinaryToThermometer(x, 4, 8) print(b) b = BinaryToThermometer(x, 4, 8, 1) print(b) b = BinaryToThermometer(x, 4, 8, 1, use_two_hot_encoding=1) print(b) x = np.random.randint(0, 255, (100, 28, 28, 1)) print(x[0, 0, 0:5]) b = BinaryToThermometer(x, 8, 256, 0) print(x.shape, b.shape) print(b[0, 0, 0:5]) b = BinaryToThermometer(x, 8, 256, 1) print(b[0, 0, 0:5]) x = np.random.randint(0, 255, (100, 28, 28, 2)) b = BinaryToThermometer(x, 8, 256, 0, 1) print(x.shape, b.shape) print(x[0, 0, 0, 0:2]) print(b[0, 0, 0, 0:8]) print(b[0, 0, 0, 8:16]) ================================================ FILE: examples/example_cifar10_po2.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests qcore model with po2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from collections import defaultdict

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import to_categorical
import numpy as np

from qkeras import *

np.random.seed(42)

NB_EPOCH = 50
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001)
VALIDATION_SPLIT = 0.1

# Load and normalize CIFAR-10 to [0, 1].
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255.0
x_test /= 255.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")
print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# Quantized convnet: po2 quantizers in the first two conv blocks,
# fixed-point (quantized_bits) in the last conv block and ulaw in the dense
# classifier.
x = x_in = Input(x_train.shape[1:], name="input")
x = QActivation("quantized_relu_po2(4,4)", name="acti")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=quantized_po2(4, 1),
    bias_quantizer=quantized_po2(4, 4),
    bias_range=4,
    name="conv2d_0_m")(
        x)
x = QActivation("ternary()", name="act0_m")(x)
x = MaxPooling2D(2, 2, name="mp_0")(x)
x = QConv2D(
    256, (3, 3),
    strides=1,
    kernel_quantizer=quantized_po2(4, 1),
    bias_quantizer=quantized_po2(4, 4),
    bias_range=4,
    name="conv2d_1_m")(
        x)
x = QActivation("quantized_relu(6,2)", name="act1_m")(x)
x = MaxPooling2D(2, 2, name="mp_1")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=quantized_bits(4, 0, 1),
    bias_quantizer=quantized_bits(4, 0, 1),
    name="conv2d_2_m")(
        x)
x = QActivation("quantized_relu(4,2)", name="act2_m")(x)
x = MaxPooling2D(2, 2, name="mp_2")(x)
x = Flatten()(x)
x = QDense(
    NB_CLASSES,
    kernel_quantizer=quantized_ulaw(4, 0, 1),
    bias_quantizer=quantized_bits(4, 0, 1),
    name="dense")(
        x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in], outputs=[x])
model.summary()
model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

# Training is opt-in via the TRAIN environment variable.
if int(os.environ.get("TRAIN", 0)):
  history = model.fit(
      x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT)

# Debug model: tap the output of every (Q)Activation/(Q)Dense/(Q)Conv layer
# and print min/max of activations and quantized weights.
# NOTE(review): original indentation was lost in extraction; this section is
# reconstructed at top level — confirm against upstream examples.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in [
      "QActivation", "Activation", "QDense", "QConv2D", "QDepthwiseConv2D"
  ]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)
outputs = model_debug.predict(x_train)
print("{:30} {: 8.4f} {: 8.4f}".format(
    "input", np.min(x_train), np.max(x_train)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    # Print the range of the weights after quantization.
    weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

score = model.evaluate(x_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print("Test accuracy:", score[1])

model.summary()
print_qstats(model)

================================================
FILE: examples/example_keras_to_qkeras.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests automatic conversion of keras model to qkeras."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import defaultdict

from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

from qkeras.estimate import print_qstats
from qkeras.utils import model_quantize
from qkeras.utils import quantized_model_dump

# Plain (float) Keras model with two inputs; it will be converted to a
# quantized qkeras model by model_quantize below.
x0 = x_in0 = Input((28, 28, 1), name="input0")
x1 = x_in1 = Input((28, 28, 1), name="input1")
x = Concatenate(name="concat")([x0, x1])
x = Conv2D(128, (3, 3), strides=1, name="conv2d_0_m")(x)
x = Activation("relu", name="act0_m")(x)
x = MaxPooling2D(2, 2, name="mp_0")(x)
x = Conv2D(256, (3, 3), strides=1, name="conv2d_1_m")(x)
x = Activation("relu", name="act1_m")(x)
x = MaxPooling2D(2, 2, name="mp_1")(x)
x = Conv2D(128, (3, 3), strides=1, name="conv2d_2_m")(x)
x = Activation("relu", name="act2_m")(x)
x = MaxPooling2D(2, 2, name="mp_2")(x)
x = Flatten()(x)
x = Dense(10, name="dense")(x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in0, x_in1], outputs=[x])
model.summary()

# Quantization recipe: per-layer entries (by name) override the per-class
# defaults ("QActivation", "QConv2D", "QDense").
q_dict = {
    "conv2d_0_m": {
        "kernel_quantizer": "binary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "conv2d_1_m": {
        "kernel_quantizer": "ternary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "act2_m": "quantized_relu(6,2)",
    "QActivation": {
        "relu": "quantized_relu(4,0)"
    },
    "QConv2D": {
        "kernel_quantizer": "quantized_bits(4,0,1)",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "QDense": {
        "kernel_quantizer": "quantized_bits(3,0,1)",
        "bias_quantizer": "quantized_bits(3,0,1)"
    }
}

qmodel = model_quantize(model, q_dict, 4)
qmodel.summary()
print_qstats(qmodel)

# Dump quantized intermediate activations for a few MNIST samples (the same
# ten images are fed to both inputs).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_test_arr = [x_test[0:10,:], x_test[0:10,:]]
quantized_model_dump(
    qmodel,
    x_test_arr,
    layers_to_dump=["input0", "input1", "act2_m", "act1_m", "act0_m"])

================================================
FILE: examples/example_mnist.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""uses po2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from collections import defaultdict

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

from qkeras import *
from qkeras.utils import model_save_quantized_weights

import numpy as np
import tensorflow.compat.v1 as tf

np.random.seed(42)

NB_EPOCH = 100
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001, decay=0.000025)
VALIDATION_SPLIT = 0.1

train = 1

# Load MNIST, add a channel axis and normalize to [0, 1).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

RESHAPED = 784

x_test_orig = x_test

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

x_train /= 256.0
x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# 4-bit fixed-point convnet (quantized_bits(4,0,1) everywhere).
x = x_in = Input(
    x_train.shape[1:-1] + (1,), name="input")
x = QConv2D(
    32, (2, 2), strides=(2,2),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1),
    name="conv2d_0_m")(x)
x = QActivation("quantized_relu(4,0)", name="act0_m")(x)
x = QConv2D(
    64, (3, 3), strides=(2,2),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1),
    name="conv2d_1_m")(x)
x = QActivation("quantized_relu(4,0)", name="act1_m")(x)
x = QConv2D(
    64, (2, 2), strides=(2,2),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1),
    name="conv2d_2_m")(x)
x = QActivation("quantized_relu(4,0)", name="act2_m")(x)
x = Flatten()(x)
x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4,0,1),
           bias_quantizer=quantized_bits(4,0,1),
           name="dense")(x)
x_out = x
x = Activation("softmax", name="softmax")(x)

# `model` includes the softmax head; `mo` exposes the pre-softmax logits so
# they can be dumped to disk below.
model = Model(inputs=[x_in], outputs=[x])
mo = Model(inputs=[x_in], outputs=[x_out])
model.summary()

model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

if train:
  history = model.fit(
      x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT)

# Debug model: print min/max of each tapped activation and of the quantized
# weights of each layer.
# NOTE(review): original indentation was lost in extraction; this section is
# reconstructed at top level — confirm against upstream examples.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in ["QActivation", "Activation", "QDense",
                                  "QConv2D", "QDepthwiseConv2D"]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)
outputs = model_debug.predict(x_train)
print("{:30} {: 8.4f} {: 8.4f}".format(
    "input", np.min(x_train), np.max(x_train)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

# Dump pre-softmax logits for the test set.
p_test = mo.predict(x_test)
p_test.tofile("p_test.bin")

score = model.evaluate(x_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print("Test accuracy:", score[1])

# Replace in-memory weights by their quantized values, then inspect them.
all_weights = []
model_save_quantized_weights(model)

for layer in model.layers:
  for w, weights in enumerate(layer.get_weights()):
    print(layer.name, w)
    all_weights.append(weights.flatten())

all_weights = np.concatenate(all_weights).astype(np.float32)
print(all_weights.size)

for layer in model.layers:
  for w, weight in enumerate(layer.get_weights()):
    print(layer.name, w, weight.shape)

print_qstats(model)

================================================
FILE: examples/example_mnist_ae.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""uses po2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from collections import defaultdict

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

from qkeras import *
from qkeras.utils import model_save_quantized_weights

import numpy as np
import tensorflow.compat.v1 as tf

np.random.seed(42)

NB_EPOCH = 100
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001, decay=0.000025)
VALIDATION_SPLIT = 0.1

train = 1

# Load MNIST, add a channel axis and normalize to [0, 1).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

RESHAPED = 784

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

x_train /= 256.0
x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# Quantized convolutional autoencoder: a 32-16-8 QConv2D encoder mirrored by
# an 8-16-32 QConv2DTranspose decoder, all with 4-bit quantizers.
x = x_in = Input(
    x_train.shape[1:-1] + (1,))
x = QConv2D(
    32,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2D(
    16,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2D(
    8,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2DTranspose(
    8,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2DTranspose(
    16,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2DTranspose(
    32,
    kernel_size=(3, 3),
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x = QActivation("quantized_relu(4,0)")(x)
x = QConv2D(
    1,
    kernel_size=(3, 3),
    padding="same",
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(4,0,1))(x)
x_out = x
x = Activation("sigmoid")(x)

model = Model(inputs=[x_in], outputs=[x])
mo = Model(inputs=[x_in], outputs=[x_out])
model.summary()

# Autoencoder: input is its own target, pixel-wise binary cross-entropy.
model.compile(
    loss="binary_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

if train:
  history = model.fit(
      x_train, x_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT)

# Generate reconstructions
num_reco = 8
samples = x_test[:num_reco]
targets = y_test[:num_reco]
reconstructions = model.predict(samples)

for layer in model.layers:
  for w, weight in enumerate(layer.get_weights()):
    print(layer.name, w, weight.shape)

print_qstats(model)

================================================
FILE: examples/example_mnist_b2t.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests qcore model with BinaryToThermometer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
import numpy as np

from qkeras import *

np.random.seed(42)

NB_EPOCH = 20
BATCH_SIZE = 32
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001)
N_HIDDEN = 100
VALIDATION_SPLIT = 0.1
# Thermometer-encoding parameters: number of classes over the 0..255 pixel
# range, and whether the per-class residue is kept.
T_CLASSES = 256
T_WITH_RESIDUE = 0

(x_train, y_train), (x_test, y_test) = mnist.load_data()

RESHAPED = 784

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

# With a single thermometer class the input stays scalar, so normalize it;
# otherwise the thermometer encoding consumes the raw 8-bit values.
if T_CLASSES == 1:
  x_train /= 256.0
  x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

# x_train = x_train[0:1000]
# y_train = y_train[0:1000]

# x_test = x_test[0:100]
# y_test = y_test[0:100]

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# we ran out of memory here, so we split x_train/x_test into smaller groups

x = x_in = Input(
    x_train.shape[1:-1] + (T_CLASSES,), name="input")

# Number is represented as 1.bbb, where number of bits of bbb is
# log2(256/T_CLASSES) if T_WITH_RESIDUE == 1
bits = (
    (T_WITH_RESIDUE == 1) * int(np.ceil(np.log2(256/T_CLASSES))) +
    (T_CLASSES > 1)
)

print("Input quantizer: quantized_relu({},{})".format(
    bits, int(T_CLASSES > 1)))
x = QActivation("quantized_relu({},{})".format(bits, int(T_CLASSES > 1)))(x)

x = QConv2D(
    64, (3, 3),
    strides=1,
    padding="same",
    kernel_quantizer=quantized_po2(4,1),
    bias_quantizer=quantized_bits(4,2,1),
    bias_range=4,
    name="conv2d_0_m")(x)
x = QActivation("quantized_relu(4,0)", name="act0_m")(x)
x = MaxPooling2D(2,2,name="mp_0")(x)
x = QConv2D(
    32, (3, 3),
    strides=1,
    padding="same",
    kernel_quantizer=stochastic_ternary(),
    bias_quantizer=quantized_bits(8,5,1),
    bias_range=32,
    name="conv2d_1_m")(x)
x = QActivation("quantized_relu(4,0)", name="act1_m")(x)
x = MaxPooling2D(2,2,name="mp_1")(x)
x = QConv2D(
    16, (3, 3),
    strides=1,
    padding="same",
    kernel_quantizer=quantized_bits(4,0,1),
    bias_quantizer=quantized_bits(8,5,1),
    bias_range=32,
    name="conv2d_2_m")(x)
x = QActivation("quantized_relu(6,2)", name="act2_m")(x)
x = MaxPooling2D(2,2,name="mp_2")(x)
x = Flatten()(x)
x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4,0,1),
           bias_quantizer=quantized_bits(4,0,1),
           name="dense2")(x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in], outputs=[x])
model.summary()

model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

# Debug model tapping every (Q)Activation/(Q)Dense/(Q)Conv output.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in ["QActivation", "Activation", "QDense",
                                  "QConv2D", "QDepthwiseConv2D"]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)

# The thermometer encoding multiplies memory by T_CLASSES, so the training
# set is encoded and fed in large chunks instead of all at once.
batch_size = 1000 * BATCH_SIZE
n_batches = x_train.shape[0] // batch_size

if T_CLASSES > 1:
  x_test = BinaryToThermometer(x_test, T_CLASSES, 256, T_WITH_RESIDUE)

# Training is opt-in via the TRAIN environment variable.
# NOTE(review): original indentation was lost in extraction; the loop nesting
# here is reconstructed — confirm against the upstream example.
if int(os.environ.get("TRAIN", 0)):
  for i in range(NB_EPOCH):
    for b in range(n_batches):
      min_b = b * batch_size
      max_b = (b + 1) * batch_size
      if max_b > x_train.shape[0]:
        max_b = x_train.shape[0]
      if T_CLASSES > 1:
        x = BinaryToThermometer(
            x_train[min_b:max_b], T_CLASSES, 256, T_WITH_RESIDUE)
      else:
        x = x_train[min_b:max_b]
      history = model.fit(
          x, y_train[min_b:max_b], batch_size=BATCH_SIZE,
          epochs=i+1, initial_epoch=i, verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)

# Inspect activations/weights on the first 100 training images.
if T_CLASSES > 1:
  x = BinaryToThermometer(x_train[0:100], T_CLASSES, 256, T_WITH_RESIDUE)
else:
  x = x_train[0:100]

outputs = model_debug.predict(x)

print("{:30} {: 8.4f} {: 8.4f}".format("input", np.min(x), np.max(x)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

score = model.evaluate(x_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print("Test accuracy:", score[1])

print_qstats(model)
acc = analyze_accumulator_from_sample(model, x_test, mode="sampled")
print(acc)

================================================
FILE: examples/example_mnist_bn.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests mnist batchnormalization used as learned scale factor."""

# to run, THRESHOLD=0.05 WITH_BN=1 EPOCHS=5 TRAIN=1 python example_mnist_bn.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import defaultdict
import os

import numpy as np
from six.moves import zip
from tensorflow.keras import callbacks
import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import to_categorical

from qkeras import *

np.random.seed(42)

TRAIN = 1
NB_EPOCH = 2
BATCH_SIZE = 64
VERBOSE = 1
NB_CLASSES = 10
OPTIMIZER = Adam(lr=0.0001)
VALIDATION_SPLIT = 0.1
WITH_BN = 1
THRESHOLD = 0.1


class LearningRateAdjuster(callbacks.Callback):
  """Halves the learning rate when any batch-norm variance grows too large.

  After each epoch, scans BatchNormalization/QBatchNormalization layers;
  if the maximum of their last weight tensor (moving variance) exceeds 32
  and the accumulated factor is still below 100, the optimizer's learning
  rate is halved in place.
  """

  def __init__(self):
    # Tracks how much the rate has already been reduced (guards the cap).
    self.learning_rate_factor = 1.0
    pass

  def on_epoch_end(self, epochs, logs):
    max_variance = -1
    for layer in self.model.layers:
      if layer.__class__.__name__ in [
          "BatchNormalization", "QBatchNormalization"
      ]:
        # Last weight of a (Q)BatchNormalization layer is its variance.
        variance = np.max(layer.get_weights()[-1])
        if variance > max_variance:
          max_variance = variance
    if max_variance > 32 and self.learning_rate_factor < 100:
      learning_rate = K.get_value(self.model.optimizer.learning_rate)
      # NOTE(review): dividing the factor shrinks it, so the `< 100` guard
      # can never trigger — looks like it should multiply; kept as original.
      self.learning_rate_factor /= 2.0
      print("***** max_variance is {} / lr is {} *****".format(
          max_variance, learning_rate))
      K.eval(K.update(
          self.model.optimizer.learning_rate, learning_rate / 2.0
      ))


lra = LearningRateAdjuster()

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape + (1,)).astype("float32")
x_test = x_test.reshape(x_test.shape + (1,)).astype("float32")

x_train /= 256.0
x_test /= 256.0

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

print(y_train[0:10])

y_train = to_categorical(y_train, NB_CLASSES)
y_test = to_categorical(y_test, NB_CLASSES)

# Ternary-kernel convnet. When WITH_BN is set, conv layers drop their bias
# and a QBatchNormalization provides the learned scale/offset instead.
x = x_in = Input(x_train.shape[1:], name="input")
#x = QActivation("quantized_relu_po2(4,1)", name="acti")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=ternary(threshold=THRESHOLD),  #quantized_po2(4, 1),
    bias_quantizer=quantized_bits(4,2,0) if not WITH_BN else None,
    bias_range=4 if not WITH_BN else None,
    use_bias=not WITH_BN,
    name="conv2d_0_m")(x)
if WITH_BN:
  x = QBatchNormalization(
      gamma_quantizer=quantized_relu_po2(4,8),
      variance_quantizer=quantized_relu_po2(6),
      beta_quantizer=quantized_po2(4, 4),
      gamma_range=8,
      beta_range=4,
      name="bn0")(x)
x = QActivation("quantized_relu(3,1)", name="act0_m")(x)
x = MaxPooling2D(2, 2, name="mp_0")(x)
x = QConv2D(
    256, (3, 3),
    strides=1,
    kernel_quantizer=ternary(threshold=THRESHOLD),  #quantized_bits(2,0,1),
    bias_quantizer=quantized_bits(4,2,1) if not WITH_BN else None,
    bias_range=4 if not WITH_BN else None,
    use_bias=not WITH_BN,
    name="conv2d_1_m")(x)
if WITH_BN:
  x = QBatchNormalization(
      gamma_quantizer=quantized_relu_po2(4,8),
      variance_quantizer=quantized_relu_po2(6),
      beta_quantizer=quantized_po2(4, 4),
      gamma_range=8,
      beta_range=4,
      name="bn1")(x)
x = QActivation("quantized_relu(3,1)", name="act1_m")(x)
x = MaxPooling2D(2, 2, name="mp_1")(x)
x = QConv2D(
    128, (3, 3),
    strides=1,
    kernel_quantizer=ternary(threshold=THRESHOLD),  #quantized_bits(2,0,1),
    bias_quantizer=quantized_bits(4,2,1) if not WITH_BN else None,
    bias_range=4 if not WITH_BN else None,
    use_bias=not WITH_BN,
    name="conv2d_2_m")(x)
if WITH_BN:
  x = QBatchNormalization(
      gamma_quantizer=quantized_relu_po2(4,8),
      variance_quantizer=quantized_relu_po2(6),
      beta_quantizer=quantized_po2(4, 4),
      gamma_range=8,
      beta_range=4,
      name="bn2")(x)
x = QActivation("quantized_relu(3,1)", name="act2_m")(x)
x = MaxPooling2D(2, 2, name="mp_2")(x)
x = Flatten()(x)
x = QDense(
    NB_CLASSES,
    kernel_quantizer=quantized_ulaw(4, 0, 1),
    bias_quantizer=quantized_bits(4, 0, 1),
    name="dense")(
        x)
x = Activation("softmax", name="softmax")(x)

model = Model(inputs=[x_in], outputs=[x])
model.summary()
model.compile(
    loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"])

if TRAIN:
  history = model.fit(
      x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
      initial_epoch=1, verbose=VERBOSE,
      validation_split=VALIDATION_SPLIT,
      callbacks=[])  #lra])

# Debug model: print min/max of tapped activations and quantized weights.
# NOTE(review): original indentation was lost in extraction; this section is
# reconstructed at top level — confirm against upstream examples.
outputs = []
output_names = []
for layer in model.layers:
  if layer.__class__.__name__ in [
      "QActivation", "QBatchNormalization", "Activation", "QDense",
      "QConv2D", "QDepthwiseConv2D"
  ]:
    output_names.append(layer.name)
    outputs.append(layer.output)
model_debug = Model(inputs=[x_in], outputs=outputs)
outputs = model_debug.predict(x_train)
print("{:30} {: 8.4f} {: 8.4f}".format(
    "input", np.min(x_train), np.max(x_train)))
for n, p in zip(output_names, outputs):
  print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="")
  layer = model.get_layer(n)
  for i, weights in enumerate(layer.get_weights()):
    # Some quantizer slots can be None (e.g. disabled bias quantizer).
    if layer.get_quantizers()[i]:
      weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
    print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
          end="")
  print("")

score = model.evaluate(x_test, y_test, verbose=False)
print("Test score:", score[0])
print("Test accuracy:", score[1])

print_qstats(model)

================================================
FILE: examples/example_mnist_po2.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests qlayers model with po2.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow.keras.backend as K from tensorflow.keras.datasets import mnist from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import to_categorical import numpy as np from qkeras import * # pylint: disable=wildcard-import np.random.seed(42) NB_EPOCH = 5 BATCH_SIZE = 64 VERBOSE = 1 NB_CLASSES = 10 OPTIMIZER = Adam(lr=0.0001, decay=0.000025) N_HIDDEN = 100 VALIDATION_SPLIT = 0.1 QUANTIZED = 1 CONV2D = 1 (x_train, y_train), (x_test, y_test) = mnist.load_data() RESHAPED = 784 x_train = x_train.astype("float32") x_test = x_test.astype("float32") x_train = x_train[..., np.newaxis] x_test = x_test[..., np.newaxis] x_train /= 256.0 x_test /= 256.0 train = False print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") print(y_train[0:10]) y_train = to_categorical(y_train, NB_CLASSES) y_test = to_categorical(y_test, NB_CLASSES) # we ran out of memory here, so we split x_train/x_test into smaller groups x = x_in = Input(x_train.shape[1:-1] + (1,), name="input") x = QActivation("quantized_relu_po2(4)", name="acti")(x) x = QConv2D( 32, (2, 2), strides=(2, 2), kernel_quantizer=quantized_po2(4, 1), bias_quantizer=quantized_po2(4, 1), name="conv2d_0_m")( x) x = QActivation("quantized_relu_po2(4,4)", name="act0_m")(x) x = QConv2D( 64, (3, 3), strides=(2, 2), kernel_quantizer=quantized_po2(4, 1), bias_quantizer=quantized_po2(4, 1), name="conv2d_1_m")( x) x = QActivation("quantized_relu_po2(4,4,use_stochastic_rounding=True)", name="act1_m")(x) x = QConv2D( 64, (2, 2), strides=(2, 2), kernel_quantizer=quantized_po2(4, 1, 
use_stochastic_rounding=True), bias_quantizer=quantized_po2(4, 1), name="conv2d_2_m")( x) x = QActivation("quantized_relu(4,1)", name="act2_m")(x) x = Flatten()(x) x = QDense( NB_CLASSES, kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name="dense")( x) x = Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) model.summary() model.compile( loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"]) if train: history = model.fit( x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH, initial_epoch=1, verbose=VERBOSE, validation_split=VALIDATION_SPLIT) outputs = [] output_names = [] for layer in model.layers: if layer.__class__.__name__ in [ "QActivation", "Activation", "QDense", "QConv2D", "QDepthwiseConv2D" ]: output_names.append(layer.name) outputs.append(layer.output) model_debug = Model(inputs=[x_in], outputs=outputs) outputs = model_debug.predict(x_train) print("{:30} {: 8.4f} {: 8.4f}".format( "input", np.min(x_train), np.max(x_train))) for n, p in zip(output_names, outputs): print("{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p), np.max(p)), end="") layer = model.get_layer(n) for i, weights in enumerate(layer.get_weights()): weights = K.eval(layer.get_quantizers()[i](K.constant(weights))) print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)), end="") print("") score = model.evaluate(x_test, y_test, verbose=VERBOSE) print("Test score:", score[0]) print("Test accuracy:", score[1]) model.summary() print_qstats(model) ================================================ FILE: examples/example_mnist_prune.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Example of mnist model with pruning. Adapted from TF model optimization example.""" import tempfile import numpy as np import tensorflow.keras.backend as K from tensorflow.keras.datasets import mnist from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.models import Sequential from tensorflow.keras.models import save_model from tensorflow.keras.utils import to_categorical from qkeras import QActivation from qkeras import QDense from qkeras import QConv2D from qkeras import quantized_bits from qkeras.utils import load_qmodel from qkeras.utils import print_model_sparsity from tensorflow_model_optimization.python.core.sparsity.keras import prune from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule batch_size = 128 num_classes = 10 epochs = 12 prune_whole_model = True # Prune whole model or just specified layers def build_model(input_shape): x = x_in = Input(shape=input_shape, name="input") x = QConv2D( 32, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_0_m")(x) x = QActivation("quantized_relu(4,0)", name="act0_m")(x) x = QConv2D( 64, (3, 3), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_1_m")(x) x = 
QActivation("quantized_relu(4,0)", name="act1_m")(x) x = QConv2D( 64, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_2_m")(x) x = QActivation("quantized_relu(4,0)", name="act2_m")(x) x = Flatten()(x) x = QDense(num_classes, kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="dense")(x) x = Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) return model def build_layerwise_model(input_shape, **pruning_params): return Sequential([ prune.prune_low_magnitude( QConv2D( 32, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_0_m"), input_shape=input_shape, **pruning_params), QActivation("quantized_relu(4,0)", name="act0_m"), prune.prune_low_magnitude( QConv2D( 64, (3, 3), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_1_m"), **pruning_params), QActivation("quantized_relu(4,0)", name="act1_m"), prune.prune_low_magnitude( QConv2D( 64, (2, 2), strides=(2,2), kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="conv2d_2_m"), **pruning_params), QActivation("quantized_relu(4,0)", name="act2_m"), Flatten(), prune.prune_low_magnitude( QDense( num_classes, kernel_quantizer=quantized_bits(4,0,1), bias_quantizer=quantized_bits(4,0,1), name="dense"), **pruning_params), Activation("softmax", name="softmax") ]) def train_and_save(model, x_train, y_train, x_test, y_test): model.compile( loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) # Print the model summary. model.summary() # Add a pruning step callback to peg the pruning step to the optimizer's # step. 
Also add a callback to add pruning summaries to tensorboard callbacks = [ pruning_callbacks.UpdatePruningStep(), #pruning_callbacks.PruningSummaries(log_dir=tempfile.mkdtemp()) pruning_callbacks.PruningSummaries(log_dir="/tmp/mnist_prune") ] model.fit( x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, callbacks=callbacks, validation_data=(x_test, y_test)) score = model.evaluate(x_test, y_test, verbose=0) print("Test loss:", score[0]) print("Test accuracy:", score[1]) print_model_sparsity(model) # Export and import the model. Check that accuracy persists. _, keras_file = tempfile.mkstemp(".h5") print("Saving model to: ", keras_file) save_model(model, keras_file) print("Reloading model") with prune.prune_scope(): loaded_model = load_qmodel(keras_file) score = loaded_model.evaluate(x_test, y_test, verbose=0) print("Test loss:", score[0]) print("Test accuracy:", score[1]) def main(): # input image dimensions img_rows, img_cols = 28, 28 # the data, shuffled and split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() if K.image_data_format() == "channels_first": x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) input_shape = (1, img_rows, img_cols) else: x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) input_shape = (img_rows, img_cols, 1) x_train = x_train.astype("float32") x_test = x_test.astype("float32") x_train /= 255 x_test /= 255 print("x_train shape:", x_train.shape) print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") # convert class vectors to binary class matrices y_train = to_categorical(y_train, num_classes) y_test = to_categorical(y_test, num_classes) pruning_params = { "pruning_schedule": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100) } if prune_whole_model: model = build_model(input_shape) 
model = prune.prune_low_magnitude(model, **pruning_params) else: model = build_layerwise_model(input_shape, **pruning_params) train_and_save(model, x_train, y_train, x_test, y_test) if __name__ == "__main__": main() ================================================ FILE: examples/example_qdense.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests qdense model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse from tensorflow.keras.datasets import mnist from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import to_categorical import numpy as np from qkeras import print_qstats from qkeras import QActivation from qkeras import QDense from qkeras import quantized_bits from qkeras import ternary np.random.seed(42) OPTIMIZER = Adam() NB_EPOCH = 1 BATCH_SIZE = 32 VERBOSE = 1 NB_CLASSES = 10 N_HIDDEN = 100 VALIDATION_SPLIT = 0.1 RESHAPED = 784 def QDenseModel(weights_f, load_weights=False): """Construct QDenseModel.""" x = x_in = Input((RESHAPED,), name="input") x = QActivation("quantized_relu(4)", name="act_i")(x) x = QDense(N_HIDDEN, kernel_quantizer=ternary(), bias_quantizer=quantized_bits(4, 0, 
1), name="dense0")(x) x = QActivation("quantized_relu(2)", name="act0")(x) x = QDense( NB_CLASSES, kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name="dense2")( x) x = Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) model.summary() model.compile(loss="categorical_crossentropy", optimizer=OPTIMIZER, metrics=["accuracy"]) if load_weights and weights_f: model.load_weights(weights_f) print_qstats(model) return model def UseNetwork(weights_f, load_weights=False): """Use DenseModel. Args: weights_f: weight file location. load_weights: load weights when it is True. """ model = QDenseModel(weights_f, load_weights) batch_size = BATCH_SIZE (x_train_, y_train_), (x_test_, y_test_) = mnist.load_data() x_train_ = x_train_.reshape(60000, RESHAPED) x_test_ = x_test_.reshape(10000, RESHAPED) x_train_ = x_train_.astype("float32") x_test_ = x_test_.astype("float32") x_train_ /= 255 x_test_ /= 255 print(x_train_.shape[0], "train samples") print(x_test_.shape[0], "test samples") y_train_ = to_categorical(y_train_, NB_CLASSES) y_test_ = to_categorical(y_test_, NB_CLASSES) if not load_weights: model.fit( x_train_, y_train_, batch_size=batch_size, epochs=NB_EPOCH, verbose=VERBOSE, validation_split=VALIDATION_SPLIT) if weights_f: model.save_weights(weights_f) score = model.evaluate(x_test_, y_test_, verbose=VERBOSE) print_qstats(model) print("Test score:", score[0]) print("Test accuracy:", score[1]) def ParserArgs(): parser = argparse.ArgumentParser() parser.add_argument("-l", "--load_weight", default="0", help="""load weights directly from file. 
0 is to disable and train the network.""") parser.add_argument("-w", "--weight_file", default=None) a = parser.parse_args() return a if __name__ == "__main__": args = ParserArgs() lw = False if args.load_weight == "0" else True UseNetwork(args.weight_file, load_weights=lw) ================================================ FILE: examples/example_qoctave.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """QOctave example.""" import numpy as np import sys from tensorflow.keras import activations from tensorflow.keras import initializers import tensorflow.keras.backend as K from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import to_categorical from functools import partial from qkeras import * # pylint: disable=wildcard-import def create_model(): """use qocatve in network.""" kernel_initializer=initializers.he_normal(seed=42) x = x_in = Input(shape=(256, 256, 3)) # Block 1 high, low = QOctaveConv2D( 32, (3, 3), alpha=0.5, strides=(2, 2), padding='valid', kernel_initializer=kernel_initializer, bias_initializer="zeros", bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", 
acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block1_conv1')([x, None]) # Block 2 high, low = QOctaveConv2D( 64, (3, 3), alpha=0.4, strides=(2, 2), padding='same', kernel_initializer=kernel_initializer, bias_initializer="zeros", bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block2_conv1')([high, low]) # Block 3 high, low = QOctaveConv2D( 64, (3, 3), alpha=0.4, strides=(2, 2), padding='same', kernel_initializer=kernel_initializer, bias_initializer="zeros", bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv1')([high, low]) high, low = QOctaveConv2D( 32, (3, 3), alpha=0.4, strides=(1, 1), padding='same', kernel_initializer=kernel_initializer, bias_initializer='zeros', bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv2')([high, low]) high, low = QOctaveConv2D( 32, (3, 3), alpha=0.3, strides=(1, 1), padding='same', kernel_initializer=kernel_initializer, bias_initializer='zeros', bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv3')([high, low]) x, _ = QOctaveConv2D( 32, (3, 3), alpha=0.0, strides=(2, 2), 
padding='same', kernel_initializer=kernel_initializer, bias_initializer='zeros', bias_quantizer="quantized_bits(4,1)", depthwise_quantizer="quantized_bits(4,1)", depthwise_activation="quantized_bits(6,2,1)", pointwise_quantizer="quantized_bits(4,1)", acc_quantizer="quantized_bits(16,7,1)", activation="quantized_relu(6,2)", use_separable=True, name='block3_conv_down')([high, low]) # Upsample x = UpSampling2D(size=(2, 2), data_format="channels_last")(x) x = QConv2D( 2, (2, 2), strides=(1, 1), kernel_initializer=kernel_initializer, bias_initializer="ones", kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), padding="same", name="conv_up")( x) x = Activation("softmax", name="softmax")(x) output = x model = Model(x_in, output, name='qoctave_network') return model # Create the model def customLoss(y_true,y_pred): log1 = 1.5 * y_true * K.log(y_pred + 1e-9) * K.pow(1-y_pred, 2) log0 = 0.5 * (1 - y_true) * K.log((1 - y_pred) + 1e-9) * K.pow(y_pred, 2) return (- K.sum(K.mean(log0 + log1, axis = 0))) if __name__ == '__main__': model = create_model() model.compile(optimizer="Adam", loss=customLoss, metrics=['acc']) model.summary(line_length=100) print_qstats(model) ================================================ FILE: examples/example_ternary.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== from __future__ import absolute_import # Not necessary in a Python 3-only module from __future__ import division # Not necessary in a Python 3-only module from __future__ import print_function # Not necessary in a Python 3-only module from absl import app from absl import flags import matplotlib import numpy as np matplotlib.use('TkAgg') import matplotlib.pyplot as plt FLAGS = flags.FLAGS def _stochastic_rounding(x, precision, resolution, delta): """Stochastic_rounding for ternary. Args: x: precision: A float. The area we want to make this stochastic rounding. [delta-precision, delta] [delta, delta+precision] resolution: control the quantization resolution. delta: the undiscountinued point (positive number) Return: A tensor with stochastic rounding numbers. """ delta_left = delta - precision delta_right = delta + precision scale = 1 / resolution scale_delta_left = delta_left * scale scale_delta_right = delta_right * scale scale_2_delta = scale_delta_right - scale_delta_left scale_x = x * scale fraction = scale_x - scale_delta_left # print(precision, scale, x[0], np.floor(scale_x[0]), scale_x[0], fraction[0]) # we use uniform distribution random_selector = np.random.uniform(0, 1, size=x.shape) * scale_2_delta # print(precision, scale, x[0], delta_left[0], delta_right[0]) # print('x', scale_x[0], fraction[0], random_selector[0], scale_2_delta[0]) # rounddown = fraction < random_selector result = np.where(fraction < random_selector, scale_delta_left / scale, scale_delta_right / scale) return result def _ternary(x, sto=False): m = np.amax(np.abs(x), keepdims=True) scale = 2 * m / 3.0 thres = scale / 2.0 ratio = 0.1 if sto: sign_bit = np.sign(x) x = np.abs(x) prec = x / scale x = ( sign_bit * scale * _stochastic_rounding( x / scale, precision=0.3, resolution=0.01, # those two are all normalized. 
delta=thres / scale)) # prec + prec *ratio) # mm = np.amax(np.abs(x), keepdims=True) return np.where(np.abs(x) < thres, np.zeros_like(x), np.sign(x)) def main(argv): if len(argv) > 1: raise app.UsageError('Too many command-line arguments.') # x = np.arange(-3.0, 3.0, 0.01) # x = np.random.uniform(-0.01, 0.01, size=1000) x = np.random.uniform(-10.0, 10.0, size=1000) # x = np.random.uniform(-1, 1, size=1000) x = np.sort(x) tr = np.zeros_like(x) t = np.zeros_like(x) iter_count = 500 for _ in range(iter_count): y = _ternary(x) yr = _ternary(x, sto=True) t = t + y tr = tr + yr plt.plot(x, t/iter_count) plt.plot(x, tr/iter_count) plt.ylabel('mean (%s samples)' % iter_count) plt.show() if __name__ == '__main__': app.run(main) ================================================ FILE: experimental/lo/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Exports logic optimization module.""" from .utils import * # pylint: disable=wildcard-import from .receptive import model_to_receptive_field from .conv2d import optimize_conv2d_logic from .dense import optimize_dense_logic from .optimizer import run_rf_optimizer from .optimizer import run_abc_optimizer from .optimizer import mp_rf_optimizer_func from .table import load from .compress import Compressor from .generate_rf_code import * # __version__ = "0.5.0" ================================================ FILE: experimental/lo/compress.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Implements faster version of set on multiple strings.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class Compressor: """Implements a hierarchical set class with better performance than a set.""" def __init__(self, hash_only_input=False): self.n_dict = {} self.hash_only_input = hash_only_input def add_entry(self, table_in, table_out=""): """Adds entry (table_in, table_out) to the set.""" line = (table_in, table_out) if self.hash_only_input: h_line = hash(table_in) else: h_line = hash(line) if self.n_dict.get(h_line, None): self.n_dict[h_line] = self.n_dict[h_line].union([line]) else: self.n_dict[h_line] = set([line]) def has_entry(self, table_in, table_out=""): """Checks if table_in is already stored in the set.""" line = (table_in, table_out) if self.hash_only_input: h_line = hash(table_in) else: h_line = hash(line) if not self.n_dict.get(h_line, None): return None set_h_line = self.n_dict[h_line] for (ti, to) in set_h_line: if table_in == ti: return to return None def __call__(self): for key in self.n_dict: for line in self.n_dict[key]: yield line ================================================ FILE: experimental/lo/conv2d.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Implements convolutional (?, h, w, c) facing input layer optimization.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import multiprocessing as mp import os import shutil from .compress import Compressor import numpy as np import six from tensorflow.keras.models import Model from .utils import get_padding_value DEBUG = int(os.getenv("DEBUG", 0)) OG_IS_SYMBOLIC = 0 def parallel_index_table( p, ni, size, idx_height, idx_width, i_dict, o_dict, kernel, strides, padding, generate_pla): """Processes the table in parallel and use espresso to optimize it.""" print("... indexing table from {} to {} ({} => {})".format( ni, ni+size, p[0].shape, p[1].shape)) table_ins = [] table_ous = [] table_set = Compressor(hash_only_input=True) if DEBUG: table_set_line = {} for n in range(size): # we need to traverse the outputs to compute the input coordinates for ho in idx_height: min_hi = strides[0]*ho - 2*padding[0] max_hi = strides[0]*ho - 2*padding[0] + kernel[0] if min_hi < 0 or max_hi > p[0].shape[0]: continue for wo in idx_width: min_wi = strides[1]*wo - 2*padding[1] max_wi = strides[1]*wo - 2*padding[1] + kernel[1] if min_wi < 0 or max_wi > p[0].shape[1]: continue i_values = p[0][n, min_hi:max_hi, min_wi:max_wi].flatten() # o_values has dimension (1, 1, C_O) o_values = p[1][n, ho, wo] # if we generate a pla entry, we care about a list of # bits. Otherwise, we care about a list of floating point # values. 
table_i = "".join([i_dict[v] for v in i_values]) table_o = "".join([o_dict[v] for v in o_values]) if generate_pla: table_s = "".join([str(v) for v in table_i]) bit_str = table_s else: table_s = ",".join([str(v) for v in table_i]) table_i = table_s bit_str = "".join(i_dict[v] for v in i_values) is_table_zero = bit_str != "0"*len(bit_str) if table_set.has_entry(table_s) and not is_table_zero: # if table is already stored, we do not store it again. # from time to time, we may want to check if we have found # diverging output values. if DEBUG: (table_o_old, (old_n, old_ho, old_wo)) = table_set_line[table_s] if table_o != table_o_old: print( "contradicting outputs n={} old_n={} out_p={} out={}".format( (n, ho, wo), (old_n, old_ho, old_wo), table_o_old, table_o)) print(" I:", table_s) print(" I:", i_values) print("<<<", table_o_old) print(">>>", table_o) return (None, None) continue # these are unique table entries table_ins.append(table_i) table_ous.append(table_o) # we store this information in order to be able to debug # and discard information. table_set.add_entry(table_s) if DEBUG: table_set_line[table_s] = (table_o, (n, ho, wo)) print("... 
indexing table from {} to {} completed".format(ni, ni+size)) return (table_ins, table_ous) def parallel_compress_output_table( filename, header, table_ins, table_ous, output_group, generate_pla, n_bits_og, o, o_bits): """Processes in parallel compression of table and writes it to a disk.""" f = open(filename, "w") f.write("".join(header)) c = Compressor() for n in range(len(table_ins)): for og in range(output_group): if output_group > 1: if generate_pla: if OG_IS_SYMBOLIC: og_l = ["0"] * n_bits_og og_l[n_bits_og - 1 - og] = "1" og_b = "".join(og_l) table_i_suffix = " " + og_b else: og_b = bin(og)[2:] table_i_suffix = " " + "0" * (n_bits_og - len(og_b)) + og_b else: table_i_suffix = "," + str(og) else: table_i_suffix = "" table_i = table_ins[n] + table_i_suffix table_o = table_ous[n][(o+og)*o_bits:(o+og+1)*o_bits] if generate_pla: c.add_entry(table_i + " " + table_o) else: c.add_entry(table_i + "," + str(table_o[0])) for line in c(): f.write("{}\n".format(line[0])) if generate_pla: f.write(".e\n") f.close() print("... file {} generated".format(filename)) def optimize_conv2d_logic( model, i_name, o_name, x_train, i_dict=None, o_dict=None, kernel=None, strides=None, padding=None, output_group=1, samples=2000, randomize=None, generate_pla=True, prefix=""): """Generates table for logic synthesis for conv2d or conv2d-like shape. Generates table in either espresso format or csv format to be optimized for logic synthesis. The parameters kernel, strides and padding usually do not require any values, unless we want to embed maxpooling layer or multiple convolutional layers between i_name and o_name. In that case, we require the user to compute the proper kernel, strides, and padding that will correspond to the combined layer, as Keras and tensorflow do not provide a way to compute the receptive field between two layers. Arguments: model: Keras model i_name: name of convolutional layer (input to this layer must be quantized). o_name: name of quantized output layer. 
x_train: training set to be used to dump table. i_dict: dictionary of floating point values to encoding for inputs. o_dict: dictionary of floating point values to encoding for outputs. kernel: kernel size, to be specified if we want to override convolution kernel. strides: strides, to be specified if we want to override first convolution strides. padding: padding, to be specified if we want to override first convolution padding. output_group: by default, we compute one PE per channel output. The user can override that by specifying how many output channels should be bundled into the same PE. samples: how many images from x_train should be sampled when generating the tables. randomize: if specified, it should be the number of coordinates within the same image we will use to derive the convolution table. generate_pla: if true, we generate table in pla format. Otherwise, we generate a csv file. prefix: prefix name to create directory. Returns: list of files generated. """ # if no i_dict or no o_dict, we do not know how to encode, so we generate # csv file. 
if not i_dict or not o_dict: generate_pla = False # extract layer from i_name and o_name i_layer = model.get_layer(i_name) o_layer = model.get_layer(o_name) # if kernel is not specified, use the kernel size from i_layer if not kernel: kernel = i_layer.kernel_size # if strides is not specified, use the strides from i_layer if not strides: strides = i_layer.strides # if padding is not specified, use the padding from i_layer if not padding: padding = i_layer.padding # for conv2d, we want a list for kernel, strides and padding if not isinstance(kernel, list) and not isinstance(kernel, tuple): kernel = [kernel, kernel] if not isinstance(strides, list) and not isinstance(strides, tuple): strides = [strides, strides] if not isinstance(padding, list) and not isinstance(padding, tuple): padding = [padding, padding] # compute the padding value padding[0] = get_padding_value(padding[0], kernel[0]) padding[1] = get_padding_value(padding[1], kernel[1]) # resample inputs skip = min(2000, samples) indexes = np.array(range(x_train.shape[0])) np.random.shuffle(indexes) x_train = x_train[indexes[:samples]] # we want to create a smaller model that from inputs generate # i_layer.output + o_layer.output tensors, so that we can predict # its values. outputs = [] x = i_layer.input y = o_layer.output if not isinstance(x, list): x = [x] outputs = x + [y] mo = Model(inputs=model.inputs, outputs=outputs) p = mo.predict(x_train) # in csv mode, each entry has "1" value, for PLA, # we encode the floating point into multiple bits. if not generate_pla: i_bits = 1 # i_dict = {v:v for v in i_dict.keys()} else: i_bits = len(six.next(six.itervalues(i_dict))) if not generate_pla: o_bits = 1 # o_dict = {v:v for v in o_dict.keys()} else: o_bits = len(six.next(six.itervalues(o_dict))) # if randomize is specified, we will sample sqrt(randomize) # from each image, as the conv2d performs the filter everywhere # in the image. 
Because the same image may contain a lot of # reduntant information, we may want to restrict the number of # samples. if randomize: idx_height = np.random.choice( p[-1].shape[1], int(np.round(np.sqrt(randomize)))) idx_width = np.random.choice( p[-1].shape[2], int(np.round(np.sqrt(randomize)))) else: idx_height = range(p[-1].shape[1]) idx_width = range(p[-1].shape[2]) # this is just to inspect that the inputs and outputs are really quantized. print("inputs:") for i in range(len(x)): print(i, np.min(p[i]), np.max(p[i])) print("outputs:") print(np.min(p[-1]), np.max(p[-1])) # i_size and o_size are the channel sizes of the inputs and outputs o_size = y.shape[-1] i_size = p[0].shape[-1] if generate_pla: suffix = "pla" else: suffix = "csv" prefix = prefix + "/" if prefix else "" # lets try to remove the directory and create a new one try: shutil.rmtree(prefix + i_layer.name + "." + suffix) except OSError: pass try: os.makedirs(prefix + i_layer.name + "." + suffix) except OSError: pass table_ins = list() table_ous = list() print("...indexing inputs") # for each image in sampled x_train # on Intel processors, mp.cpu_count() returns number of threads number_of_processes = mp.cpu_count() // 2 pool = mp.Pool(number_of_processes) results = [] for n in range(0, x_train.shape[0], skip): res = pool.apply_async( parallel_index_table, args=((p[0][n:n+skip], p[1][n:n+skip]), n, skip, idx_height, idx_width, i_dict, o_dict, kernel, strides, padding, generate_pla)) results.append(res) pool.close() pool.join() all_pools = [res.get(timeout=1) for res in results] table_ins = sum([ap[0] for ap in all_pools], []) table_ous = sum([ap[1] for ap in all_pools], []) # input and output size ni = len(table_ins[0]) no = len(table_ous[0]) print("... 
generating tables {} outputs, {} entries".format( o_size, len(table_ins))) # this step should be very fast files = [] if OG_IS_SYMBOLIC: if output_group > 1: n_bits_og = output_group else: n_bits_og = 1 else: if output_group == 2: n_bits_og = 1 else: n_bits_og = int(np.ceil(np.log2(output_group))) # sometimes linux get very grumpy with too many files opened. # let's limit to 20. number_of_processes = min(20, mp.cpu_count() // 2) pool = mp.Pool(number_of_processes) for o in range(0, o_size, output_group): filename = "{}{}.{}/{}_{}.raw.{}".format( prefix, i_name, suffix, i_name, o, suffix) files.append(filename) header = [] if generate_pla: header.append(".i {}\n".format(ni + n_bits_og)) header.append(".o {}\n".format(no // o_size)) header.append(".type fr\n") if OG_IS_SYMBOLIC and output_group > 1: header.append(".mv {} {} {} {}\n".format( 3, ni, n_bits_og, no // o_size)) # let's generate some labels header.append(".ob " + " ".join([ "o_" + str(o) + "_" + str(o_bits - 1 - v) for v in range(o_bits)]) + "\n") i_names = [] # name is i____bit assert ni == (i_size * kernel[0] * kernel[1] * i_bits) for channel in range(i_size): for row in range(kernel[0]): for col in range(kernel[1]): for bit in range(i_bits): i_names.append("i_{}_{}_{}_{}".format( channel, row, col, (i_bits - 1 - bit))) # if we are grouping multiple channels, these will be the inputs for c in range(n_bits_og): i_names.append("og_{}".format(n_bits_og - 1 - c)) header.append(".ilb " + " ".join(i_names) + "\n") pool.apply_async( parallel_compress_output_table, args=((filename, header, table_ins, table_ous, output_group, generate_pla, n_bits_og, o, o_bits))) pool.close() pool.join() return files ================================================ FILE: experimental/lo/dense.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implements dense (?, features) facing input layer optimization.

Dumps truth tables (espresso PLA format or csv) for a quantized dense or
flattened layer so they can be optimized by logic-synthesis tools.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import multiprocessing as mp
import os
import shutil

from .compress import Compressor
import numpy as np
import six
from tensorflow.keras.models import Model

DEBUG = int(os.getenv("DEBUG", 0))

# When true, the output-group selector is emitted as a one-hot (symbolic)
# field instead of a binary-encoded index.
OG_IS_SYMBOLIC = 0


def parallel_index_table(p, ni, size, i_dict, o_dict, generate_pla):
  """Indexes one slice of the prediction table; meant to run in a worker.

  Args:
    p: pair (inputs, outputs) of numpy arrays for this slice.
    ni: starting sample index (only used for log messages).
    size: number of samples in this slice.
    i_dict: maps floating point input values to their bit encoding.
    o_dict: maps floating point output values to their bit encoding.
    generate_pla: if true, entries are encoded as PLA bit strings;
      otherwise as comma separated csv values.

  Returns:
    Tuple (table_ins, table_ous) of unique encoded input/output rows, or
    (None, None) in DEBUG mode when contradicting outputs are detected.
  """
  print("... indexing table from {} to {} ({} => {})".format(
      ni, ni+size, p[0].shape, p[1].shape))
  table_ins = []
  table_ous = []
  # hash_only_input: dedup on the input pattern only.
  table_set = Compressor(hash_only_input=True)
  if DEBUG:
    table_set_line = {}
  for n in range(size):
    i_values = p[0][n].flatten()
    o_values = p[1][n].flatten()
    # if we generate a pla entry, we care about a list of
    # bits. Otherwise, we care about a list of floating point
    # values.
    table_i = "".join([i_dict[v] for v in i_values])
    table_o = "".join([o_dict[v] for v in o_values])
    if generate_pla:
      table_s = "".join([str(v) for v in table_i])
      bit_str = table_s
    else:
      table_s = ",".join([str(v) for v in table_i])
      table_i = table_s
      bit_str = "".join(str(i_dict[v]) for v in i_values)
    # NOTE(review): despite its name, is_table_zero is True when bit_str is
    # NOT all zeros, so duplicates are only skipped for the all-zero pattern.
    # This looks inverted (either the name or the condition below) — confirm
    # intended dedup behavior.
    is_table_zero = bit_str != "0"*len(bit_str)
    if table_set.has_entry(table_s) and not is_table_zero:
      # if table is already stored, we do not store it again.
      # from time to time, we may want to check if we have found
      # diverging output values.
      if DEBUG:
        (table_o_old, old_n) = table_set_line[table_s]
        if table_o != table_o_old:
          print("contradicting outputs n={} old_n={} out_p={} out={}".format(
              n, old_n, table_o_old, table_o))
          print(" I:", table_s)
          print(" I:", i_values)
          print("<<<", table_o_old)
          print(">>>", table_o)
          return (None, None)
      continue
    # these are unique table entries
    table_ins.append(table_i)
    table_ous.append(table_o)
    # we store this information in order to be able to debug
    # and discard information.
    table_set.add_entry(table_s)
    if DEBUG:
      table_set_line[table_s] = (table_o, n)
  print("... indexing table from {} to {} completed".format(ni, ni+size))
  return (table_ins, table_ous)


def parallel_compress_output_table(
    filename, header, table_ins, table_ous, output_group,
    generate_pla, n_bits_og, o, o_bits):
  """Compresses one output group's table and writes it to disk (worker).

  Args:
    filename: destination file path.
    header: list of header lines (PLA directives) written first.
    table_ins: encoded input rows.
    table_ous: encoded output rows (all output channels concatenated).
    output_group: number of output channels bundled into this PE.
    generate_pla: PLA format if true, csv otherwise.
    n_bits_og: number of bits used for the output-group selector field.
    o: index of the first output channel of this group.
    o_bits: number of encoded bits per output value.
  """
  f = open(filename, "w")
  f.write("".join(header))
  c = Compressor()
  for n in range(len(table_ins)):
    for og in range(output_group):
      if output_group > 1:
        if generate_pla:
          if OG_IS_SYMBOLIC:
            # one-hot selector for the channel within the group
            og_l = ["0"] * n_bits_og
            og_l[n_bits_og - 1 - og] = "1"
            og_b = "".join(og_l)
            table_i_suffix = " " + og_b
          else:
            # binary-encoded selector, zero-padded to n_bits_og
            og_b = bin(og)[2:]
            table_i_suffix = " " + "0"*(n_bits_og - len(og_b)) + og_b
        else:
          table_i_suffix = "," + str(og)
      else:
        table_i_suffix = ""
      table_i = table_ins[n] + table_i_suffix
      # slice out the bits belonging to output channel o+og
      table_o = table_ous[n][(o+og)*o_bits:(o+og+1)*o_bits]
      if generate_pla:
        c.add_entry(table_i + " " + table_o)
      else:
        c.add_entry(table_i + "," + str(table_o[0]))
  for line in c():
    f.write("{}\n".format(line[0]))
  if generate_pla:
    f.write(".e\n")
  f.close()


def optimize_dense_logic(
    model, i_name, o_name, x_train, i_dict, o_dict, output_group=1,
    samples=2000, generate_pla=True, prefix=""):
  """Generates table for logic synthesis for dense or flattened layer.

  Generates table in either espresso format or csv format to be optimized
  for logic synthesis.

  Arguments:
    model: Keras model.
    i_name: name of the layer whose (quantized) input feeds the table.
    o_name: name of quantized output layer.
    x_train: training set to be used to dump table.
    i_dict: dictionary of floating point values to encoding for inputs.
    o_dict: dictionary of floating point values to encoding for outputs.
    output_group: by default, we compute one PE per channel output. The user
      can override that by specifying how many output channels should be
      bundled into the same PE.
    samples: how many images from x_train should be sampled when generating
      the tables.
    generate_pla: if true, we generate table in pla format. Otherwise, we
      generate a csv file.
    prefix: prefix name to create a directory.

  Returns:
    list of files generated.
  """
  i_layer = model.get_layer(i_name)
  o_layer = model.get_layer(o_name)

  # resample inputs
  skip = min(2000, samples)
  indexes = np.array(range(x_train.shape[0]))
  np.random.shuffle(indexes)
  x_train = x_train[indexes[:samples]]

  # build a sub-model that predicts both the input tensor(s) of i_layer and
  # the output tensor of o_layer, so we can tabulate input->output pairs.
  outputs = []
  x = i_layer.input
  y = o_layer.output
  if not isinstance(x, list):
    x = [x]
  outputs = x + [y]
  mo = Model(inputs=model.inputs, outputs=outputs)
  p = mo.predict(x_train)

  # in csv mode, each entry has "1" value, for PLA,
  # we encode the floating point into multiple bits.
  if not generate_pla:
    i_bits = 1
    # i_dict = {v:v for v in i_dict.keys()}
  else:
    i_bits = len(six.next(six.itervalues(i_dict)))
  if not generate_pla:
    o_bits = 1
    # o_dict = {v:v for v in o_dict.keys()}
  else:
    o_bits = len(six.next(six.itervalues(o_dict)))

  # sanity print: inputs and outputs should be quantized ranges
  print("inputs:")
  for i in range(len(x)):
    print(i, np.min(p[i]), np.max(p[i]))
  print("outputs:")
  print(0, np.min(p[-1]), np.max(p[-1]))

  # o_size and i_size are the channel sizes of the outputs and inputs
  o_size = y.shape[-1]
  i_size = p[0].shape[-1]

  if generate_pla:
    suffix = "pla"
  else:
    suffix = "csv"

  prefix = prefix + "/" if prefix else ""

  # lets try to remove the directory and create a new one
  try:
    shutil.rmtree(prefix + i_layer.name + "." + suffix)
  except OSError:
    pass
  try:
    os.makedirs(prefix + i_layer.name + "." + suffix)
  except OSError:
    pass

  print("...indexing inputs")

  # for each image in sampled x_train
  # on Intel processors, mp.cpu_count() returns number of threads
  number_of_processes = mp.cpu_count() // 2
  pool = mp.Pool(number_of_processes)
  results = []
  for n in range(0, x_train.shape[0], skip):
    res = pool.apply_async(
        parallel_index_table,
        args=((p[0][n:n+skip], p[1][n:n+skip]), n, skip,
              i_dict, o_dict, generate_pla))
    results.append(res)
  pool.close()
  pool.join()
  # pool has joined, so results are ready; short timeout is safe here
  all_pools = [res.get(timeout=1) for res in results]
  table_ins = sum([ap[0] for ap in all_pools], [])
  table_ous = sum([ap[1] for ap in all_pools], [])

  # input and output size
  ni = len(table_ins[0])
  no = len(table_ous[0])

  print("... generating tables {} outputs, {} entries".format(
      o_size, len(table_ins)))

  # this step should be very fast
  files = []

  # number of bits of the output-group selector field
  if OG_IS_SYMBOLIC:
    if output_group > 1:
      n_bits_og = output_group
    else:
      n_bits_og = 1
  else:
    if output_group == 2:
      n_bits_og = 1
    else:
      n_bits_og = int(np.ceil(np.log2(output_group)))

  # sometimes linux get very grumpy with too many files opened.
  # let's limit to 20.
  number_of_processes = min(20, mp.cpu_count() // 2)
  pool = mp.Pool(number_of_processes)

  for o in range(0, o_size, output_group):
    filename = "{}{}.{}/{}_{}.raw.{}".format(
        prefix, i_name, suffix, i_name, o, suffix)
    files.append(filename)
    header = []
    if generate_pla:
      header.append(".i {}\n".format(ni + n_bits_og))
      header.append(".o {}\n".format(no // o_size))
      header.append(".type fr\n")
      if OG_IS_SYMBOLIC and output_group > 1:
        header.append(".mv {} {} {} {}\n".format(
            3, ni, n_bits_og, no // o_size))
      # let's generate some labels
      header.append(".ob " + " ".join([
          "o_" + str(o) + "_" + str(o_bits - 1 - v)
          for v in range(o_bits)]) + "\n")
      i_names = []
      # input label format: i_<feature>_<bit>
      assert ni == (i_size * i_bits)
      for feature in range(i_size):
        for bit in range(i_bits):
          i_names.append("i_{}_{}".format(
              feature, (i_bits - 1 - bit)))
      # if we are grouping multiple channels, these will be the inputs
      for c in range(n_bits_og):
        i_names.append("og_{}".format(n_bits_og - 1 - c))
      header.append(".ilb " + " ".join(i_names) + "\n")
    pool.apply_async(
        parallel_compress_output_table,
        args=((filename, header, table_ins, table_ous, output_group,
               generate_pla, n_bits_og, o, o_bits)))
  pool.close()
  pool.join()

  return files



================================================
FILE: experimental/lo/generate_rf_code.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# ==============================================================================
"""Generates expressions for random trees.

Emits HLS-friendly C++ (Catapult) or Verilog that evaluates sklearn random
forest regressors/classifiers as combinational logic.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import numpy as np

DEBUG = int(os.environ.get("DEBUG", 0))
PRINT_DEBUG = int(os.environ.get("PRINT_DEBUG", 0))


def gen_random_tree_regressor(
    tree, code, bits, o_bits, o_decimal_digits, o_is_neg, bdd, offset,
    is_cc=True):
  """Generates HLS friendly C++ code for random tree regressor.

  Generates HLS friendly C++ code for Catapult.

  Arguments:
    tree: decision tree regressor from SkLearn.
    code: list of code lines to be append to.
    bits: list containing number of bits for each of the inputs.
    o_bits: number of bits for output.
    o_decimal_digits: number of decimal digits (right of the decimal point
      of o_bits for approximation of regressor in RandomTreeRegressor.
    o_is_neg: True or 1 if output can be negative.
    bdd: we actually try to cache entries (i,v,n1,n0) entries so that
      if they appear again, we reuse previously computed nodes.
    offset: each variable created in this function call is incremented by
      offset.
    is_cc: if True, generates C++, else Verilog.

  Returns:
    Tuple containing last variable name and current number of variables.
  """
  # extract information from tree
  n_nodes = tree.node_count
  children_left = tree.children_left
  children_right = tree.children_right
  feature = tree.feature
  threshold = tree.threshold
  values = np.copy(tree.value)

  # output type of each generated node expression
  o_suffix = ""
  if DEBUG:
    o_type = "float"
  elif is_cc:
    o_type = "ac_fixed<{},{},{}>".format(
        o_bits + o_decimal_digits, o_bits + o_is_neg, o_is_neg)
  else:
    o_sign = " signed" if o_is_neg else ""
    if o_bits + o_decimal_digits > 1:
      o_suffix = "[{}:0]".format(o_bits + o_decimal_digits - 1)
    o_type = "wire" + o_sign + " " + o_suffix

  def round_digits(x, decimal_digits):
    """Rounds to decimal_digits to the right of the decimal point."""
    if DEBUG:
      return x
    factor = (1 << decimal_digits) * 1.0
    x = x * factor
    return np.round(x) / factor

  # mark leaves and quantize leaf values
  is_leaves = np.zeros(shape=n_nodes, dtype=bool)
  stack = [(0, -1)]
  while stack:
    node_id, parent_depth = stack.pop()
    if children_left[node_id] != children_right[node_id]:
      stack.append((children_left[node_id], parent_depth+1))
      stack.append((children_right[node_id], parent_depth+1))
    else:
      is_leaves[node_id] = True
      values[node_id] = round_digits(tree.value[node_id], o_decimal_digits)
      if (
          values[node_id].flatten()[0] != tree.value[node_id].flatten()[0]
          and DEBUG
      ):
        print(node_id, values[node_id].flatten()[0],
              tree.value[node_id].flatten()[0])

  v_name = {}
  n_vars = offset
  # NOTE(review): this clobbers the bdd argument passed by the caller, so the
  # node cache is never shared across trees — confirm whether intentional.
  bdd = {}

  def round_value_to_int(x):
    # emit the quantized leaf constant in the target language's syntax
    v = hex(int(np.round(x * (1 << (o_decimal_digits)))))
    if is_cc:
      if DEBUG:
        return str(x)
      else:
        return x  #v + " /* {} */".format(x)
    else:
      return (
          str(o_bits + o_decimal_digits) + "'h" + v[2:] +
          " /* {} */".format(x)
      )

  if is_leaves[0]:
    # degenerate tree: a single leaf, emit one constant assignment
    v_name[0] = round_value_to_int(values[0].flatten()[0])
    code.append(" {} n_{} = {};".format(o_type, n_vars, v_name[0]))
    last_var = "n_{}".format(n_vars)
    n_vars += 1
  else:
    # walk nodes bottom-up so child expressions exist before parents use them
    for i in range(n_nodes-1, -1, -1):
      if is_leaves[i]:
        continue
      if v_name.get(children_left[i], None) is not None:
        n1 = v_name[children_left[i]]
      elif is_leaves[children_left[i]]:
        n1 = round_value_to_int(values[children_left[i]].flatten()[0])
        v_name[children_left[i]] = n1
      else:
        # internal child should already be named; reaching here is a bug
        n1 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_left[i]] = n1
        raise ValueError((children_left[i], n1, is_leaves[children_left[i]]))
      if v_name.get(children_right[i], None) is not None:
        n0 = v_name[children_right[i]]
      elif is_leaves[children_right[i]]:
        n0 = round_value_to_int(values[children_right[i]].flatten()[0])
        v_name[children_right[i]] = n0
      else:
        n0 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_right[i]] = n0
        raise ValueError((children_right[i], n0, is_leaves[children_right[i]]))
      if v_name.get(i, None) is not None:
        n = v_name[i]
        last_var = v_name[i]
      elif bdd.get((feature[i], threshold[i], n1, n0), None) is not None:
        # identical (feature, threshold, children) already emitted: reuse it
        n = bdd[(feature[i], threshold[i], n1, n0)]
        v_name[i] = n
        last_var = n
      elif n1 == n0:
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n1
        v_name[i] = n1
        last_var = n1
      else:
        n = "n_" + str(n_vars)
        n_vars += 1
        v_name[i] = n
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n
        t = int(threshold[i])
        if bits[feature[i]] == 1:
          # single-bit input: a "<= 0" test is just the inverted bit
          if t == 0:
            n1, n0 = n0, n1
          code.append(
              " {} {} = (i_{}) ? {} : {}; // x_{} {}".format(
                  o_type, v_name[i], feature[i], n1, n0, i, threshold[i]))
        else:
          code.append(
              " {} {} = (i_{} <= {}) ? {} : {}; // x_{} {}".format(
                  o_type, v_name[i], feature[i], t, n1, n0, i, threshold[i]))
        last_var = v_name[i]

  return (last_var, n_vars)


def entry_to_hex(entry, max_value, size, is_cc):
  """Converts class instance to hexa number."""
  # pack the per-class counts into one integer, base (max_value+1)
  e_vector = [np.power(max_value+1, i) for i in range(len(entry)-1, -1, -1)]
  entry = np.array(entry)
  v = hex(np.sum(entry * e_vector))
  if is_cc:
    return v
  else:
    return str(size) + "'h" + v[2:] + " /* {} */".format(entry)


def gen_random_tree_classifier(
    tree, code, bits, bdd, max_value, values_rom, offset, is_cc=True):
  """Generates C++ or Verilog friendly code for random tree classifier.

  Generates HLS Catapult friendly code or RTL in Verilog for random tree
  classifier from SkLearn.

  Arguments:
    tree: RandomTreeClassifier from sklearn.
    code: list of strings containing code generated.
    bits: list containing number of bits for each of the inputs.
    bdd: we actually try to cache entries (i,v,n1,n0) entries so that
      if they appear again, we reuse previously computed nodes.
    max_value: random tree classifiers returns vector of classes with the
      number of instances found in the terminal leaf node. This variable
      specifies a clipping factor for each class type so that we have a
      bounded problem to synthesize.
    values_rom: to save space in classifier, we store class values in
      values_rom.
    offset: each variable created in this function call is incremented by
      offset.
    is_cc: if True, generates C++ code; otherwise, Verilog.

  Returns:
    Tuple containing last variable name and current number of variables.
  """
  # extract information from tree
  n_nodes = tree.node_count
  children_left = tree.children_left
  children_right = tree.children_right
  feature = tree.feature
  threshold = tree.threshold
  values = {}

  is_leaves = np.zeros(shape=n_nodes, dtype=bool)
  stack = [(0, -1)]

  rom_l = []
  # for wide leaf vectors it is cheaper to index a rom than inline constants
  use_rom = max_value >= 7
  n_classes = len(tree.value[0].flatten())
  max_bits = int(np.ceil(np.log2(max_value + 1)))

  while stack:
    node_id, parent_depth = stack.pop()
    if children_left[node_id] != children_right[node_id]:
      stack.append((children_left[node_id], parent_depth+1))
      stack.append((children_right[node_id], parent_depth+1))
    else:
      # is leaf node
      is_leaves[node_id] = True
      # get tree node output
      p_input_tuple = tree.value[node_id].flatten().astype(np.int32)
      max_input_value = np.max(p_input_tuple)
      min_input_value = np.min(p_input_tuple)
      # if max_value == 1, only keep top ones
      if max_value == 1:
        input_tuple = (p_input_tuple == max_input_value).astype(np.int32)
        tree.value[node_id] = (tree.value[node_id] == max_input_value).astype(
            tree.value[node_id].dtype)
      else:
        # if max_value <= 3:
        # SKLearn classifier computes probability for each entry instead of
        # suming them all. We should do the same.
        max_input_value = np.sum(p_input_tuple)
        min_input_value = 0
        # Just update tree.value to number so that we can compare accuracy of
        # quantization later.
        # NOTE(review): divides by max_input_value; an all-zero leaf vector
        # would make this 0/0 — confirm sklearn guarantees non-empty leaves.
        tree.value[node_id] = np.round(
            max_value * (tree.value[node_id] - min_input_value) /
            (max_input_value - min_input_value))
        input_tuple = tree.value[node_id].flatten()
      input_tuple = tuple(list(input_tuple.astype(np.int32)))
      # stores values in rom - we will use rom to store values if use_rom is
      # true.
      if values_rom.get(input_tuple, None) is None:
        values_rom[input_tuple] = len(values_rom)
        rom_l.append(input_tuple)
        if DEBUG:
          print(values_rom[input_tuple], input_tuple)
      if use_rom:
        values[node_id] = values_rom[input_tuple]
      else:
        values[node_id] = entry_to_hex(
            input_tuple, max_value, max_bits * n_classes, is_cc)

  # t_bits: entry type
  # l_bits: table line type
  if use_rom:
    t_bits = int(np.ceil(np.log2(len(values_rom))))
    l_bits = max_bits * n_classes
  else:
    t_bits = max_bits * n_classes

  # we only store the index here, as we read from a rom
  if is_cc:
    if DEBUG:
      t_type = "int"
    else:
      t_type = "ac_int<{},false>".format(t_bits)
  else:
    t_type = "wire [{}:0]".format(t_bits-1)

  v_name = {}
  n_vars = offset
  # NOTE(review): clobbers the bdd argument (same pattern as the regressor) —
  # the cross-call cache is never used; confirm intended.
  bdd = {}

  if is_leaves[0]:
    # degenerate tree: single leaf
    v_name[0] = t_type + "(" + str(values[0]) + ")"
    code.append(" {} n_{} = {};".format(
        t_type, n_vars, values[0]))
    last_var = "n_{}".format(n_vars)
    n_vars += 1
  else:
    # bottom-up emission; see gen_random_tree_regressor for the same scheme
    for i in range(n_nodes-1, -1, -1):
      if is_leaves[i]:
        continue
      if v_name.get(children_left[i], None) is not None:
        n1 = v_name[children_left[i]]
      elif is_leaves[children_left[i]]:
        if is_cc:
          n1 = t_type + "(" + str(values[children_left[i]]) + ")"
        else:
          n1 = str(values[children_left[i]])
        v_name[children_left[i]] = n1
      else:
        n1 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_left[i]] = n1
        raise ValueError((children_left[i], n1, is_leaves[children_left[i]]))
      if v_name.get(children_right[i], None) is not None:
        n0 = v_name[children_right[i]]
      elif is_leaves[children_right[i]]:
        if is_cc:
          n0 = t_type + "(" + str(values[children_right[i]]) + ")"
        else:
          n0 = str(values[children_right[i]])
        v_name[children_right[i]] = n0
      else:
        n0 = "n_" + str(n_vars)
        n_vars += 1
        v_name[children_right[i]] = n0
        raise ValueError((children_right[i], n0, is_leaves[children_right[i]]))
      if v_name.get(i, None) is not None:
        n = v_name[i]
        last_var = v_name[i]
      elif bdd.get((feature[i], threshold[i], n1, n0), None) is not None:
        n = bdd[(feature[i], threshold[i], n1, n0)]
        v_name[i] = n
        last_var = n
      elif n1 == n0:
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n1
        v_name[i] = n1
        last_var = n1
      else:
        n = "n_" + str(n_vars)
        n_vars += 1
        v_name[i] = n
        # store intermediate results so that we can build a dag, not a tree
        bdd[(feature[i], threshold[i], n1, n0)] = n
        t = int(threshold[i])
        if bits[feature[i]] == 1:
          if t == 0:
            n1, n0 = n0, n1
          code.append(
              " {} {} = (i_{}) ? {} : {}; // x_{} {}".format(
                  t_type, v_name[i], feature[i], n1, n0, i, threshold[i]))
        else:
          code.append(
              " {} {} = (i_{} <= {}) ? {} : {}; // x_{} {}".format(
                  t_type, v_name[i], feature[i], t, n1, n0, i, threshold[i]))
        last_var = v_name[i]

  if use_rom:
    # emit the rom holding the packed leaf vectors and index it by last_var
    if is_cc:
      if DEBUG:
        l_type = "int"
      else:
        l_type = "ac_int<{},false>".format(l_bits)
      code.append(" {} {}_rom[{}]".format(l_type, last_var, len(values_rom))
                  + " {")
      for i in range(len(values_rom)):
        code_s = " " + entry_to_hex(rom_l[i], max_value, l_bits, is_cc)
        if i < len(values_rom)-1:
          code_s = code_s + ","
        code.append(code_s)
      code.append(" };")
    else:
      l_type = "wire [{}:0]".format(l_bits - 1)
      code.append(" function [{}:0] {}_rom;".format(l_bits-1, last_var))
      code.append(" input [{}:0] address;".format(t_bits-1))
      code.append(" begin")
      code.append(" case (address)")
      for i in range(len(values_rom)):
        code.append(" {}'d{}: {}_rom = {};".format(
            l_bits, i, last_var,
            entry_to_hex(rom_l[i], max_value, l_bits, is_cc)))
      code.append(" default: {}_rom = 0;".format(last_var))
      code.append(" endcase")
      code.append(" end")
      code.append(" endfunction")
    code.append(" {} v_{} = {}_rom[{}];".format(
        l_type, last_var, last_var, last_var))
    last_var = "v_" + last_var

  return last_var, n_vars


def gen_random_forest(
    rf, name, bits, is_neg, o_bits, o_is_neg, is_regressor=True,
    is_top_level=False, is_cc=True):
  """Generates HLS based C++ or SystemVerilog code for random forest.

  Args:
    rf: fitted sklearn random forest (estimators_ are traversed).
    name: name of the generated top-level function/module.
    bits: per-input bit widths (numpy array).
    is_neg: per-input signedness flags.
    o_bits: number of output bits.
    o_is_neg: True/1 if the output can be negative.
    is_regressor: emit regressor (averaging) vs classifier (voting) logic.
    is_top_level: mark the generated entity as the HLS top-level design.
    is_cc: if True generate C++ for Catapult, else Verilog.

  Returns:
    list of generated source lines (header + per-tree code + footer).
  """
  # TODO(nunescoelho): need to take care of multiple outputs for classifier.
  #   we can get better result if we do not look at the winning classifier,
  #   but sum how many of them appear in each classifier for leaf nodes.
  bdd = {}
  values_rom = {}
  offset = 0
  code = []

  # MAX_BITS bounds the per-class leaf counts (classifier) and sets the
  # number of fractional digits (regressor).
  max_value = (1 << int(os.environ.get("MAX_BITS",1))) - 1
  decimal_digits = int(os.environ.get("MAX_BITS", 5))
  assert max_value > 0

  # emit one expression tree per estimator; o_list collects each tree's
  # final output variable name
  o_list = []
  for i in range(len(rf.estimators_)):
    tree = rf.estimators_[i].tree_
    code.append(" //----- TREE {}".format(i))
    if is_regressor:
      last_var, offset = gen_random_tree_regressor(
          tree, code, bits, o_bits, decimal_digits, o_is_neg, bdd, offset,
          is_cc)
    else:
      values_rom = {}
      last_var, offset = gen_random_tree_classifier(
          tree, code, bits, bdd, max_value, values_rom, offset, is_cc)
    o_list.append(last_var)

  if is_cc:
    # NOTE(review): the "#include " literals below appear truncated — the
    # original header names (in angle brackets) were likely lost in an
    # HTML-ish extraction step; recover them from repository history.
    header = [
        "#include ",
        "#include ",
        "#include ",
        "using namespace std;",
        "//#define _PRINT_DEBUG_",
        "#define PB(n) cout << #n << \":\" << n << endl;",
        "#define PS(n) \\",
        " cout << #n << \":\" << n.to_double() << \" \"; \\",
        " for(int i=n.width-1; i>=0; i--) cout << n[i]; cout << endl;"
    ]
    if DEBUG:
      # float reference implementation of round-half-to-even
      # NOTE(review): the two literals after "truncf(x + 0.5);" are joined by
      # implicit string concatenation (missing comma), so "}" lands on the
      # same emitted line — harmless C but probably unintended.
      header = header + [
          "static inline float round_even(float x) {",
          " int x_int = truncf(x);",
          " float x_dec = x - x_int;",
          " if ((x_dec == 0.5) && (x_int % 2 == 0)) {",
          " return truncf(x);",
          " } else {",
          " return truncf(x + 0.5);"
          " }",
          "}"
      ]
      if is_top_level:
        header.append("#pragma hls_design top")
      # NOTE(review): o_bits is passed but the format string has only two
      # placeholders — the extra argument is silently ignored.
      header.append("void {}(int in[{}], int &out)".format(
          name, np.sum(bits), o_bits) + " {")
    else:
      # fixed-point round-half-to-even over the ac_fixed accumulator
      n_bits = int(np.ceil(np.log2(len(o_list))))
      header = header + [
          "static inline ac_int<{},{}> round_even(ac_fixed<{},{},{}> x)".format(
              o_bits, o_is_neg, n_bits + o_bits + decimal_digits,
              n_bits + o_bits + o_is_neg, o_is_neg
          ) + " {",
          " bool x_int_is_even = x[{}] == 0;".format(decimal_digits + n_bits),
          " bool x_frac_is_0_5 = x[{}] && (x.slc<{}>(0) == 0);".format(
              n_bits + decimal_digits-1, n_bits + decimal_digits-1),
          " if (x_frac_is_0_5 && x_int_is_even) {",
          " return x.slc<{}>({});".format(o_bits, n_bits + decimal_digits),
          " } else {",
          " ac_int<{},{}> r = x.slc<{}>({}) + 1;".format(
              o_bits + 1, o_is_neg, o_bits + 1, n_bits + decimal_digits - 1),
          " return r.slc<{}>(1);".format(o_bits + 1),
          #" return (x + ac_fixed<{},{},{}>({})).slc<{}>({});".format(
          #    n_bits + o_bits + decimal_digits, n_bits + o_bits + o_is_neg,
          #    o_is_neg, 1<<(n_bits+decimal_digits-1),
          #    o_bits, n_bits + decimal_digits),
          #    #o_is_neg, len(o_list)/2, o_bits, n_bits + decimal_digits),
          " }",
          "}"
      ]
      if is_top_level:
        header.append("#pragma hls_design top")
      header.append("void {}(ac_int<{},0> in, ac_int<{},{}> &out)".format(
          name, np.sum(bits), o_bits, o_is_neg) + " {")
  else:
    # Verilog module header with a round_even function
    n_bits = int(np.ceil(np.log2(len(o_list))))
    i_decl = " input [{}:0] in;".format(np.sum(bits)-1)
    o_sign = "signed " if o_is_neg else ""
    o_decl = " output " + o_sign + "[{}:0] out;".format(o_bits-1)
    header = [
        "module " + name + "(in, out);",
        i_decl,
        o_decl,
        "",
        " function {}[{}:0] round_even;".format(o_sign, o_bits),
        " input {}[{}:0] x;".format(
            o_sign, n_bits + o_bits + decimal_digits - 1),
        " reg x_int_is_even;",
        " reg x_frac_is_0_5;",
        " reg {}[{}:0] round_sum;".format(o_sign, o_bits + 1),
        " begin",
        " x_int_is_even = x[{}] == 0;".format(decimal_digits + n_bits),
        " x_frac_is_0_5 = x[{}] && (x[{}:0] == 0);".format(
            n_bits + decimal_digits-1, n_bits + decimal_digits - 2),
        " if (x_frac_is_0_5 && x_int_is_even)",
        " round_even = x[{}:{}];".format(
            n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits),
        " else",
        " begin",
        " round_sum = x[{}:{}] + 1;".format(
            n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits - 1),
        " round_even = round_sum[{}:1];".format(o_bits + 1),
        " end",
        #" round_even = (x + {})[{}:{}];".format(
        #    #(1 << (n_bits + decimal_digits - 1)),
        #    n_bits + decimal_digits + o_bits - 1, n_bits + decimal_digits),
        " end",
        " endfunction"
    ]

  # unpack the flat input bus into one named variable per feature
  all_bits = np.sum(bits)
  sum_i = 0
  for i in range(bits.shape[0]):
    if is_cc:
      if bits[i] > 1:
        if DEBUG:
          header.append(" int i_{} = in[{}];".format(i, i))
        else:
          header.append(" ac_int<{},{}> i_{} = in.slc<{}>({});".format(
              bits[i], is_neg[i], i, bits[i], sum_i))
      else:
        header.append(" bool i_{} = in[{}];".format(i, sum_i))
    else:
      if bits[i] == 1:
        header.append(" wire i_{} = in[{}];".format(i, all_bits - sum_i - 1))
      else:
        # NOTE(review): width spec uses bits[i] (not bits[i]-1) and the slice
        # bounds mix little/big-endian bit orderings — verify against a
        # generated netlist before trusting the Verilog path.
        header.append(" wire i_{}[{}:0] = in[{}:{}];".format(
            i, bits[i], sum_i + bits[i] - 1, all_bits - sum_i - 1))
    sum_i += bits[i]

  footer = []

  if is_regressor:
    # regressor: sum all tree outputs and round-to-even the average
    n_bits = int(np.ceil(np.log2(len(o_list))))
    assert 1 << n_bits == len(o_list)
    if is_cc:
      if DEBUG:
        tmp_type = "float"
      else:
        tmp_type = "ac_fixed<{},{},{}>".format(
            n_bits + o_bits + decimal_digits,
            n_bits + o_bits + o_is_neg, o_is_neg)
      avg_o = " {} o_tmp = {};".format(tmp_type, " + ".join(o_list))
      # rnd_o = " o_tmp += {}({});".format(tmp_type, len(o_list)/2)
      if DEBUG:
        out = " out = round_even(o_tmp / {});".format(len(o_list))
      else:
        out = " out = round_even(o_tmp);"
      footer.append(" #ifdef _PRINT_DEBUG_")
      for o_name in o_list:
        footer.append(" PS({});".format(o_name))
      footer.append(" #endif")
      closing = "}"
    else:
      tmp_sign = "signed " if o_is_neg else ""
      avg_o = " wire " + tmp_sign + "[{}:0] o_tmp = {};".format(
          n_bits + o_bits + decimal_digits - 1, " + ".join(o_list))
      for n in o_list:
        footer.append(" // always @({}) $display(\"{} = %f (%b)\", {} / 32.0, {});".format(n,n,n,n))
      footer.append(" // always @(o_tmp) $display(\"o_tmp = %b\", o_tmp);")
      out = " assign out = round_even(o_tmp);"
      closing = "endmodule"
    footer = footer + [avg_o, out, closing]
  else:
    # classifier: per-class vote counters followed by an argmax
    assert not o_is_neg
    footer = []
    o_suffix = ""
    if DEBUG:
      o_type = "int"
    elif is_cc:
      o_type = "ac_int<{},{}>".format(o_bits, o_is_neg)
    else:
      o_sign = " signed" if o_is_neg else ""
      o_suffix = "[{}:0]".format(o_bits)
      o_type = "wire" + o_sign + " " + o_suffix
    if is_cc:
      n_classes = 1 << o_bits
      max_bits = int(np.ceil(np.log2(max_value + 1)))
      log2_o_list = int(np.ceil(np.log2(len(o_list))))
      if DEBUG:
        log2_o_type = "int"
      else:
        log2_o_type = "ac_int<{},false>".format(log2_o_list + max_bits)
      sum_v = (
          " {} sum[{}] = ".format(
              log2_o_type, 1 << o_bits) + "{" +
          ",".join("0" * (1 << o_bits)) + "};"
      )
      footer = [sum_v]
      for o_name in o_list:
        # accumulate each class's packed count from every tree output
        for i in range(n_classes):
          if DEBUG:
            footer.append(" sum[{}] += ({} >> {}) & {};".format(
                i, o_name, (n_classes - i) * max_bits - max_bits,
                hex((1 << max_bits) - 1)))
          else:
            footer.append(" sum[{}] += {}.slc<{}>({});".format(
                i, o_name, max_bits, (n_classes - i) * max_bits - max_bits))
        debug_print = []
        for i in range(n_classes):
          debug_print.append("{}.slc<{}>({}).to_string(AC_DEC)".format(
              o_name, max_bits, (n_classes - i) * max_bits - max_bits))
        footer_s = (
            " cout << \"{} \" <<".format(o_name) +
            " << \" \" << ".join(debug_print) + " << endl;"
        )
        footer.append(" #ifdef _PRINT_DEBUG_")
        footer.append(footer_s)
        footer.append(" #endif")
      # argmax over the per-class sums
      footer.append(" {} max_tmp = sum[0];".format(log2_o_type))
      footer.append(" {} max_id = 0;".format(o_type))
      footer.append(" for(int i=1; i<{}; i++)".format(1 << o_bits))
      footer.append(
          " if (sum[i] >= max_tmp) { max_tmp = sum[i]; max_id = i; }")
      out = " out = max_id;"
      footer.append(out)
      footer += ["}"]
    else:
      n_classes = 1 << o_bits
      max_bits = int(np.ceil(np.log2(max_value + 1)))
      log2_o_list = int(np.ceil(np.log2(len(o_list))))
      log2_o_type = "wire [{}:0]".format(log2_o_list + max_bits)
      footer = []
      for i in range(n_classes):
        code_s = " {} sum_{} = ".format(log2_o_type, i)
        code_term = []
        for o_name in o_list:
          code_term.append("{}[{}:{}]".format(
              o_name, (n_classes - i) * max_bits,
              (n_classes - i) * max_bits - max_bits))
        code_s += " + ".join(code_term) + ";"
        footer.append(code_s)
        footer.append(" // always @(sum_{}) $display(\"sum_{} = %d\", sum_{});".format(
            i, i, i))
      footer.append(" reg [{}:0] max_tmp;".format(
          log2_o_list + max_bits - 1))
      footer.append(" reg [{}:0] max_id;".format(o_bits-1))
      footer.append(" integer i;")
      footer.append(" always @(" + " or ".join(
          ["sum_" + str(i) for i in range(n_classes)]) + ")")
      footer.append(" begin")
      footer.append(" max_tmp = sum_0; max_id = 0;")
      for i in range(1, n_classes):
        footer.append(
            " if (sum_{} >= max_tmp) begin max_tmp = sum_{}; max_id = {}; end".format(
                i, i, i))
      footer.append(" end")
      footer.append(" assign out = max_id;")
      footer.append("endmodule")

  return header + code + footer


def gen_testbench_sv(rf, name, bits, is_neg, o_bits, o_is_neg, x, y, p, code):
  """Appends a Verilog testbench to code; writes x.rom/y.rom/p.rom files.

  The testbench feeds each row of x to the generated module and counts the
  outputs matching either the label (y) or the float-model prediction (p).
  """
  code.append("module tb;")
  x_0, x_1 = x.shape
  # NOTE(review): x_0_log2 is computed but never used.
  x_0_log2 = int(np.ceil(np.log2(x_0)))
  code.append("reg [{}:0] x_rom[{}:0];".format(x_1-1, x_0-1))
  code.append("initial $readmemb(\"x.rom\", x_rom, 0, {});".format(x_0-1))
  with open("x.rom", "w") as f:
    for i in range(len(x)):
      f.write("".join([str(int(v)) for v in x[i]]) + "\n")
  o_sign = "signed " if o_is_neg else ""
  o_type = o_sign + "[{}:0]".format(o_bits - 1)
  code.append("reg {} y_rom[{}:0];".format(o_type,x_0-1))
  code.append("reg {} p_rom[{}:0];".format(o_type,x_0-1))
  with open("y.rom","w") as f:
    for i in range(len(y)):
      f.write(hex(int(y[i]))+ "\n")
  with open("p.rom","w") as f:
    for i in range(len(y)):
      f.write(hex(int(p[i]))+ "\n")
  code.append("initial $readmemh(\"y.rom\", y_rom, 0, {});".format(x_0-1))
  code.append("initial $readmemh(\"p.rom\", p_rom, 0, {});".format(x_0-1))
  code.append("integer i;")
  code.append("integer cnt;")
  code.append("reg [{}:0] in;".format(x_1-1))
  code.append("wire {} out;".format(o_type))
  code.append("{} {}(in, out);".format(name, name))
  code.append("initial")
  code.append("begin")
  code.append(" cnt = 0;")
  # NOTE(review): this read of x_rom[i] happens before i is initialized by
  # the for loop below — likely a leftover line.
  code.append(" in = x_rom[i];")
  code.append(" for (i=0; i<{}; i=i+1)".format(x_0))
  code.append(" begin")
  code.append(" in = x_rom[i];")
  code.append(" #1000;")
  code.append(" if (p_rom[i] != out && y_rom[i] != out)")
  code.append(" begin")
  code.append(" $display(\"%d: %b y=%d p=%d -> %d\", i, x_rom[i], y_rom[i], p_rom[i], out);")
  code.append(" end")
  code.append(" else")
  code.append(" begin")
  code.append(" cnt = cnt + 1;")
  code.append(" end")
  code.append(" end")
  code.append(" $display(\"acc = %f\", 100.0 * cnt / {});".format(x_0))
  code.append("end")
  code.append("endmodule")


def gen_testbench_cc(rf, name, bits, is_neg, o_bits, o_is_neg, x, y, p, code):
  """Appends a C++ testbench to code, embedding x/y/p as literal arrays.

  Counts outputs that match either the label (y) or the float-model
  prediction (p) and prints the resulting accuracy.
  """
  code.append("int x[{}][{}] = ".format(*x.shape) + "{")
  for i in range(len(x)):
    code_s = " {" + ",".join([str(int(v)) for v in x[i]]) + "}"
    if i < len(x) - 1:
      code_s = code_s + ","
    code.append(code_s)
  code.append("};")
  code_s = (
      "int y[{}] = ".format(y.shape[0]) + "{" +
      ",".join([str(int(v)) for v in y]) + "};"
  )
  code.append(code_s)
  code_s = (
      "int p[{}] = ".format(p.shape[0]) + "{" +
      ",".join([str(int(v)) for v in p]) + "};"
  )
  code.append(code_s)
  code.append("int main()")
  code.append("{")
  code.append(" double acc = 0.0;")
  if DEBUG:
    code.append(" int in[{}];".format(x.shape[1]))
    code.append(" int out;")
  else:
    code.append(" ac_int<{},0> in;".format(x.shape[1]))
    code.append(" ac_int<{},{}> out;".format(o_bits, o_is_neg))
  code.append(" for (int i=0; i<{}; i++)".format(x.shape[0]) + "{")
  code.append(" for (int j=0; j<{}; j++) in[j] = x[i][j];".format(
      x.shape[1]))
  code.append(" {}(in, out);".format(name))
  code.append(" if (p[i] != out && y[i] != out) {")
  code.append(" cout << i << \": \";")
  code.append(" for (int j=0; j<{}; j++) cout << in[j];".format(
      x.shape[1]))
  if DEBUG:
    code.append(" cout << \" y=\" << y[i] << \" p=\" << p[i] << \" \" << out << endl;")
    code.append(" }")
    code.append(" acc += (y[i] == out);")
  else:
    code.append(" cout << \" y=\" << y[i] << \" p=\" << p[i] << \" \" << out.to_int() << endl;")
    code.append(" #ifdef _PRINT_DEBUG_")
    code.append(" exit(1);")
    code.append(" #endif")
    code.append(" }")
    code.append(" acc += (y[i] == out.to_int());")
  code.append(" }")
  code.append(" cout << \"acc = \" << 100.0 * acc / {} << endl;".format(
      x.shape[0]))
  code.append("}")



================================================
FILE: experimental/lo/optimizer.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Implements random forest or logic otimizer function.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import multiprocessing as mp import os import pickle import random import shutil import subprocess import sys import time import warnings import numpy as np import six from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble import RandomForestRegressor from .compress import Compressor from .generate_rf_code import gen_random_forest from .table import load def file_compress(fin, fout): """Compresses table using hash set.""" c = Compressor() n_lines = 0 for line in open(fin): n_lines += 1 line = line.strip() c.add_entry(line) f = open(fout, "w") n_compressed = 0 for line in c(): n_compressed += 1 f.write(line + "\n") f.close() print("... 
random forrest for {} reduced from {} to {} entries".format( os.path.basename(fin), n_lines, n_compressed)) def mp_rf_optimizer_func(fn_tuple): """Executes in parallel creation of random forrest creation.""" fn, flags, file_suffix = fn_tuple n_trees = flags["n_trees"] is_regressor = flags["is_regressor"] sample_size = flags["sample_size"] n_features = flags["n_features"] max_depth = flags["max_depth"] if not file_suffix: file_suffix = "none" path_split = fn.split("/") path = "/".join(path_split[:-1]) + "/" fn_split = path_split[-1].split(".") # o_file = path + ".".join(fn_split[0:-2] + [fn_split[-1]]) cv_file = path + ".".join(fn_split[0:-2] + [file_suffix]) rfb_file = path + ".".join(fn_split[0:-2] + ["rb", "bin"]) # let's compress the table first to make the job easier for random forest. # compression can usually achieve a ratio of 50x or more. # compress(fn, o_file) train = load(fn) n_features = "auto" if not n_features else float(n_features) # min_size = 1 if max_depth: max_depth = int(max_depth) print("... 
creating random forrest for " + os.path.basename(fn) + " with " + str(sample_size) + " samples") if is_regressor: rf = RandomForestRegressor( n_estimators=n_trees, max_depth=max_depth, # min_samples_split=2, # min_samples_leaf=min_size, max_features=n_features, # max_leaf_nodes=100, # oob_score=True, # warm_start=True, bootstrap=True, random_state=42, n_jobs=1) else: rf = RandomForestClassifier( n_estimators=n_trees, max_depth=max_depth, # min_samples_split=2, # min_samples_leaf=min_size, max_features=n_features, # max_leaf_nodes=100, # oob_score=True, # warm_start=True, bootstrap=True, random_state=42, n_jobs=1) if sample_size and train.shape[0] >= 10000: sample_size = int(sample_size) np.random.seed(42) idx = np.random.choice(train.shape[0], train.shape[0], replace=False) x = train[idx[sample_size:], 0:-1] y = train[idx[sample_size:], -1] x_test = train[idx[0:sample_size], 0:-1] y_test = train[idx[0:sample_size], -1] else: x = train[:, 0:-1] y = train[:, -1] x_test = x y_test = y estimators = [] with warnings.catch_warnings(): warnings.simplefilter("ignore") rf.fit(x, y) func_name = fn_split[0] bits = np.ceil( np.log2( np.abs( np.amax(x, axis=0) - np.amin(x, axis=0) + 1))).astype(np.int32) is_neg = (np.amin(x, axis=0) < 0).astype(np.int8) o_bits = np.ceil( np.log2( np.abs( np.amax(y, axis=0) - np.amin(y, axis=0) + 1))).astype(np.int32) o_is_neg = (np.amin(y, axis=0) < 0).astype(np.int8) rf.bits = bits rf.is_neg = is_neg rf.o_bits = o_bits rf.o_is_neg = o_is_neg code = gen_random_forest( rf, func_name, bits, is_neg, o_bits, o_is_neg, is_regressor=is_regressor, is_top_level=False, is_cc=file_suffix == "cc") open(cv_file, "w").write("\n".join(code)) p = 1.0 * np.round(rf.predict(x_test)) dy = np.max(train[:, -1]) - np.min(train[:, -1]) error = np.sum(np.abs(y_test - p)) / (1.0 * p.shape[0] * dy) score = np.sum(y_test == p) / p.shape[0] print("y:", np.max(y_test), y_test[0:30].astype(np.int32)) print("p:", np.max(p), p[0:30].astype(np.int32)) print("... 
model {} with score of {:.2f}% and error of {:.2f}%".format( func_name, 100.0*score, 100.0*error)) print("... saving model in {}".format(rfb_file)) pickle.dump(rf, open(rfb_file, "wb")) return rfb_file def mp_abc_optimizer_func(fn): """Performs espresso and abc optimization on a single espresso input.""" fn_split = fn.split(".") o_file = ".".join(fn_split[0:-2] + [fn_split[-1]]) v_file = ".".join(fn_split[0:-2] + ["v"]) b_file = ".".join(fn_split[0:-2] + ["blif"]) print("...running espresso in " + fn) espresso_flags = os.environ.get("ESPRESSO_FLAGS", "-Dexpand") cmd = "espresso {} {} > {}".format(fn, espresso_flags, o_file) output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) output = output.strip() if output: print(output) sys.stdout.flush() # check if network is empty for line in open(o_file): line = line.strip() if line[0:2] == ".p": terms = int(line[2:]) # empty : espresso optimized away all the logic if terms == 0: shutil.copyfile(fn, o_file) break print("...running abc in " + o_file) abc_flags = os.environ.get("ABC_FLAGS", "") abc_flags_list = abc_flags.split(";") if abc_flags else [] abc_cmds_list = ( ["read_pla " + o_file] + abc_flags_list + ["strash", "dc2", "strash", "if -K 3", "write_verilog " + v_file, "write_blif " + b_file ]) abc_cmds = ";".join(abc_cmds_list) cmd = "abc -c '" + abc_cmds + "'" output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) output = output.strip() if output: print(output) sys.stdout.flush() print("...generated " + v_file) def run_abc_optimizer(files): """Implements logic optimizer using espresso/abc.""" # intel processors sometimes return number of threads, not processors cpus = mp.cpu_count() // 2 start_time = time.time() pool = mp.Pool(cpus) pool.map(mp_abc_optimizer_func, files) pool.close() print("Optimizer ran in {} seconds.".format(time.time() - start_time)) def run_rf_optimizer(files, flags, file_suffix="cc"): """Implements random forest main optimizer.""" # intel processors 
sometimes return number of threads, not processors cpus = mp.cpu_count() // 2 start_time = time.time() pool = mp.Pool(cpus) pool.map(mp_rf_optimizer_func, zip( files, [flags]*len(files), [file_suffix]*len(files))) pool.close() print("Optimizer ran in {} seconds.".format(time.time() - start_time)) # generates header file # .../.../.../conv2d_0_m.csv/conv2d_0_m_0.csv # # returns conv2d_0_m for module_name module_name = files[0].split("/")[-2].split(".")[0] path_split = files[0].split("/") path = "/".join(path_split[:-1]) + "/" fn_split = path_split[-1].split(".") rfb_file = path + ".".join(fn_split[0:-2] + ["rb", "bin"]) rf = pickle.load(open(rfb_file, "rb")) f = open(path + module_name + "." + file_suffix, "w") if file_suffix == "cc": f.write("#include \n\n") modules = [] for fn in files: path_split = fn.split("/") path = "/".join(path_split[:-1]) + "/" fn_split = path_split[-1].split(".") v_file = ".".join(fn_split[0:-2] + [file_suffix]) func_name = fn_split[0] if file_suffix == "v": f.write("'include \"" + v_file + "\"\n") else: f.write("#include \"" + v_file + "\"\n") modules.append(func_name) f.write("\n\n") if file_suffix == "v": f.write("module " + module_name + "(") f.write("input [" + str(np.sum(rf.bits)-1) + ":0] in, ") o_sign = " signed " if rf.o_is_neg else "" f.write("output " + o_sign + "[" + str(len(modules)*rf.o_bits-1) + ":0] out);\n") else: f.write("void " + module_name + "(") f.write("ac_int<" + str(np.sum(rf.bits)) + ",false> in, ") f.write("ac_int<" + str(len(modules)*rf.o_bits) + "," + ("true" if rf.o_is_neg else "false") + "> &out)\n") f.write("{\n") for o in range(len(modules)): if file_suffix == "v": f.write(" wire " + ("signed " if rf.o_is_neg else "") + "[" + str(rf.bits[-1]-1) + ":0] " "o_" + str(o) + ";\n") f.write(" " + modules[o] + "(in, o_" + str(o) + ");\n") f.write(" assign out[" + str(rf.o_bits*(o+1)-1) + ":" + str(rf.bits[-1]*o) + "] = o_" + str(o) + ";\n") else: f.write(" ac_int<" + str(rf.o_bits) + "," + ("true" if rf.o_is_neg 
else "false") + "> o_" + str(o) + "; " + modules[o] + "(in, o_" + str(o) + "); out.set_slc<" + str(rf.o_bits) + ">(" + str(rf.o_bits*o) + "," + "o_" + str(o) + ");\n") if file_suffix == "cc": f.write("}") f.close() ================================================ FILE: experimental/lo/random_forest/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from .utils import load from .utils import load_csv from .utils import load_pla # from .random_forest import RandomForest # from .random_tree import RandomTree ================================================ FILE: experimental/lo/random_forest/gen_random_tree.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Generates expressions for random trees.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from sklearn.tree import DecisionTreeClassifier from sklearn.tree import DecisionTreeRegressor def gen_random_tree_cc(tree): n_nodes = tree.node_count children_left = tree.children_left children_right = tree.children_right feature = tree.feature threshold = tree.threshold node_depth = np.zeros(shape=n_nodes, dtype=np.int64) is_leaves = np.zeros(shape=n_nodes, dtype=bool) stack = [(0, -1)] while (len(stack) > 0): node_id, parent_depth = stack.pop() node_depth[node_id] = parent_depth + 1 if children_left[node_id] != children_right[node_id]: stack.append((chidren_left[node_id], parent_depth+1)) stack.append((children_right[node_id], parent_depth+1)) else: is_leaves[node_id] = True for i in range(n_nodes): if is_leaves[i]: print("{}n_{} leaf node.".format(" "*node_depth[i], i)) else: print("{}n_{} (i_{} <= {}) ? n_{} : n_{}".format( " "*node_depth[i], i, feature[i], threshold[i], children_left[i], children_right[i])) ================================================ FILE: experimental/lo/random_forest/parser.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Parses PLA format usig ply.""" from ply import yacc from ply import lex import numpy as np _1 = 1 _0 = 2 _X = 3 _U = 0 NOT = {_0: _1, _1: _0, _X: _U, _U: _U} class PLA: def __init__(self): self.pla_i = [] self.pla_o = [] pla = PLA() tokens = [ "I", "O", "MV", "ILB", "OB", "P", "L", "E", "TYPE", "SYMBOL", "NUMBER", "NEWLINE" ] t_ignore = " \t|" t_I = r"\.[iI]" t_O = r"\.[oO]" t_MV = r"\.[mM][vV]" t_ILB = r"\.[iI][lL][bB]" t_OB = r"\.[oO][bB]" t_P = r"\.[pP]" t_L = r"\.[lL]" t_E = r"\.[eE]" t_TYPE = r"\.type" t_SYMBOL = r"[a-zA-Z_][a-zA-Z0-9_\<\>\-\$]*" def t_NUMBER(t): r"[\d\-]+" return t def t_NEWLINE(t): r"\n+" t.lexer.lineno += t.value.count("\n") return t def t_error(t): print("Illegal character '{}'".format(t.value)) t.lexer.skip(1) lex.lex() def p_pla(p): """pla : pla_declarations pla_table pla_end""" def p_pla_declarations(p): """pla_declarations : pla_declarations pla_declaration | pla_declaration""" def p_pla_declaration(p): """pla_declaration : I NUMBER NEWLINE | O NUMBER NEWLINE | P NUMBER NEWLINE | MV number_list NEWLINE | ILB symbol_list NEWLINE | OB symbol_list NEWLINE | L NUMBER symbol_list NEWLINE | TYPE SYMBOL NEWLINE """ token = p[1].lower() if token == ".i": pla.ni = int(p[2]) elif token == ".o": pla.no = int(p[2]) elif token == ".mv": pla.mv = [int(v) for v in p[2]] elif token == ".ilb": pla.ilb = p[2] elif token == ".ob": pla.ob = p[2] elif token == ".l": pla.label = p[2] elif token == ".type": pla.set_type = p[2] def p_pla_table(p): """pla_table : pla_table number_symbol_list NEWLINE | number_symbol_list NEWLINE""" if len(p[1:]) == 3: line = "".join(p[2]) else: line = "".join(p[1]) assert hasattr(pla, "ni") and hasattr(pla, "no") # right now we only process binary functions line = [_1 if v == "1" else _0 if v == "0" else _X for v in line] pla.pla_i.append(line[0:pla.ni]) pla.pla_o.append(line[pla.ni:]) def p_pla_end(p): """pla_end : E opt_new_line""" pass def 
p_opt_new_line(p): """opt_new_line : NEWLINE | """ pass def p_number_list(p): """number_list : number_list NUMBER | NUMBER """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_symbol_list(p): """symbol_list : symbol_list SYMBOL | SYMBOL """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_symbol_list(p): """number_symbol_list : number_symbol_list number_or_symbol | number_or_symbol """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_or_symbol(p): """number_or_symbol : NUMBER | SYMBOL """ p[0] = p[1] def p_error(p): print("Error text at {}".format(p)) #p.value)) yacc.yacc() def get_tokens(fn): lex.input("".join(open(fn).readlines())) return lex.token def parse(fn): yacc.parse("".join(open(fn).readlines())) pla.pla_i = np.array(pla.pla_i) pla.pla_o = np.array(pla.pla_o) return pla ================================================ FILE: experimental/lo/random_forest/random_forest.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Creates a random forest to generate hardware for it."""

import numpy as np
import pickle
import os

from .random_tree import RandomTree


def fit_parallel(max_depth, min_size, sample, mask_stuck_at_values):
  # Fits a single RandomTree; kept as a free function so it can be handed to
  # a process pool for parallel training.
  tree = RandomTree(max_depth, min_size)
  tree.fit(sample, mask_stuck_at_values)
  return tree


class RandomForest:
  """Ensemble of RandomTrees with Verilog/C++ (ac_int) code generation."""

  def __init__(
      self, max_depth, min_size, n_trees, use_mean=False, sample_size=None):
    # max_depth / min_size: per-tree growth limits passed to RandomTree.
    # n_trees: ensemble size.
    # use_mean: if True, predict_row averages tree outputs (regression
    #   style); otherwise it takes the majority vote.
    # sample_size: optional per-tree subsample size; None uses the full set.
    self.max_depth = max_depth
    self.min_size = min_size
    self.use_mean = use_mean
    self.sample_size = sample_size
    self.n_trees = n_trees
    # Filled in by fit(); None until then.
    self.inputs = None
    self.bits = None
    self.is_neg = None
    self.trees = None

  @staticmethod
  def save(model, filename):
    """Saves model to disk."""
    print("... saving model in {}".format(filename))
    f = open(filename, "wb")
    pickle.dump(model, f)
    f.close()

  @staticmethod
  def load(filename):
    """Loads model from disk."""
    # NOTE(review): pickle.load on an untrusted file can execute arbitrary
    # code; only load models you produced yourself.
    print("... loading model from {}".format(filename))
    f = open(filename, "rb")
    random_forest = pickle.load(f)
    f.close()
    return random_forest

  def subsample(self, dataset):
    """Subsamples dataset if we do not want to use entire dataset."""
    # Bootstrap sample: drawn with replacement.
    sample_idx = np.random.choice(
        dataset.shape[0], self.sample_size, replace=True)
    sample = dataset[sample_idx,...]
    return sample

  def fit(self, dataset, verbose=False):
    """Fits random tree to model."""
    # Last column of `dataset` is the target; the rest are the inputs.
    self.inputs = dataset.shape[1]-1
    # Per-column bit widths derived from each column's dynamic range.
    self.bits = np.ceil(
        np.log2(
            np.abs(
                np.amax(dataset, axis=0) -
                np.amin(dataset, axis=0)))).astype(np.int32)
    self.is_neg = (np.amin(dataset, axis=0) < 0).astype(np.int8)
    self.trees = []
    for i in range(self.n_trees):
      if verbose:
        print("... creating tree {}".format(i))
      # as subsample is an expensive operation, we will only perform it if it
      # reduces the dataset substantially
      if self.sample_size and self.sample_size < 0.3 * dataset.shape[0]:
        if verbose:
          print("... generated subsample of size {}".format(self.sample_size))
        sample = self.subsample(dataset)
      else:
        sample = dataset
      self.trees.append(fit_parallel(
          self.max_depth, self.min_size, sample, True))

  def predict_row(self, row):
    """Predicts output for single row."""
    result = [tree.predict_row(row) for tree in self.trees]
    if self.use_mean:
      # Regression-style aggregation: rounded mean of the tree outputs.
      return int(np.round(np.mean(result)))
    else:
      # Classification-style aggregation: majority vote.
      return max(set(result), key=result.count)

  def predict(self, data):
    """Predicts class based on data."""
    assert self.trees is not None
    return np.array([self.predict_row(data[i]) for i in range(data.shape[0])])

  def gen_code(self, filename, func_name):
    """Generates code for model."""
    assert self.bits is not None
    # One {var: expression} dict per tree; n_vars offsets variable names so
    # they stay unique across trees.
    vd_list = []
    n_vars = 0
    for tree in self.trees:
      vd_list.append(tree.gen_code(n_vars))
      n_vars += len(vd_list[-1])
    # checks the type by the suffix
    is_v = filename.split(".")[-1] == "v"
    assert self.inputs
    f = open(filename, "w")
    i_bits = np.sum(self.bits[:-1])
    o_bits = self.bits[-1]
    o_sign = self.is_neg[-1]
    if is_v:
      f.write("module {}(input [{}:0] i, output [{}:0] o);\n".format(
          func_name, i_bits-1, o_bits-1))
    else:
      # NOTE(review): header name appears missing after "#include" — possibly
      # lost in extraction (ac_int header expected); confirm against original.
      f.write("#include\n\n")
      f.write("void {}(ac_int<{},false> i, ac_int<{},{}> &o)\n".format(
          func_name, i_bits, o_bits, o_sign))
      f.write("{\n")
    # write function headline
    # Emits one named slice variable i_k per input column, wrapping the
    # generated line at ~70 characters and restarting the declaration when
    # the bit width or sign of the column changes.
    s_in_line = []
    i_bits = self.bits[0]
    i_sign = self.is_neg[0]
    if is_v:
      i_datatype = " wire {}[{}:0] ".format(
          "signed " if i_sign else "", i_bits-1)
    else:
      i_datatype = " ac_int<{},{}> ".format(i_bits, i_sign)
    len_s = len(i_datatype)
    for i in range(self.inputs):
      if is_v:
        s = (
            "i_" + str(i) + " = " +
            "i[" + str(i_bits*(i+1)-1) + ":" + str(i_bits*i) + "]"
        )
      else:
        s = (
            "i_" + str(i) + " = " +
            "i.slc<" + str(i_bits) + ">(" + str(i_bits*i) + ")"
        )
      if (
          len_s + len(s) + 2 > 70 or
          i_bits != self.bits[i] or
          i_sign != self.is_neg[i]
      ):
        f.write(i_datatype + ", ".join(s_in_line) + ";\n")
        s_in_line = []
        if is_v:
          i_datatype = " wire {}[{}:0] ".format(
              "signed " if i_sign else "", i_bits-1)
        else:
          i_datatype = " ac_int<{},{}> ".format(i_bits, i_sign)
        len_s = len(i_datatype)
      s_in_line.append(s)
      len_s += len(s) + 2
    if s_in_line:
      f.write(i_datatype + ", ".join(s_in_line) + ";\n")
    if is_v:
      o_datatype = " wire {}[{}:0] ".format(
          "signed " if o_sign else "", o_bits)
    else:
      o_datatype = " ac_int<{},{}> ".format(o_bits, o_sign)
    # Emits every per-tree node expression; o_list collects each tree's root
    # variable (the last one written per tree dict).
    o_list = []
    for i in range(len(vd_list)):
      for v in vd_list[i]:
        if is_v:
          f.write(o_datatype + v + " = " + vd_list[i][v] + ";\n")
        else:
          f.write(o_datatype + v + " = " + vd_list[i][v] + ";\n")
        f.write("\n")
        o_list.append(v)
    # The majority-vote combiner below is hand-written for up to 3 trees only.
    assert len(o_list) <= 3
    if is_v:
      f.write("  assign ")
    else:
      f.write("  ")
    if len(o_list) == 1:
      f.write("o = " + o_list[0] + ";")
    elif len(o_list) == 2:
      # Two trees: if they agree use either; otherwise average them.
      cond = "( " + o_list[0] + " == " + o_list[1] + " ) "
      n1 = o_list[0]
      n0 = "( ( " + " + ".join(o_list) + " ) >> 1 )"
      f.write("o = " + cond + "? " + n1 + ": " + n0)
    elif len(o_list) == 3:
      # Three trees: any pairwise agreement wins; otherwise take the median.
      cond = (
          "( " +
          "( " + " == ".join(o_list[0:2]) + " )?" + o_list[0] + ":" +
          "( " + " == ".join(o_list[1:]) + " )?" + o_list[1] + ":" +
          "( " + " == ".join([o_list[0], o_list[2]]) + " )?" +
          o_list[0] + ":" +
          "( " + " < ".join(o_list[0:2]) + " ) ?" +
          "( ( " + " < ".join(o_list[1:]) + " ) ?" + o_list[1] + ":" +
          o_list[2] + " ) : " +
          "( ( " + " < ".join([o_list[0], o_list[2]]) + " ) ?" +
          o_list[0] + ":" + o_list[2] + " )"
      )
      f.write("o = " + cond + ";\n")
    if is_v:
      f.write("endmodule")
    else:
      f.write("}")
    f.close()

================================================ FILE: experimental/lo/random_forest/random_tree.py ================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Implements Random Forest for quantized netlist.""" from csv import reader from math import sqrt import os import pprint from random import seed from random import randrange import sys import numpy as np from .parser import parse, _X, _0, _1 class RandomTree: def __init__(self, max_depth, min_size): self.min_size = min_size self.max_depth = max_depth self.n_features = None def split_into_groups(self, index, value, dataset): mask_l = dataset[:, index] < value mask_r = np.logical_not(mask_l) left = dataset[mask_l,...] right = dataset[mask_r,...] return left, right def gini_index(self, groups, classes): # count all samples at split point n_instances = float(sum([len(group) for group in groups])) # sum weighted Gini index for each group gini = 0.0 for group in groups: size = float(len(group)) # avoid divide by zero if size == 0: continue score = 0.0 # score the group based on the score for each class for class_val in classes: p = np.array([np.sum(group[:, -1] == class_val) / size for class_val in classes]) score += np.sum(np.power(p, 2)) # weight the group score by its relative size gini += (1.0 - score) * (size / n_instances) return gini def select_best_split(self, dataset): class_values = list(set(list(dataset[:,-1].flatten()))) b_index, b_value, b_score, b_groups = 9999, 9999, 9999, None # because several of the entries may be don't cares, we will select the # whole set and restrict to only the ones that are not don't cares features = list( np.random.choice(len(dataset[0])-1, self.n_features, p=self.probs, replace=False)) for index in features: assert self.mask[index] == True b_values = list(set(list(dataset[:, index]))) for b in b_values: groups = self.split_into_groups(index, b, dataset) gini = self.gini_index(groups, class_values) if gini < b_score: b_index, b_value, b_score, 
b_groups = index, b, gini, groups return {'index': b_index, 'value': b_value, 'groups': b_groups} def select_terminal(self, group): outcomes = list(group[:,-1].flatten()) return max(set(outcomes), key=outcomes.count) def split_node(self, node, depth): left, right = node['groups'] del(node['groups']) # check for a no split if left.shape[0] == 0: node['left'] = node['right'] = self.select_terminal(right) return elif right.shape[0] == 0: node['left'] = node['right'] = self.select_terminal(left) return # check for max depth if depth >= self.max_depth: node['left'], node['right'] = (self.select_terminal(left), self.select_terminal(right)) return # process left child if len(set(list( left[:, -1].flatten()))) == 1 or left.shape[0] <= self.min_size: node['left'] = self.select_terminal(left) else: node['left'] = self.select_best_split(left) self.split_node(node['left'], depth + 1) # process right child if len(set(list( right[:, -1].flatten()))) == 1 or right.shape[0] <= self.min_size: node['right'] = self.select_terminal(right) else: node['right'] = self.select_best_split(right) self.split_node(node['right'], depth+1) def create_mask(self, dataset): self.mask = np.amin(dataset, axis=0) != np.amax(dataset, axis=0) def fit(self, dataset, mask_stuck_at_values=False): if mask_stuck_at_values: self.create_mask(dataset) else: self.mask = np.ones(dataset.shape[1]) self.probs = self.mask[:-1].astype(np.float32) / np.sum(self.mask[:-1]) if not self.n_features: self.n_features = int(np.sqrt(dataset.shape[1] - 1)) self.root = self.select_best_split(dataset) self.split_node(self.root, 1) def predict_internal(self, node, data): if data[node['index']] < node['value']: if isinstance(node['left'], dict): return self.predict_internal(node['left'], data) else: return node['left'] else: if isinstance(node['right'], dict): return self.predict_internal(node['right'], data) else: return node['right'] def predict_row(self, row): return self.predict_internal(self.root, row) def predict(self, 
data): return np.array(self.predict_row(data[i]) for i in range(data.shape[0])) def gen_code_internal(self, node, var_dict, n_offset): # traverse left cond = '( i_' + str(node['index']) + ' < ' + str(node['value']) + ' )' if isinstance(node['left'], dict): n0 = self.gen_code_internal(node['left'], var_dict, n_offset) else: n0 = str(node['left']) if isinstance(node['right'], dict): n1 = self.gen_code_internal(node['right'], var_dict, n_offset) else: n1 = str(node['right']) index = len(var_dict) + n_offset r = 'n_' + str(index) stmt = cond + '? ' + n0 + ' : ' + n1 var_dict[r] = stmt return r def gen_code(self, n_offset=0): var_dict = {} self.gen_code_internal(self.root, var_dict, n_offset) return var_dict ================================================ FILE: experimental/lo/random_forest/utils.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Reads and processes tables of PLAs and CSVs."""

from csv import reader
from math import sqrt
import os
import pprint
from random import seed
from random import randrange
import sys

import numpy as np

from .parser import parse, _X, _0, _1


def str_column_to_float(dataset, column):
  """Converts string column to float."""
  for record in dataset:
    record[column] = float(record[column].strip())


def str_column_to_int(dataset, column):
  """Converts string column to int."""
  for record in dataset:
    record[column] = int(record[column].strip())


def str_column_to_number(dataset, column):
  """Converts output to integer if possible or float."""
  unique = set(record[column] for record in dataset)
  lookup = {}
  is_symbolic = False
  for value in unique:
    # Try the narrowest numeric interpretation first.
    try:
      lookup[value] = int(value)
    except ValueError:
      try:
        lookup[value] = float(value)
      except ValueError:
        # Not a number at all: fall back to symbolic class ids.
        is_symbolic = True
        break
  if is_symbolic:
    # Best we can do is to assign unique numbers to the classes.
    lookup = {value: i for i, value in enumerate(unique)}
  # Rewrite the column through the lookup table.
  for record in dataset:
    record[column] = lookup[record[column]]
  return lookup


def load_csv(filename):
  """Loads CSV file."""
  dataset = []
  with open(filename, 'r') as csv_file:
    for row in reader(csv_file):
      if row:
        dataset.append(row)
  n_columns = len(dataset[0])
  # Inputs become ints; the output column may be int, float or symbolic.
  for col in range(n_columns - 1):
    str_column_to_int(dataset, col)
  str_column_to_number(dataset, n_columns - 1)
  return np.array(dataset)


def load_pla(filename):
  """Loads PLA file."""
  pla = parse(filename)
  rows = []
  for in_bits, out_bits in zip(pla.pla_i, pla.pla_o):
    # Don't-care inputs (_X) are treated the same as 0.
    in_row = [1 if v == _1 else 0 for v in in_bits]
    width = len(out_bits)
    # Pack the output bit-vector (MSB first) into a single integer.
    out_val = sum((1 << (width - 1 - k))
                  for k in range(width) if out_bits[k] == _1)
    rows.append(in_row + [out_val])
  return np.array(rows)
def load(filename): """Loads and decides if we will load PLA or CSV file based on suffix.""" suffix_split = filename.split(".") if suffix_split[-1] == "pla": print("... loading pla") dataset = load_pla(filename) else: dataset = load_csv(filename) return dataset ================================================ FILE: experimental/lo/receptive.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import math from .utils import get_padding_value def print_rf(layer_name, x): print("Layer {}:".format(layer_name)) print( "\theight/width: {}\n\tstride: {}\n\teq_kernel_size: {}\n\tstart: {}\n".format( *x) ) def rf_computation_for_layer(layer, layer_in): k, s, p = layer n_in, j_in, r_in, start_in = layer_in n_out = int(math.floor((n_in + 2*p - k)/s)) + 1 if s == 1 and p == 1: n_out = n_in actual_p = (n_out-1)*s - n_in + k p_r = math.ceil(actual_p/2) p_l = math.floor(actual_p/2) j_out = j_in * s r_out = r_in + (k-1)*j_in start_out = start_in + (int((k-1)/2) - p_l) * j_in return n_out, j_out, r_out, start_out def model_to_receptive_field(model, i_name, o_name): layers_h = [] layers_w = [] i_layer = model.get_layer(i_name) o_layer = model.get_layer(o_name) # right now this only works for sequential layers i_index = 
model.layers.index(i_layer) o_index = model.layers.index(o_layer) for i in range(i_index, o_index+1): k_h, k_w = (1, 1) s_h, s_w = (1, 1) p_h, p_w = (0, 0) if hasattr(model.layers[i], "kernel_size"): kernel = model.layers[i].kernel_size if isinstance(kernel, int): kernel = [kernel, kernel] k_h, k_w = kernel[0], kernel[1] if hasattr(model.layers[i], "strides"): strides = model.layers[i].strides if isinstance(strides, int): strides = [strides, strides] s_h, s_w = strides[0], strides[1] if hasattr(model.layers[i], "padding"): padding = model.layers[i].padding if isinstance(padding, str): padding = [padding, padding] p_h = get_padding_value(padding[0], k_h) p_w = get_padding_value(padding[1], k_w) layers_h.append((k_h, s_h, p_h)) layers_w.append((k_w, s_w, p_w)) x_h = (i_layer.input.shape[1], 1, 1, 0.5) x_w = (i_layer.input.shape[2], 1, 1, 0.5) for l_h, l_w in zip(layers_h, layers_w): x_h = rf_computation_for_layer(l_h, x_h) x_w = rf_computation_for_layer(l_w, x_w) strides = (x_h[1], x_w[1]) kernel = (x_h[2], x_w[2]) padding = ("valid", "valid") return (strides, kernel, padding) ================================================ FILE: experimental/lo/table/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== from .utils import load from .utils import load_csv from .utils import load_pla ================================================ FILE: experimental/lo/table/parser.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Parses PLA format usig ply.""" from ply import yacc from ply import lex import numpy as np _1 = 1 _0 = 2 _X = 3 _U = 0 NOT = {_0: _1, _1: _0, _X: _U, _U: _U} class PLA: def __init__(self): self.pla_i = [] self.pla_o = [] pla = PLA() tokens = [ "I", "O", "MV", "ILB", "OB", "P", "L", "E", "TYPE", "SYMBOL", "NUMBER", "NEWLINE" ] t_ignore = " \t|" t_I = r"\.[iI]" t_O = r"\.[oO]" t_MV = r"\.[mM][vV]" t_ILB = r"\.[iI][lL][bB]" t_OB = r"\.[oO][bB]" t_P = r"\.[pP]" t_L = r"\.[lL]" t_E = r"\.[eE]" t_TYPE = r"\.type" t_SYMBOL = r"[a-zA-Z_][a-zA-Z0-9_\<\>\-\$]*" def t_NUMBER(t): r"[\d\-]+" return t def t_NEWLINE(t): r"\n+" t.lexer.lineno += t.value.count("\n") return t def t_error(t): print("Illegal character '{}'".format(t.value)) t.lexer.skip(1) lex.lex() def p_pla(p): """pla : pla_declarations pla_table pla_end""" def p_pla_declarations(p): """pla_declarations : pla_declarations pla_declaration | pla_declaration""" def p_pla_declaration(p): """pla_declaration : I NUMBER NEWLINE | O NUMBER NEWLINE | P NUMBER NEWLINE | MV number_list 
NEWLINE | ILB symbol_list NEWLINE | OB symbol_list NEWLINE | L NUMBER symbol_list NEWLINE | TYPE SYMBOL NEWLINE """ token = p[1].lower() if token == ".i": pla.ni = int(p[2]) elif token == ".o": pla.no = int(p[2]) elif token == ".mv": pla.mv = [int(v) for v in p[2]] elif token == ".ilb": pla.ilb = p[2] elif token == ".ob": pla.ob = p[2] elif token == ".l": pla.label = p[2] elif token == ".type": pla.set_type = p[2] def p_pla_table(p): """pla_table : pla_table number_symbol_list NEWLINE | number_symbol_list NEWLINE""" if len(p[1:]) == 3: line = "".join(p[2]) else: line = "".join(p[1]) assert hasattr(pla, "ni") and hasattr(pla, "no") # right now we only process binary functions line = [_1 if v == "1" else _0 if v == "0" else _X for v in line] pla.pla_i.append(line[0:pla.ni]) pla.pla_o.append(line[pla.ni:]) def p_pla_end(p): """pla_end : E opt_new_line""" pass def p_opt_new_line(p): """opt_new_line : NEWLINE | """ pass def p_number_list(p): """number_list : number_list NUMBER | NUMBER """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_symbol_list(p): """symbol_list : symbol_list SYMBOL | SYMBOL """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_symbol_list(p): """number_symbol_list : number_symbol_list number_or_symbol | number_or_symbol """ if len(p[1:]) == 2: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_number_or_symbol(p): """number_or_symbol : NUMBER | SYMBOL """ p[0] = p[1] def p_error(p): print("Error text at {}".format(p)) #p.value)) yacc.yacc() def get_tokens(fn): lex.input("".join(open(fn).readlines())) return lex.token def parse(fn): yacc.parse("".join(open(fn).readlines())) pla.pla_i = np.array(pla.pla_i) pla.pla_o = np.array(pla.pla_o) return pla ================================================ FILE: experimental/lo/table/utils.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except 
in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Reads and processes tables of PLAs and CSVs.""" from csv import reader from csv import QUOTE_NONNUMERIC from math import sqrt import os import pprint from random import seed from random import randrange import sys import numpy as np from .parser import parse, _X, _0, _1 def str_column_to_float(dataset, column): """Converts string column to float.""" for row in dataset: row[column] = float(row[column].strip()) def str_column_to_int(dataset, column, d_values): """Converts string column to int.""" for row in dataset: v = int(row[column].strip()) row[column] = v if not d_values else d_values[v] def str_column_to_number(dataset, column): """Converts output to integer if possible or float.""" class_values = [row[column] for row in dataset] unique = set(class_values) lookup = dict() is_symbolic = False for value in unique: try: # try int first lookup[value] = int(value) except ValueError: try: # if it fails, try float lookup[value] = float(value) except ValueError: # if it fails, it is symbolic is_symbolic = True break # best we an do is to assign unique numbers to the classes if is_symbolic: for i, value in enumerate(unique): lookup[value] = i # convert output to unique number for row in dataset: row[column] = lookup[row[column]] return lookup def int2bin(v, bits): str_v = format((v & ((1< 0 and sign: if mode == "bin": b_str = bin(-b & ((1 << n_bits) - 1))[2:] else: # mode == "dec" b_str = str(-b) o_dict[-v] = b_str if sign: v = (1.0 * (1 << (bits - sign))) 
* (1 << ibits) / (1 << bits) if mode == "bin": b_str = bin(-(1 << (bits - sign)) & ((1 << bits) - 1))[2:] else: b_str = str(-(1 << (bits - sign))) o_dict[-v] = b_str return o_dict def get_quantized_po2_dict( bits, max_exp, sign=False, make_smaller_zero=True, mode="bin"): """Returns map from floating values to bit encoding.""" # if make_smaller_zero we will make sure smaller number is 000...0 # mode = "bin" |-> make_smaller_zero assert mode != "bin" or make_smaller_zero o_dict = {} if max_exp > 0: v = 1.0 if mode == "bin": b_str = "0" * bits else: b_str = "1" o_dict[v] = b_str if sign: v = -1.0 if mode == "bin": b_str = "1" + "0"*(bits-sign) else: b_str = "-1" o_dict[v] = b_str for b in range(1, 1<<(bits - sign - 1)): v = np.power(2.0, -b) if mode == "bin": b_sign = "0" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str if b <= max_exp: v = np.power(2.0, b) if mode == "bin": b_str = bin(b)[2:] b_str = b_sign + "0"*(bits - sign - len(b_str)) + b_str else: b_str = str(v) o_dict[v] = b_str if sign: v = -np.power(2.0, -b) if mode == "bin": b_sign = "1" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str if b <= max_exp: v = -np.power(2.0, b) if mode == "bin": b_str = bin(b)[2:] b_str = b_sign + "0"*(bits - sign - len(b_str)) + b_str else: b_str = str(v) o_dict[v] = b_str b = 1 << (bits - sign - 1) v = np.power(2.0, -b) if mode == "bin": b_sign = "0" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str smaller_mask = b_str if sign: v = -np.power(2.0, -b) if mode == "bin": b_sign = "1" if sign else "" b_str = b_sign + bin((-b) & ((1 << (bits - sign + 1)) - 1))[3:] else: b_str = str(v) o_dict[v] = b_str def invert_bit(bit, mask): """Inverts bits if mask is 1.""" if mask == "0": return bit else: return "0" if bit == "1" else "1" if mode == "bin": if make_smaller_zero: for v in 
o_dict: o_dict[v] = "".join( invert_bit(bit, mask_bit) for bit, mask_bit in zip(o_dict[v], smaller_mask)) else: keys_sorted = list(sorted(o_dict.keys())) if make_smaller_zero: min_positive_key = min([abs(v) for v in keys_sorted]) min_positive_index = keys_sorted.index(min_positive_key) else: min_positive_index = 0 for i, k in enumerate(keys_sorted): o_dict[k] = str(i - min_positive_index) return o_dict def get_ternary_dict(mode="bin"): """Returns map from floating values to bit encoding.""" if mode == "bin": return {-1.0: "11", 0.0: "00", 1.0: "01"} else: return {-1.0: "-1", 0.0: "0", 1.0: "1"} def get_binary_dict(symmetric=False, mode="bin"): """Returns map from floating values to bit encoding.""" if mode == "bin": if symmetric: return {-1.0: "10", 1.0: "01"} else: return {0.0: "0", 1.0: "1"} else: if symmetric: return {-1.0: "-1", 1.0: "1"} else: return {0.0: "0", 1.0: "1"} ================================================ FILE: notebook/AutoQKeras.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "QC9sVuNrzT-f" }, "source": [ "# Introduction\n", "\n", "In this notebook, we show how to quantize a model using AutoQKeras.\n", "\n", "As usual, let's first make sure we are using Python 3." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 51 }, "colab_type": "code", "executionInfo": { "elapsed": 926, "status": "ok", "timestamp": 1591840345558, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "0sY-O2IfzdB3", "outputId": "1c5a4e7a-1003-4b56-a30a-ca6bc196f18b" }, "outputs": [], "source": [ "import sys\n", "print(sys.version)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "6V7FxYH0zfY0" }, "source": [ "Now, let's load some packages we will need to run AutoQKeras." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "wuVqOAcbz3Go" }, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import json\n", "import pprint\n", "import numpy as np\n", "import six\n", "import tempfile\n", "import tensorflow.compat.v2 as tf\n", "# V2 Behavior is necessary to use TF2 APIs before TF2 is default TF version internally.\n", "tf.enable_v2_behavior()\n", "from tensorflow.keras.optimizers import *\n", "\n", "from qkeras.autoqkeras import *\n", "from qkeras import *\n", "from qkeras.utils import model_quantize\n", "from qkeras.qtools import run_qtools\n", "from qkeras.qtools import settings as qtools_settings\n", "\n", "from tensorflow.keras.utils import to_categorical\n", "import tensorflow_datasets as tfds\n", "\n", "print(\"using tensorflow\", tf.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's define `get_data` and `get_model` as you may not have stand alone access to examples directory inside autoqkeras." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_data(dataset_name, fast=False):\n", " \"\"\"Returns dataset from tfds.\"\"\"\n", " ds_train = tfds.load(name=dataset_name, split=\"train\", batch_size=-1)\n", " ds_test = tfds.load(name=dataset_name, split=\"test\", batch_size=-1)\n", "\n", " dataset = tfds.as_numpy(ds_train)\n", " x_train, y_train = dataset[\"image\"].astype(np.float32), dataset[\"label\"]\n", "\n", " dataset = tfds.as_numpy(ds_test)\n", " x_test, y_test = dataset[\"image\"].astype(np.float32), dataset[\"label\"]\n", "\n", " if len(x_train.shape) == 3:\n", " x_train = x_train.reshape(x_train.shape + (1,))\n", " x_test = x_test.reshape(x_test.shape + (1,))\n", "\n", " x_train /= 256.0\n", " x_test /= 256.0\n", "\n", " x_mean = np.mean(x_train, axis=0)\n", "\n", " x_train -= x_mean\n", " x_test -= x_mean\n", "\n", " nb_classes = np.max(y_train) + 1\n", " y_train = to_categorical(y_train, nb_classes)\n", " y_test = to_categorical(y_test, nb_classes)\n", "\n", " print(x_train.shape[0], \"train samples\")\n", " print(x_test.shape[0], \"test samples\")\n", " return (x_train, y_train), (x_test, y_test)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.initializers import *\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.optimizers import *\n", "\n", "class ConvBlockNetwork(object):\n", " \"\"\"Creates Convolutional block type of network.\"\"\"\n", "\n", " def __init__(\n", " self,\n", " shape,\n", " nb_classes,\n", " kernel_size,\n", " filters,\n", " dropout_rate=0.0,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " kernel_initializer=\"he_normal\",\n", " bias_initializer=\"zeros\",\n", " use_separable=False,\n", " use_xnornet_trick=False,\n", " all_conv=False\n", " ):\n", " \"\"\"Creates class.\n", "\n", " Args:\n", " shape: shape of inputs.\n", " 
nb_classes: number of output classes.\n", " kernel_size: kernel_size of network.\n", " filters: sizes of filters (if entry is a list, we create a block).\n", " dropout_rate: dropout rate if > 0.\n", " with_maxpooling: if true, use maxpooling.\n", " with_batchnorm: with BatchNormalization.\n", " kernel_initializer: kernel_initializer.\n", " bias_initializer: bias and beta initializer.\n", " use_separable: if \"dsp\", do conv's 1x3 + 3x1. If \"mobilenet\",\n", " use MobileNet separable convolution. If False or \"none\", perform single\n", " conv layer.\n", " use_xnornet_trick: use bn+act after max pool to enable binary\n", " to avoid saturation to largest value.\n", " all_conv: if true, implements all convolutional network.\n", " \"\"\"\n", " self.shape = shape\n", " self.nb_classes = nb_classes\n", " self.kernel_size = kernel_size\n", " self.filters = filters\n", " self.dropout_rate = dropout_rate\n", " self.with_maxpooling = with_maxpooling\n", " self.with_batchnorm = with_batchnorm\n", " self.kernel_initializer = kernel_initializer\n", " self.bias_initializer = bias_initializer\n", " self.use_separable = use_separable\n", " self.use_xnornet_trick = use_xnornet_trick\n", " self.all_conv = all_conv\n", "\n", " def build(self):\n", " \"\"\"Builds model.\"\"\"\n", " x = x_in = Input(self.shape, name=\"input\")\n", " for i in range(len(self.filters)):\n", " if len(self.filters) > 1:\n", " name_suffix_list = [str(i)]\n", " else:\n", " name_suffix_list = []\n", " if not isinstance(self.filters[i], list):\n", " filters = [self.filters[i]]\n", " else:\n", " filters = self.filters[i]\n", " for j in range(len(filters)):\n", " if len(filters) > 1:\n", " name_suffix = \"_\".join(name_suffix_list + [str(j)])\n", " else:\n", " name_suffix = \"_\".join(name_suffix_list)\n", " if self.use_separable == \"dsp\":\n", " kernels = [(1, self.kernel_size), (self.kernel_size, 1)]\n", " else:\n", " kernels = [(self.kernel_size, self.kernel_size)]\n", " for k, kernel in 
enumerate(kernels):\n", " strides = 1\n", " if (\n", " not self.with_maxpooling and j == len(filters)-1 and\n", " k == len(kernels)-1\n", " ):\n", " strides = 2\n", " if self.use_separable == \"dsp\":\n", " kernel_suffix = (\n", " \"\".join([str(k) for k in kernel]) + \"_\" + name_suffix)\n", " elif self.use_separable == \"mobilenet\":\n", " depth_suffix = (\n", " \"\".join([str(k) for k in kernel]) + \"_\" + name_suffix)\n", " kernel_suffix = \"11_\" + name_suffix\n", " else:\n", " kernel_suffix = name_suffix\n", " if self.use_separable == \"mobilenet\":\n", " x = DepthwiseConv2D(\n", " kernel,\n", " padding=\"same\", strides=strides,\n", " use_bias=False,\n", " name=\"conv2d_dw_\" + depth_suffix)(x)\n", " if self.with_batchnorm:\n", " x = BatchNormalization(name=\"conv2d_dw_bn_\" + depth_suffix)(x)\n", " x = Activation(\"relu\", name=\"conv2d_dw_act_\" + depth_suffix)(x)\n", " kernel = (1, 1)\n", " strides = 1\n", " x = Conv2D(\n", " filters[j], kernel,\n", " strides=strides, use_bias=not self.with_batchnorm,\n", " padding=\"same\",\n", " kernel_initializer=self.kernel_initializer,\n", " bias_initializer=self.bias_initializer,\n", " name=\"conv2d_\" + kernel_suffix)(x)\n", " if not (\n", " self.with_maxpooling and self.use_xnornet_trick and\n", " j == len(filters)-1 and k == len(kernels)-1\n", " ):\n", " if self.with_batchnorm:\n", " x = BatchNormalization(\n", " beta_initializer=self.bias_initializer,\n", " name=\"bn_\" + kernel_suffix)(x)\n", " x = Activation(\"relu\", name=\"act_\" + kernel_suffix)(x)\n", " if self.with_maxpooling:\n", " x = MaxPooling2D(2, 2, name=\"mp_\" + name_suffix)(x)\n", " # this is a trick from xnornet to enable full binary or ternary\n", " # networks to be after maxpooling.\n", " if self.use_xnornet_trick:\n", " x = BatchNormalization(\n", " beta_initializer=self.bias_initializer,\n", " name=\"mp_bn_\" + name_suffix)(x)\n", " x = Activation(\"relu\", name=\"mp_act_\" + name_suffix)(x)\n", " if self.dropout_rate > 0:\n", " x = 
Dropout(self.dropout_rate, name=\"drop_\" + name_suffix)(x)\n", "\n", " if not self.all_conv:\n", " x = Flatten(name=\"flatten\")(x)\n", " x = Dense(\n", " self.nb_classes,\n", " kernel_initializer=self.kernel_initializer,\n", " bias_initializer=self.bias_initializer,\n", " name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " else:\n", " x = Conv2D(\n", " self.nb_classes, 1, strides=1, padding=\"same\",\n", " kernel_initializer=self.kernel_initializer,\n", " bias_initializer=self.bias_initializer,\n", " name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", "\n", " model = Model(inputs=[x_in], outputs=[x])\n", "\n", " return model\n", "\n", "\n", "def get_model(dataset):\n", " \"\"\"Returns a model for the demo of AutoQKeras.\"\"\"\n", " if dataset == \"mnist\":\n", " model = ConvBlockNetwork(\n", " shape=(28, 28, 1),\n", " nb_classes=10,\n", " kernel_size=3,\n", " filters=[16, 32, 48, 64, 128],\n", " dropout_rate=0.2,\n", " with_maxpooling=False,\n", " with_batchnorm=True,\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " ).build()\n", "\n", " elif dataset == \"fashion_mnist\":\n", " model = ConvBlockNetwork(\n", " shape=(28, 28, 1),\n", " nb_classes=10,\n", " kernel_size=3,\n", " filters=[16, [32]*3, [64]*3],\n", " dropout_rate=0.2,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " use_separable=\"mobilenet\",\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " use_xnornet_trick=True\n", " ).build()\n", "\n", " elif dataset == \"cifar10\":\n", " model = ConvBlockNetwork(\n", " shape=(32, 32, 3),\n", " nb_classes=10,\n", " kernel_size=3,\n", " filters=[16, [32]*3, [64]*3, [128]*3],\n", " dropout_rate=0.2,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " use_separable=\"mobilenet\",\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " use_xnornet_trick=True\n", " 
).build()\n", "\n", " elif dataset == \"cifar100\":\n", " model = ConvBlockNetwork(\n", " shape=(32, 32, 3),\n", " nb_classes=100,\n", " kernel_size=3,\n", " filters=[16, [32]*3, [64]*3, [128]*3, [256]*3],\n", " dropout_rate=0.2,\n", " with_maxpooling=True,\n", " with_batchnorm=True,\n", " use_separable=\"mobilenet\",\n", " kernel_initializer=\"he_uniform\",\n", " bias_initializer=\"zeros\",\n", " use_xnornet_trick=True\n", " ).build()\n", "\n", " model.summary()\n", "\n", " return model" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "uXsGtqRcN7fY" }, "source": [ "`AutoQKeras` has some examples on how to run with `mnist`, `fashion_mnist`, `cifar10` and `cifar100`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 51 }, "colab_type": "code", "executionInfo": { "elapsed": 18554, "status": "ok", "timestamp": 1591840377936, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "lB8CBTehz9FK", "outputId": "09f791cf-8db5-40c5-b17d-89d433308716" }, "outputs": [], "source": [ "DATASET = \"mnist\"\n", "(x_train, y_train), (x_test, y_test) = get_data(DATASET)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "bk4rOks2OIbW" }, "source": [ "Before we create the model, let's see if we can perform distributed training." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 206 }, "colab_type": "code", "executionInfo": { "elapsed": 304, "status": "ok", "timestamp": 1591840378251, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "EMbYcKb-wMOc", "outputId": "22e85769-4659-4212-ccdb-4b00be2fcefe" }, "outputs": [], "source": [ "physical_devices = tf.config.list_physical_devices()\n", "for d in physical_devices:\n", " print(d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 14553, "status": "ok", "timestamp": 1591840392823, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "eMVill0TxUuG", "outputId": "97c07213-fdce-4eed-9af7-cc51393cd996" }, "outputs": [], "source": [ "has_tpus = np.any([d.device_type == \"TPU\" for d in physical_devices])\n", "\n", "if has_tpus:\n", " TPU_WORKER = 'local'\n", "\n", " resolver = tf.distribute.cluster_resolver.TPUClusterResolver(\n", " tpu=TPU_WORKER, job_name='tpu_worker')\n", " if TPU_WORKER != 'local':\n", " tf.config.experimental_connect_to_cluster(resolver, protocol='grpc+loas')\n", " tf.tpu.experimental.initialize_tpu_system(resolver)\n", " strategy = tf.distribute.experimental.TPUStrategy(resolver)\n", " print('Number of devices: {}'.format(strategy.num_replicas_in_sync))\n", "\n", " cur_strategy = strategy\n", "else:\n", " cur_strategy = tf.distribute.get_strategy()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "6FIAmXgOOPWg" }, "source": [ "Now we can create the model with the distributed strategy in place if TPUs are available. We have some test models that we can use, or you can build your own models. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 977 }, "colab_type": "code", "executionInfo": { "elapsed": 1149, "status": "ok", "timestamp": 1591840393983, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "P0_-26kwxZiQ", "outputId": "bf2828fe-2968-4d7d-82e7-0e2b87f063ae" }, "outputs": [], "source": [ "with cur_strategy.scope():\n", " model = get_model(DATASET)\n", " custom_objects = {}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "jok7tJq1OVuJ" }, "source": [ "Let's see the accuracy on a unquantized model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 360 }, "colab_type": "code", "executionInfo": { "elapsed": 10292, "status": "ok", "timestamp": 1591840404285, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "nvFSJpeDxmWZ", "outputId": "ceac171d-2357-4d2a-ecbe-6c2775bc2a94" }, "outputs": [], "source": [ "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " model.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " model.fit(x_train, y_train, epochs=10, batch_size=2048, steps_per_epoch=29, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "pKArZ2VwQlph" }, "source": [ "For `mnist`, we should get 99% validation accuracy, and for `fashion_mnist`, we should get around 86% of validation accuracy. Let's get a metric for high-level estimation of energy of this model. 
\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 413, "status": "ok", "timestamp": 1591840404708, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "AlIk3gtFS6iJ", "outputId": "780a9c28-6234-49ff-9a85-e52bf00a5c59" }, "outputs": [], "source": [ " reference_internal = \"fp32\"\n", " reference_accumulator = \"fp32\"\n", "\n", " q = run_qtools.QTools(\n", " model,\n", " # energy calculation using a given process\n", " # \"horowitz\" refers to 45nm process published at\n", " # M. Horowitz, \"1.1 Computing's energy problem (and what we can do about\n", " # it), \"2014 IEEE International Solid-State Circuits Conference Digest of\n", " # Technical Papers (ISSCC), San Francisco, CA, 2014, pp. 10-14, \n", " # doi: 10.1109/ISSCC.2014.6757323.\n", " process=\"horowitz\",\n", " # quantizers for model input\n", " source_quantizers=[quantized_bits(8, 0, 1)],\n", " is_inference=False,\n", " # absolute path (including filename) of the model weights\n", " # in the future, we will attempt to optimize the power model\n", " # by using weight information, although it can be used to further\n", " # optimize QBatchNormalization.\n", " weights_path=None,\n", " # keras_quantizer to quantize weight/bias in un-quantized keras layers\n", " keras_quantizer=reference_internal,\n", " # keras_quantizer to quantize MAC in un-quantized keras layers\n", " keras_accumulator=reference_accumulator,\n", " # whether calculate baseline energy\n", " for_reference=True)\n", " \n", "# caculate energy of the derived data type map.\n", "energy_dict = q.pe(\n", " # whether to store parameters in dram, sram, or fixed\n", " weights_on_memory=\"sram\",\n", " # store activations in dram or sram\n", " activations_on_memory=\"sram\",\n", " # minimum sram size in number of bits. 
Let's assume a 16MB SRAM.\n", " min_sram_size=8*16*1024*1024,\n", " # whether load data from dram to sram (consider sram as a cache\n", " # for dram. If false, we will assume data will be already in SRAM\n", " rd_wr_on_io=False)\n", "\n", "# get stats of energy distribution in each layer\n", "energy_profile = q.extract_energy_profile(\n", " qtools_settings.cfg.include_energy, energy_dict)\n", "# extract sum of energy of each layer according to the rule specified in\n", "# qtools_settings.cfg.include_energy\n", "total_energy = q.extract_energy_sum(\n", " qtools_settings.cfg.include_energy, energy_dict)\n", "\n", "pprint.pprint(energy_profile)\n", "print()\n", "print(\"Total energy: {:.2f} uJ\".format(total_energy / 1000000.0))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-eDXxDSUVJ2m" }, "source": [ "During the computation, we had a dictionary that outlines the energy per layer (`energy_profile`), and total energy (`total_energy`). The reader should remember that `energy_profile` may need additional filtering as implementations will fuse some\n", "layers. When we compute the `total_energy`, we consider an approximation that some layers will be fused to compute the final energy number. For example, a convolution layer followed by an activation layer will be fused into a single layer so that the output of the convolution layer is not used.\n", "\n", "You have to remember that our high-level model for energy has several assumptions:\n", "\n", "The energy of a layer is estimated as `energy(layer) = energy(input) + energy(parameters) + energy(MAC) + energy(output)`.\n", "\n", "1) Reading inputs, parameters and outputs consider only _compulsory_ accesses, i.e. first access to the data, which is independent of the hardware architecture. 
If you remember _The 3 C's of Caches_ (https://courses.cs.washington.edu/courses/cse410/99au/lectures/Lecture-10-18/tsld035.htm) other types of accesses will depend on the accelerator architecture.\n", "\n", "2) For the multiply-and-add (MAC) energy estimation, we only consider the energy to compute the MAC, but not any other type energy. For example, in a real accelerator, you have registers, glue logic, pipeline logic that will affect the overall energy profile of the device.\n", "\n", "Although this model is simple and provides an initial estimate on what to expect, it has high-variance with respect to actual energy numbers you will find in practice, especially with respect to different architectural implementations.\n", "\n", "We assume that the real energy `Energy(layer)` is a linear combination of the high-level energy model, i.e.`Energy(layer) = k1 * energy(layer) + k2`, where `k1` and `k2` are constants that depend on the architecture of the accelerator. One can think of `k1` as the factor that accounts for the additional storage to keep the model running, and `k2` as the additional always on logic that is required to perform the operations. If we compare the energy of two implementations with different quantizations of the same layer, let's say `layer1` and `layer2`, `Energy(layer1) > Energy(layer2)` holds true iff `energy(layer1) > energy(layer2)` for the same architecture, but for different architectures, this will not be true in general.\n", "\n", "Despite its limitations to predict a single energy number, this model is quite good to compare the energy of two different models, or different types of quantizations, when we restrict it to a single architecture, and that's how we use it here." 
] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Hr1FL8wVSy-q" }, "source": [ "# Quantizing a Model With `AutoQKeras`\n", "\n", "To quantize this model with `AutoQKeras`, we need to define the quantization for kernels, biases and activations; forgiving factors and quantization strategy.\n", "\n", "Below we define which quantizers are allowed for kernel, bias, activations and linear. Linear is a proxy that we use to capture `Activation(\"linear\")` to apply quantization without applying a non-linear operation. In some networks, we found that this trick may be necessary to better represent the quantization space.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "vSsEwDr_yRG4" }, "outputs": [], "source": [ "quantization_config = {\n", " \"kernel\": {\n", " \"binary\": 1,\n", " \"stochastic_binary\": 1,\n", " \"ternary\": 2,\n", " \"stochastic_ternary\": 2,\n", " \"quantized_bits(2,1,1,alpha=1.0)\": 2,\n", " \"quantized_bits(4,0,1,alpha=1.0)\": 4,\n", " \"quantized_bits(8,0,1,alpha=1.0)\": 8,\n", " \"quantized_po2(4,1)\": 4\n", " },\n", " \"bias\": {\n", " \"quantized_bits(4,0,1)\": 4,\n", " \"quantized_bits(8,3,1)\": 8,\n", " \"quantized_po2(4,8)\": 4\n", " },\n", " \"activation\": {\n", " \"binary\": 1,\n", " \"ternary\": 2,\n", " \"quantized_relu_po2(4,4)\": 4,\n", " \"quantized_relu(3,1)\": 3,\n", " \"quantized_relu(4,2)\": 4,\n", " \"quantized_relu(8,2)\": 8,\n", " \"quantized_relu(8,4)\": 8,\n", " \"quantized_relu(16,8)\": 16\n", " },\n", " \"linear\": {\n", " \"binary\": 1,\n", " \"ternary\": 2,\n", " \"quantized_bits(4,1)\": 4,\n", " \"quantized_bits(8,2)\": 8,\n", " \"quantized_bits(16,10)\": 16\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "GmW_xaAvZo4D" }, "source": [ "Now let's define how to apply quantization. In the simplest form, we specify how many bits for kernels, biases and activations by layer types. 
Note that the entry `BatchNormalization` needs to be specified here, as we only quantize layer types specified by these patterns. For example, a `Flatten` layer is not quantized as it does not change the data type of its inputs." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "emTRLIZmR-P7" }, "outputs": [], "source": [ "limit = {\n", " \"Dense\": [8, 8, 4],\n", " \"Conv2D\": [4, 8, 4],\n", " \"DepthwiseConv2D\": [4, 8, 4],\n", " \"Activation\": [4],\n", " \"BatchNormalization\": []\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-iu5gFNhaLNE" }, "source": [ "Here, we are specifying that we want to use at most 4 bits for weights and activations, and at most 8 bits for biases in convolutional and depthwise convolutions, but we allow up to 8 bits for kernels in dense layers." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "ZUMQGEIDblSa" }, "source": [ "Let's define now the forgiving factor. We will consider energy minimization as a goal as follows. Here, we are saying that we allow 8% reduction in accuracy for a 2x reduction in energy, both reference and trials have parameters and activations on SRAM, both reference model and quantization trials do not read/write from DRAM on I/O operations, and we should consider both experiments to use SRAMs with minimum tensor sizes (commonly called distributed SRAM implementation).\n", "\n", "We also need to specify the quantizers for the inputs. In this case, we want to use `int8` as source quantizers. Other possible types are `int16`, `int32`, `fp16` or `fp32`, besides `QKeras` quantizer types.\n", "\n", "Finally, to be fair, we want to compare our quantization against fixed-point 8-bit inputs, outputs, activations, weights and biases, and 32-bit accumulators.\n", "\n", "Remember that a `forgiving factor` forgives a drop in a metric such as `accuracy` if the gains of the model are much bigger than the drop. 
For example, it corresponds to the sentence *we allow $\\tt{delta}\\%$ reduction in accuracy if the quantized model has $\\tt{rate} \\times$ smaller energy than the original model*, being a multiplicative factor to the metric. It is computed by $1 + \\tt{delta} \\times \\log_{\\tt{rate}}(\\tt{stress} \\times \\tt{reference\\_cost} / \\tt{trial\\_cost})$." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "kS31TuZ-aKb1" }, "outputs": [], "source": [ "goal = {\n", " \"type\": \"energy\",\n", " \"params\": {\n", " \"delta_p\": 8.0,\n", " \"delta_n\": 8.0,\n", " \"rate\": 2.0,\n", " \"stress\": 1.0,\n", " \"process\": \"horowitz\",\n", " \"parameters_on_memory\": [\"sram\", \"sram\"],\n", " \"activations_on_memory\": [\"sram\", \"sram\"],\n", " \"rd_wr_on_io\": [False, False],\n", " \"min_sram_size\": [0, 0],\n", " \"source_quantizers\": [\"int8\"],\n", " \"reference_internal\": \"int8\",\n", " \"reference_accumulator\": \"int32\"\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-QzyWPA-dCxm" }, "source": [ "There are a few more things we need to define. Let's bundle them on a dictionary and pass them to `AutoQKeras`. We will try a maximum of 10 trials (`max_trials`) just to limit the time we will spend finding the best quantization here. Please note that this parameter is not valid if you are running in `hyperband` mode.\n", "\n", "`output_dir` is the directory where we will store our results. Since we are running on a colab, we will let `tempfile` chooce a directory for us.\n", "\n", "`learning_rate_optimizer` allows `AutoQKeras` to change the optimization function and the `learning_rate` to try to improve the quantization results. Since it is still experimental, it may be the case that in some cases it will get worse results. 
\n", "\n", "Because we are tuning filters as well, we should set `transfer_weights` to `False` as the trainable parameters will have different shapes.\n", "\n", "In `AutoQKeras` we have three modes of operation: `random`, `bayesian` and `hyperband`. I recommend the user to refer to `KerasTuner` (https://keras-team.github.io/keras-tuner/) for a complete description of them.\n", "\n", "`tune_filters` can be set to `layer`, `block` or `none`. If `tune_filters` is `block`, we change the filters by the same amount for all layers being quantized in the trial. If `tune_filters` is `layer`, we will possibly change the number of filters for each layer independently. Finally, if `tune_filters` is `none`, we will not perform filter tuning.\n", "\n", "Together with `tune_filters`, `tune_filter_exceptions` allows the user to specify by a regular expression which filters we should not perform filter tuning, which is especially good for the last layers of the network.\n", "\n", "Filter tuning is a very important feature of `AutoQKeras`. When we deep quantize a model, we may need less or more filters for each layer (and you can guess we do not know a priori how many filters we will need for each layer). Let me give you a rationale behind this.\n", "\n", "- **less filters**: let us assume we have two set of filter coefficients we want quantize: $[-0.3, 0.2, 0.5, 0.15]$ and $[-0.5, 0.4, 0.1, 0.65]$. If we apply a $\\tt{binary}$ quantizer with $\\tt{scale} = \\big\\lceil \\log_2(\\frac{\\sum |w|}{N}) \\big\\rceil$, where $w$ are the filter coefficients and $N$ is the number of coefficients, we will end up with the same filter $\\tt{binary}([-0.3, 0.2, 0.5, 0.15]) = \\tt{binary}([-0.5, 0.4, 0.1, 0.65]) = [-1,1,1,1] \\times 0.5$. 
In this case we are assuming the $\\tt{scale}$ is a power-of-2 number so that it can be efficiently implemented by a shift operation;\n", "\n", "- **more filters**: it is clear that quantization will drop information (just look at the example above) and deep quantization will drop more information, so to recover some of the boundary regions in layers that perform feature extraction, we may need to add more filters to the layer when we quantize it.\n", "\n", "We do not want to quantize the `softmax` layer, which is the last layer of the network. In `AutoQKeras`, you can specify the indexes that you want to perform quantization by specifying the corresponding index of the layer in `Keras`, i.e. if you can get the layer as `model.layers[i]` in `Keras`, `i` is the index of the layer.\n", "\n", "Finally, for data parallel distributed training, we should pass the strategy in `distribution_strategy` to `KerasTuner`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 54 }, "colab_type": "code", "executionInfo": { "elapsed": 297, "status": "ok", "timestamp": 1591840405963, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "2-fyACb2dIAN", "outputId": "a180fa3f-8cc3-4f70-ce70-c05c28f88d1e" }, "outputs": [], "source": [ "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": cur_strategy,\n", " # first layer is input, layer two layers are softmax and flatten\n", " \"layer_indexes\": range(1, len(model.layers) - 1),\n", " \"max_trials\": 20\n", "}\n", "\n", "print(\"quantizing layers:\", [model.layers[i].name for i in 
run_config[\"layer_indexes\"]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 471192, "status": "ok", "timestamp": 1591840877167, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "MxlZFpa3fBv2", "outputId": "4d339846-1832-4a79-89b3-c9c4944dd47a" }, "outputs": [], "source": [ "autoqk = AutoQKeras(model, metrics=[\"acc\"], custom_objects=custom_objects, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "LW_qN8-lOwL0" }, "source": [ "Now, let's see which model is the best model we got.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 3961, "status": "ok", "timestamp": 1591840881173, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "1L7KivAoffaL", "outputId": "f44b07a3-027d-4d69-9864-b3670815c407" }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "RB2xBRhJiwoh" }, "source": [ "We got here >90% reduction in energy when compared to 8-bit tensors and 32-bit accumulators. Remember that our original number was 3.3 uJ for fp32. The end model has 11 nJ for the quantized model as opposed to 204 nJ for the 8-bit original quantized model. As these energy numbers are from high-level energy models, you should remember to consider the relations between them, and not the actual numbers." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Wy0zcqvQoBnb" }, "source": [ "Let's train this model to see how much accuracy we can get of it." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 71353, "status": "ok", "timestamp": 1591840952535, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "_ipZSEfgoGdb", "outputId": "b184269d-1161-417a-e1ae-e852dc451561" }, "outputs": [], "source": [ "qmodel.load_weights(\"qmodel.h5\")\n", "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Fr95jcPROz7p" }, "source": [ "One of problems of trying to quantize the whole thing in one shot is that we may end up with too many choices to make, which will make the entire search space very high. In order to reduce the search space, `AutoQKeras` has two methods to enable users to cope with the explosion of choices." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9zc7ZrnbPIJA" }, "source": [ "## Grouping Layers to Use the Same Choice\n", "\n", "In this case, we can provide regular expressions to `limit` to specify layer names that should be grouped together. In our example, suppose we want to group convolution layers (except the first one) and all activations except the last one to use the same quantization.\n", "\n", "For the first convolution layer, we want to limit the quantization types to fewer choices as the input is already an 8-bit number. The last activation will be fed to a feature classifier layer, so we may leave it with more bits. Because our `dense` is actually a `Conv2D` operation, we will enable 8-bits for the weights by layer name. \n", "\n", "We first need to look at the names of the layers for this. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 428 }, "colab_type": "code", "executionInfo": { "elapsed": 301, "status": "ok", "timestamp": 1591840952867, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "w-d8nhG0pJF0", "outputId": "6529b630-f382-4e2a-94ef-ba3d9e3f875c" }, "outputs": [], "source": [ "pprint.pprint([layer.name for layer in model.layers])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "32Enp890pU_4" }, "source": [ "Convolution layers for `mnist` have names specified as `conv2d_[01234]`. Activation layers have names specified as `act_[01234]`. So, we can create the following regular expressions to reduce the search space in our model.\n", "\n", "Please note that layer class names always select different quantizers, so the user needs to specify a pattern for layer names if he/she wants to use the same quantization for the group of layers.\n", "\n", "You can see here another feature of the limit. You can specify the maximum number of bits, or cherry pick which quantizers you want to try for a specific layer if instead of the maximum number of bits you specify a list of quantizers fron `quantization_config`." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "Y5XItp95PHW6" }, "outputs": [], "source": [ "limit = {\n", " \"Dense\": [8, 8, 4],\n", " \"Conv2D\": [4, 8, 4],\n", " \"DepthwiseConv2D\": [4, 8, 4],\n", " \"Activation\": [4],\n", " \"BatchNormalization\": [],\n", "\n", " \"^conv2d_0$\": [\n", " [\"binary\", \"ternary\", \"quantized_bits(2,1,1,alpha=1.0)\"],\n", " 8, 4\n", " ],\n", " \"^conv2d_[1234]$\": [4, 8, 4],\n", " \"^act_[0123]$\": [4],\n", " \"^act_4$\": [8],\n", " \"^dense$\": [8, 8, 4]\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "EJs1L-jIie7w" }, "outputs": [], "source": [ "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": cur_strategy,\n", " \"layer_indexes\": range(1, len(model.layers) - 1),\n", " \"max_trials\": 40\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 993665, "status": "ok", "timestamp": 1591841947161, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "c7eSwXyijhzc", "outputId": "6c76a21f-cbb3-4bc5-b899-b02c28821b78" }, "outputs": [], "source": [ "autoqk = AutoQKeras(model, metrics=[\"acc\"], custom_objects=custom_objects, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "7sYp8Z2pnLi1" }, "source": [ "Let's see the reduction now." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 7109, "status": "ok", "timestamp": 1591841954308, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "yj826gNhjsfK", "outputId": "2e7f17d7-794e-44f6-d23a-452759727a53" }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "eXMcqxLAnY8t" }, "source": [ "Let's train this model for more time to see how much we can get in accuracy." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 68145, "status": "ok", "timestamp": 1591842022471, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "qpT8QgkJnQPa", "outputId": "61e711db-6187-4047-dae8-9ce2d093f56c" }, "outputs": [], "source": [ "qmodel.load_weights(\"qmodel.h5\")\n", "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "gAV6Kw0QoODq" }, "source": [ "## Quantization by Blocks\n", "\n", "In the previous section, we enforced that all decisions were the same in order to reduce the number of options to quantize a model. 
\n", "\n", "Another approach is still to allow models to have each block of layers to makde their own choice, but quantizing the blocks sequentially, either from inputs to outputs, or by quantizing higher energy blocks first.\n", "\n", "The rationale for this method is that if we quantize the blocks one by one, and assuming that each block has $N$ choices, and $B$ blocks, we end up trying $N B$ options, instead of $N^B$ choices. The reader should note that this is an approximation as there is no guarantee that we will obtain the best quantization possible.\n", "\n", "Should you do sequential from inputs to outputs or starting from the block that has the highest impact?\n", "\n", "If you have a network like ResNet, and if you want to do filter tuning, you need to block the layers by the resnet definition of a block, i.e. including full identity or convolutional blocks, and quantize the model from inputs to outputs, so that you can preserve at each stage the number of channels for the residual block. \n", "\n", "In order to perform quantization by blocks, you need to specify two other parameters in our `run_config`. `blocks` is a list of regular expressions of the groups you want to quantize. If a layer does not match the block pattern, it will not be quantized. `schedule_block` specifies the mode for block quantization scheduling. It can be `sequential` or `cost` if you want to schedule first the blocks by decreasing cost size (energy or bits).\n", "\n", "In this model, there are a few optimizations that we perform automatically. First, we dynamically reduce the learning rate of the blocks that we have already quantized as setting them to not-trainable does not seem to work, so we still allow them to train, but at a slower pace. In addition, we try to dynamically adjust the learning rate for the layer we are trying to quantize as opposed to the learning rate of the unquantized layers. 
Finally, we transfer the weights of the models we have already quantized whenever we can do (if the shapes remain the same). \n", "\n", "Regardless on how we schedule the operations, we amortize the nubmer of trials for the cost of the block (energy or bits with respect to the total energy or number of bits of the network).\n", "\n", "Instead of invoking `AutoQKeras` now, we will invoke `AutoQKeras` scheduler." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "NUz4A6SKnhUf" }, "outputs": [], "source": [ "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": cur_strategy,\n", " \"layer_indexes\": range(1, len(model.layers) - 1),\n", " \"max_trials\": 40,\n", "\n", " \"blocks\": [\n", " \"^.*_0$\",\n", " \"^.*_1$\",\n", " \"^.*_2$\",\n", " \"^.*_3$\",\n", " \"^.*_4$\",\n", " \"^dense\"\n", " ],\n", " \"schedule_block\": \"cost\"\n", "}" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JWJiZZ9vsORJ" }, "source": [ "Because specifying regular expressions is error prone, we recommend that you first try to run `AutoQKerasScheduler` in debug mode to print the blocks." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 737 }, "colab_type": "code", "executionInfo": { "elapsed": 395, "status": "ok", "timestamp": 1591842023212, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "uSOxKQGwsqf2", "outputId": "18647e4f-ef7a-4c6a-aeb8-0c9c2039fdbb" }, "outputs": [], "source": [ "pprint.pprint([layer.name for layer in model.layers])\n", "autoqk = AutoQKerasScheduler(model, metrics=[\"acc\"], custom_objects=custom_objects, debug=True, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "TQPUKPZhC_SI" }, "source": [ "All blocks seem to be fine. Let's find the best quantization now." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 1938883, "status": "ok", "timestamp": 1591843962106, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "sXt-cRKvDEaL", "outputId": "36db3217-86ff-4425-ee12-f637a4fc1841" }, "outputs": [], "source": [ "autoqk = AutoQKerasScheduler(model, metrics=[\"acc\"], custom_objects=custom_objects, **run_config)\n", "autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "height": 291 }, "colab_type": "code", "executionInfo": { "elapsed": 396, "status": "ok", "timestamp": 1591843962540, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "ArdGbsXFDK-I", "outputId": "43730cd5-93fc-4838-c49a-1f3f4151fa54" }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": { "colab": { "height": 1000 }, "colab_type": "code", "executionInfo": { "elapsed": 69779, "status": "ok", "timestamp": 1591844032332, "user": { "displayName": "Claudionor Coelho", "photoUrl": "", "userId": "01084525977535968041" }, "user_tz": 420 }, "id": "RHGb6YHFEgtV", "outputId": "5578ce49-1ee9-4063-deab-1b3db9f4b66b" }, "outputs": [], "source": [ "qmodel.load_weights(\"qmodel.h5\")\n", "with cur_strategy.scope():\n", " optimizer = Adam(lr=0.02)\n", " qmodel.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n", " qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab": {}, "colab_type": "code", "id": "fJCkMdAcjnoh" }, "source": [ "Perfect! You have learned how to perform automatic quantization using AutoQKeras with QKeras." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "collapsed_sections": [], "last_runtime": { "build_target": "//learning/deepmind/dm_python:dm_notebook3_tpu", "kind": "private" }, "name": "AutoQKeras.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 1 } ================================================ FILE: notebook/CodebookQuantization.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the 
License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Codebook based quantization\n", "\n", "Codebook based quantizaion is a non-uniform quantization technique that maps each weight or activation value to the index of a value in the codebook. This allows us to compress weights/activations even further with neglibible loss in performance. We will demonstrate this by training an object classification model and applying codebook quantization to the activation with the most values." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.regularizers import *\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.optimizers import *\n", "from tensorflow.keras.datasets import *\n", "from tensorflow.keras.utils import to_categorical\n", "\n", "from qkeras import *\n", "from qkeras.codebook import *\n", "\n", "\n", "def get_data(name, sample_size=1.0):\n", " (x_train, y_train), (x_test, y_test) = globals()[name].load_data()\n", "\n", " if len(x_train.shape) == 3:\n", " x_train = x_train.reshape(x_train.shape + (1,))\n", " x_test = x_test.reshape(x_test.shape + (1,))\n", "\n", " x_train = x_train.astype(\"float32\")\n", " x_test = x_test.astype(\"float32\")\n", "\n", " mean = np.mean(x_train,axis=(0,1,2,3))\n", " std = np.std(x_train,axis=(0,1,2,3))\n", " x_train = (x_train-mean)/(std+1e-7)\n", " x_test = (x_test-mean)/(std+1e-7)\n", "\n", " y_train_c = to_categorical(y_train, 
np.max(y_train) + 1)\n", " y_test_c = to_categorical(y_test, np.max(y_test) + 1)\n", "\n", " if sample_size != 1.0:\n", " indexes = np.asarray(range(x_train.shape[0]))\n", " np.random.shuffle(indexes)\n", " indexes = indexes[:int(x_train.shape[0] * sample_size)]\n", "\n", " x_train = x_train[indexes]\n", " y_train_c = y_train_c[indexes]\n", "\n", " return (x_train, y_train_c), (x_test, y_test_c)\n", "\n", "\n", "def get_model(\n", " name, X_train, y_train, X_test, y_test,\n", " blocks=[[32], [64], [128]],\n", " quantizer_list=[\n", " \"quantized_relu_po2(4,4)\",\n", " \"quantized_relu_po2(4,4)\"\n", " ],\n", " use_stochastic_rounding=0,\n", " l1v=None,\n", " epochs=10,\n", " load_weights=True):\n", "\n", " if l1v is None:\n", " l1v = [0.0] * len(blocks)\n", "\n", " X_shape = X_train.shape[1:]\n", " x_i = x = Input(X_shape)\n", "\n", " for b, block in enumerate(blocks):\n", " # we are assuming we want to quantize the block that has sparsity\n", " # so let's add dropout to the next layer\n", "\n", " if b >= 1 and l1v[b-1] != 0.0:\n", " x = Dropout(0.3, name=f\"drop{b}\")(x)\n", "\n", " for i in range(len(block)):\n", " x = QConv2D(\n", " block[i], kernel_size=(3,3), strides=(2,2), padding=\"same\",\n", " kernel_quantizer=f\"quantized_bits(4, use_stochastic_rounding={use_stochastic_rounding})\",\n", " bias_quantizer=f\"quantized_po2(4, use_stochastic_rounding={use_stochastic_rounding})\",\n", " kernel_regularizer=l1(l1v[b]) if l1v[b] != 0.0 else None,\n", " name=f\"d{b}_{i}\")(x)\n", " if i != len(block) - 1:\n", " if quantizer_list[b] in [\"linear\", \"relu\", \"softmax\", \"sigmoid\"]:\n", " x = Activation(quantizer_list[b], name=f\"a{b}_{i}\")(x)\n", " else:\n", " x = QActivation(quantizer_list[b], name=f\"a{b}_{i}\")(x)\n", " else:\n", " x = QBatchNormalization(name=f\"bn{b}_{i}\")(x)\n", " if b < len(blocks) - 1:\n", " if quantizer_list[b] in [\"linear\", \"relu\", \"softmax\", \"sigmoid\"]:\n", " x = Activation(quantizer_list[b], 
name=f\"a{b}_{len(block)-1}\")(x)\n", " else:\n", " x = QActivation(quantizer_list[b], name=f\"a{b}_{len(block)-1}\")(x)\n", " else:\n", " if len(block) > 0:\n", " x = QActivation(f\"quantized_relu(6,2, use_stochastic_rounding={use_stochastic_rounding})\", \n", " name=f\"a{b}_{len(block)-1}\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", " x = QDense(\n", " y_train.shape[1], name=f\"d{len(blocks)-1}_{len(block)}\")(x)\n", " x = Activation(\"softmax\", name=f\"a{len(blocks)-1}_{len(block)}\")(x)\n", "\n", " model = Model(inputs=x_i, outputs=x)\n", " model.summary()\n", "\n", " model.compile(loss=\"categorical_crossentropy\", optimizer=Adam(0.001), metrics=[\"acc\"])\n", "\n", " try:\n", " if load_weights and os.path.isfile(name + \".h5\"):\n", " print('Found file...')\n", " model.load_weights(name + \".h5\")\n", " else:\n", " model.fit(X_train, y_train, validation_data=(X_test, y_test),\n", " batch_size=128, epochs=epochs, verbose=2)\n", " model.save_weights(name + \".h5\")\n", " except:\n", " model.fit(X_train, y_train, validation_data=(X_test, y_test),\n", " batch_size=128, epochs=epochs, verbose=2)\n", " model.save_weights(name + \".h5\")\n", "\n", " return model\n", "\n", "\n", "name = \"cifar10\"\n", "(X_train, y_train), (X_test, y_test) = get_data(name, sample_size=1)\n", "model = get_model(\n", " name, X_train, y_train, X_test, y_test,\n", " blocks=[[32, 32], [64, 64], [128]],\n", " quantizer_list=[\"quantized_relu(6,2)\", \"quantized_relu(6,2)\"],\n", " epochs=50,\n", " load_weights=True\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from qkeras.codebook import *\n", "\n", "cb_tables, models, km_models = activation_compression(\n", " model, \n", " {'loss' : \"categorical_crossentropy\", 'metrics' : [\"acc\"]},\n", " [2], 3, \n", " X_train, y_train, \n", " X_test, y_test,\n", " sample_size=0.3\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "q = 
models[0].layers[-1].quantizer\n", "in_table, out_table = cb_tables[0]\n", "print(q)\n", "print('in_table:', in_table)\n", "print('out_table:', out_table)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "for i,x in enumerate(q.range()):\n", " print(f'{x:8}, {in_table[out_table[i]]:6}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Weight compression using codebook quantization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conv_weights = model.layers[1].weights[0].numpy()\n", "print(conv_weights.shape)\n", "quantizer = model.layers[1].kernel_quantizer_internal\n", "print(quantizer)\n", "axis = 3\n", "bits = 3\n", "index_table, codebook_table = weight_compression(\n", " conv_weights, \n", " bits, \n", " axis, \n", " quantizer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(codebook_table.shape)\n", "codebook_table[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(index_table.shape)\n", "index_table[:,:,:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_conv_weights = np.zeros(conv_weights.shape)\n", "for i in range(conv_weights.shape[axis]):\n", " new_conv_weights[:,:,:,i] = codebook_table[i][index_table[:,:,:,i]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_conv_weights[:,:,:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conv_weights[:,:,:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bias = model.layers[1].weights[1].numpy()\n", "model.layers[1].set_weights([new_conv_weights, bias])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"model.evaluate(X_test, y_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Py3", "language": "python", "name": "py3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: notebook/QKerasTutorial.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# QKeras Lab Book" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "__QKeras__ is a quantization extension to Keras that provides drop-in replacement for some of the Keras layers, especially the ones that creates parameters and activation layers, and perform arithmetic operations, so that we can quickly create a deep quantized version of Keras network.\n", "\n", "According to Tensorflow documentation, Keras is a high-level API to build and train deep learning models. 
It's used for fast prototyping, advanced research, and production, with three key advantages:\n", "\n", "- User friendly
\n", "Keras has a simple, consistent interface optimized for common use cases. It provides clear and actionable feedback for user errors.\n", "\n", "- Modular and composable
\n", "Keras models are made by connecting configurable building blocks together, with few restrictions.\n", "\n", "- Easy to extend
\n", "Write custom building blocks to express new ideas for research. Create new layers, loss functions, and develop state-of-the-art models.\n", "\n", "__QKeras__ is being designed to extend the functionality of Keras using Keras' design principle, i.e. being user friendly, modular and extensible, adding to it being \"minimally intrusive\" of Keras native functionality.\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Related Work\n", "\n", "__QKeras__ has been implemented based on the work of _\"B.Moons et al. - Minimum Energy Quantized Neural Networks\"_ , Asilomar Conference on Signals, Systems and Computers, 2017 and _“Zhou, S. et al. DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients,”_ but the framework should be easily extensible. The original code from QNN can be found below.\n", "\n", "https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow\n", "\n", "__QKeras__ extends QNN by providing a richer set of layers (including SeparableConv2D, DepthwiseConv2D, ternary and stochastic ternary quantizations), besides some functions to aid the estimation for the accumulators and conversion from non-quantized to quantized networks. Finally, our main goal is ease of use, so we attempt to make QKeras layers a true drop-in replacement for Keras, so that users can easily exchange non-quantized layers for quantized ones."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Layers Implemented in QKeras\n", "\n", "The following layers have been implemented in __QKeras__.\n", "\n", "- __`QDense`__\n", "\n", "- __`QConv1D`__\n", "\n", "- __`QConv2D`__\n", "\n", "- __`QDepthwiseConv2D`__\n", "\n", "- __`QSeparableConv2D`__ (depthwise + pointwise expanded, extended from MobileNet SeparableConv2D implementation)\n", "\n", "- __`QActivation`__\n", "\n", "- __`QAveragePooling2D`__ (in fact, an AveragePooling2D stacked with a QActivation layer for quantization of the result, so this layer does not exist)\n", "\n", "- __`QBatchNormalization`__\n", "\n", "- __`QOctaveConv2D`__\n", "\n", "It is worth noting that not all functionality is safe at this time to be used with other high-level operations, such as with layer wrappers. For example, `Bidirectional` layer wrappers are used with RNNs. If this is required, we encourage users to use quantization functions invoked as strings instead of the actual functions as a way through this, but we may change that implementation in the future.\n", "\n", "__`QSeparableConv2D`__ is implemented as a depthwise + pointwise quantized expansion, which is extended from the `SeparableConv2D` implementation of MobileNet. With the exception of __`QBatchNormalization`__, if quantizers are not specified, no quantization is applied to the layer and it ends up behaving like the original unquantized layers. On the other hand, __`QBatchNormalization`__ has been implemented differently as if the user does not specify any quantizers as parameters, it uses a setup that has worked best when attempting to implement quantization efficiently in hardware and software, i.e. 
`gamma` and `variance` with po2 quantizers (as they become shift registers in an implementation, and with further constraining variance po2 quantizer to use quadratic approximation as we take the square root of the variance to obtain the standard deviation), `beta` using po2 quantizer to maintain the dynamic range aspect of the center parameter, and `mean` remaining unquantized, as it inherits the properties of the previous layer.\n", "\n", "Activation has been migrated to __`QActivation`__, although __QKeras__ also recognizes the activation parameter used in convolutional and dense layers.\n", "\n", "We have improved the setup of quantization as convolution, dense and batch normalization layers now notify the quantizers when the quantizers are used as internal parameters, so the user does not need to worry about setting up options that work best in `weights` and `bias` like `alpha` and `use_stochastic_rounding` (although users may override the automatic setup).\n", "\n", "Finally, in the current version, we have eliminated the need to set up the range of the quantizers like `kernel_range` in __`QDense`__. This is automatically computed internally at this point. Although we kept the parameters for backward compatibility, these parameters will be removed in the future." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Activation Layers and Quantizers Implemented in __QKeras__\n", "\n", "Quantizers and activation layers are treated interchangeably in __QKeras__. 
\n", "\n", "The list of quantizers and their parameters is listed below.\n", "\n", "- __`smooth_sigmoid(x)`__\n", "\n", "- __`hard_sigmoid(x)`__\n", "\n", "- __`binary_sigmoid(x)`__\n", "\n", "- __`smooth_tanh(x)`__\n", "\n", "- __`hard_tanh(x)`__\n", "\n", "- __`binary_tanh(x)`__\n", "\n", "- __`quantized_bits(bits=8, integer=0, symmetric=0, keep_negative=1, alpha=None, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`bernoulli(alpha=None, temperature=6.0, use_real_sigmoid=True)(x)`__\n", "\n", "- __`stochastic_ternary(alpha=None, threshold=None, temperature=8.0, use_real_sigmoid=True)(x)`__\n", "\n", "- __`ternary(alpha=None, threshold=None, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`stochastic_binary(alpha=None, temperature=6.0, use_real_sigmoid=True)(x)`__\n", "\n", "- __`binary(use_01=False, alpha=None, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`quantized_relu(bits=8, integer=0, use_sigmoid=0, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`quantized_ulaw(bits=8, integer=0, symmetric=0, u=255.0)(x)`__\n", "\n", "- __`quantized_tanh(bits=8, integer=0, symmetric=0, use_stochastic_rounding=False)(x)`__\n", "\n", "- __`quantized_po2(bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False)(x)`__\n", "\n", "- __`quantized_relu_po2(bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False)(x)`__\n", "\n", "The __`stochastic_*`__ functions and __`bernoulli`__ rely on stochastic versions of the activation functions, so they are best suited for weights and biases. They draw a random number with uniform distribution from `sigmoid` of the input x, and the result is based on the expected value of the activation function. Please refer to the papers if you want to understand the underlying theory, or the documentation in qkeras/quantizers.py. 
The parameter `temperature` determines how steep the sigmoid function will behave, and the default values seem to work fine.\n", "\n", "As we lower the number of bits, rounding becomes problematic as it adds bias to the number system. Numpy attempts to reduce the effects of bias by rounding to even instead of rounding to infinity. Recent results (_\"Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, Pritish Narayanan; Deep Learning with Limited Numerical Precision_ [https://arxiv.org/abs/1502.02551]) suggested using stochastic rounding, which uses the fractional part of the number as a probability to round up or down. We can turn on stochastic rounding in some quantizers by setting `use_stochastic_rounding` to `True` in __`quantized_bits`__, __`binary`__, __`ternary`__, __`quantized_relu`__ and __`quantized_tanh`__, __`quantized_po2`__, and __`quantized_relu_po2`__. Please note that if one is considering an efficient hardware or software implementation, we should avoid setting this flag to `True` in activations as it may affect the efficiency of an implementation. In addition, as mentioned before, we already set this flag to `True` in some quantized layers when the quantizers are used as weights/biases.\n", "\n", "The parameter `bits` specifies the number of bits for the quantization, and `integer` specifies how many bits of `bits` are to the left of the decimal point. Finally, in our experience training networks with __`QSeparableConv2D`__, it is advisable to allocate more bits between the depthwise and the pointwise quantization, and both __`quantized_bits`__ and __`quantized_tanh`__ should use symmetric versions for weights and bias in order to properly converge and eliminate the bias.\n", "\n", "We have substantially improved the stochastic rounding implementation in __QKeras__ $>= 0.7$, and added a symbolic way to compute alpha in __`binary`__, __`stochastic_binary`__, __`ternary`__, __`stochastic_ternary`__, __`bernoulli`__ and __`quantized_bits`__. 
Right now, a scale and the threshold (for ternary and stochastic_ternary) can be computed independently of the distribution of the inputs, which is required when using these quantizers in weights.\n", "\n", "The main problem in using very small bit widths in large deep learning networks stem from the fact that weights are initialized with variance roughly $\\propto \\sqrt{1/\\tt{fanin}}$, but during the training the variance shifts outwards. If the smallest quantization representation (threshold in ternary networks) is smaller than $\\sqrt{1/\\tt{fanin}}$, we run the risk of having the weights stuck at 0 during training. So, the weights need to dynamically adjust to the variance shift from initialization to the final training. This can be done by scaling the quantization. \n", "\n", "Scale is computed using the formula $\\sum(\\tt{dot}(Q,x))/\\sum(\\tt{dot}(Q,Q))$ which is described in several papers, including _Mohammad Rastegari, Vicente Ordonez, Joseph Redmon, Ali Farhadi \"XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks\"_ [https://arxiv.org/abs/1603.05279]. Scale computation is computed for each output channel, making our implementation sometimes behaving like a mini-batch normalization adjustment. \n", "\n", "For __`ternary`__ and __`stochastic_ternary`__, we iterate between scale computation and threshold computation, as presented in _K. Hwang and W. Sung, \"Fixed-point feedforward deep neural network design using weights +1, 0, and −1,\" 2014 IEEE Workshop on Signal Processing Systems (SiPS), Belfast, 2014, pp. 1-6_ which makes the search for threshold and scale tolerant to different input distributions. This is especially important when we need to consider that the threshold shifts depending on the input distribution, affecting the scale as well, as pointed out by _Fengfu Li, Bo Zhang, Bin Liu, \"Ternary Weight Networks\"_ [https://arxiv.org/abs/1605.04711]. 
\n", "\n", "When computing the scale in these quantizers, if `alpha=\"auto\"`, we compute the scale as a floating point number. If `alpha=\"auto_po2\"`, we enforce the scale to be a power of 2, meaning that an actual hardware or software implementation can be performed by just shifting the result of the convolution or dense layer to the right or left by checking the sign of the scale (positive shifts left, negative shifts right), and taking the log2 of the scale. This behavior is compatible with shared exponent approaches, as it performs a shift adjustment to the channel.\n", "\n", "We have implemented a method for each quantizer called __`_set_trainable_parameter`__ that instructs __QKeras__ to set best options when this quantizer is used as a weight or for gamma, variance and beta in __`QBatchNormalization`__, so in principle, users should not worry about this.\n", "\n", "The following pictures show the behavior of __`binary`__ vs stochastic rounding in __`binary`__ vs __`stochastic_binary`__ (Figure 1) and __`ternary`__ vs stochastic rounding in __`ternary`__ and __`stochastic_ternary`__ (Figure 2). We generated a normally distributed input with mean 0.0 and standard deviation of 0.02, ordered the data, and ran the quantizer 1,000 times, averaging the result for each case. Note that because of scale, the output does not range from $[-1.0, +1.0]$, but from $[-\\tt{scale}, +\\tt{scale}]$.\n", "\n", "\n", "\"Binary
Figure 1: Behavior of binary quantizers
\n", "\n", "\"Ternary
Figure 2: Behavior of ternary quantizers
\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using QKeras\n", "\n", "__QKeras__ works by tagging all variables and weights/bias created by Keras as well as output of arithmetic layers by quantized functions. Quantized functions can be instantiated directly in __`QDense`__/__`QConv2D`__/__`QSeparableConv2D`__ functions, and they can be passed to __`QActivation`__, which act as a merged quantization and activation function.\n", "\n", "In order to successfully quantize a model, users need to replace layers that create variables (trainable or not) (`Dense`, `Conv2D`, etc) by their equivalent ones in __Qkeras__ (__`QDense`__, __`QConv2D`__, etc), and any layers that perform math operations need to be quantized afterwards.\n", "\n", "Quantized values are clipped between their maximum and minimum quantized representation (which may be different than $[-1.0, 1.0]$), although for `po2` type of quantizers, we still recommend the users to specify the parameter for `max_value`.\n", "\n", "An example of a very simple network is given below in Keras." 
] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import six\n", "import numpy as np\n", "import tensorflow.compat.v2 as tf\n", "\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.datasets import mnist\n", "from tensorflow.keras.utils import to_categorical" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def CreateModel(shape, nb_classes):\n", " x = x_in = Input(shape)\n", " x = Conv2D(18, (3, 3), name=\"conv2d_1\")(x)\n", " x = Activation(\"relu\", name=\"act_1\")(x)\n", " x = Conv2D(32, (3, 3), name=\"conv2d_2\")(x)\n", " x = Activation(\"relu\", name=\"act_2\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", " x = Dense(nb_classes, name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " \n", " model = Model(inputs=x_in, outputs=x)\n", "\n", " return model" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def get_data():\n", " (x_train, y_train), (x_test, y_test) = mnist.load_data()\n", " x_train = x_train.reshape(x_train.shape + (1,)).astype(\"float32\")\n", " x_test = x_test.reshape(x_test.shape + (1,)).astype(\"float32\")\n", "\n", " x_train /= 256.0\n", " x_test /= 256.0\n", "\n", " x_mean = np.mean(x_train, axis=0)\n", "\n", " x_train -= x_mean\n", " x_test -= x_mean\n", "\n", " nb_classes = np.max(y_train)+1\n", " y_train = to_categorical(y_train, nb_classes)\n", " y_test = to_categorical(y_test, nb_classes)\n", "\n", " return (x_train, y_train), (x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "(x_train, y_train), (x_test, y_test) = get_data()\n", "\n", "model = CreateModel(x_train.shape[1:], y_train.shape[-1])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", 
metrics=[\"accuracy\"])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "model.fit(x_train, y_train, epochs=3, batch_size=128, validation_data=(x_test, y_test), verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Great! it is relatively easy to create a network that converges in MNIST with very high test accuracy. The reader should note that we named all the layers as it will make it easier to automatically convert the network by name." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The corresponding quantized network is presented below." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from qkeras import *\n", "\n", "def CreateQModel(shape, nb_classes):\n", " x = x_in = Input(shape)\n", " x = QConv2D(18, (3, 3),\n", " kernel_quantizer=\"stochastic_ternary\", \n", " bias_quantizer=\"quantized_po2(4)\",\n", " name=\"conv2d_1\")(x)\n", " x = QActivation(\"quantized_relu(2)\", name=\"act_1\")(x)\n", " x = QConv2D(32, (3, 3), \n", " kernel_quantizer=\"stochastic_ternary\", \n", " bias_quantizer=\"quantized_po2(4)\",\n", " name=\"conv2d_2\")(x)\n", " x = QActivation(\"quantized_relu(2)\", name=\"act_2\")(x)\n", " x = Flatten(name=\"flatten\")(x)\n", " x = QDense(nb_classes,\n", " kernel_quantizer=\"quantized_bits(3,0,1)\",\n", " bias_quantizer=\"quantized_bits(3)\",\n", " name=\"dense\")(x)\n", " x = Activation(\"softmax\", name=\"softmax\")(x)\n", " \n", " model = Model(inputs=x_in, outputs=x)\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "qmodel = CreateQModel(x_train.shape[1:], y_train.shape[-1])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.optimizers import Adam\n", "\n", 
"qmodel.compile(\n", " loss=\"categorical_crossentropy\",\n", " optimizer=Adam(0.0005),\n", " metrics=[\"accuracy\"])" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "qmodel.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test), verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You should note that we had to lower the learning rate and train the network for a longer time. On the other hand, the network should not involve any multiplications in the convolution layers, and very small multipliers in the dense layers.\n", "\n", "Please note that the last `Activation` was not changed to __`QActivation`__ as during inference we usually perform the operation `argmax` on the result instead of `softmax`.\n", "\n", "It seems it is a lot of code to write besides the main network, but in fact, this additional code is only specifying the sizes of the weights and the sizes of the outputs in the case of the activations. Right now, we do not have a way to extract this information from the network structure or problem we are trying to solve, and if we quantize a layer too much, we may end up not being able to recover from that later on."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Converting a Model Automatically\n", "\n", "In addition to the drop-in replacement of Keras functions, we have written the following function to assist anyone who wants to quantize a network.\n", "\n", "__`model_quantize(model, quantizer_config, activation_bits, custom_objects=None, transfer_weights=False)`__\n", "\n", "This function converts a non-quantized model (such as the one from `model` in the previous example) into a quantized version, by applying a configuration specified by the dictionary `quantizer_config`, and `activation_bits` specified for unnamed activation functions, with this parameter probably being removed in future versions.\n", "\n", "The parameter `custom_objects` specifies an object dictionary unknown to Keras, required when you copy a model with lambda layers, or customized layer functions, for example, and if `transfer_weights` is `True`, the returned model will have as initial weights the weights from the original model, instead of using random initial weights.\n", "\n", "The dictionary specified in `quantizer_config` can be indexed by a layer name or layer class name. In the example below, conv2d_1 corresponds to the first convolutional layer of the example, while QConv2D corresponds to the default behavior of two dimensional convolutional layers. The reader should note that right now we recommend using __`QActivation`__ with a dictionary to avoid the conversion of activations such as `softmax` and `linear`. In addition, although we could use the `activation` field in the layers, we do not recommend that. 
\n", "\n", "`{\n", " \"conv2d_1\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QConv2D\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QDense\": {\n", " \"kernel_quantizer\": \"quantized_bits(3,0,1)\",\n", " \"bias_quantizer\": \"quantized_bits(3)\"\n", " },\n", " \"act_1\": \"quantized_relu(2)\",\n", " \"QActivation\": { \"relu\": \"quantized_relu(2)\" }\n", "}`\n", "\n", "In the following example, we will quantize the model using a different strategy.\n" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "config = {\n", " \"conv2d_1\": {\n", " \"kernel_quantizer\": \"stochastic_binary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QConv2D\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QDense\": {\n", " \"kernel_quantizer\": \"quantized_bits(4,0,1)\",\n", " \"bias_quantizer\": \"quantized_bits(4)\"\n", " },\n", " \"QActivation\": { \"relu\": \"binary\" },\n", " \"act_2\": \"quantized_relu(3)\",\n", "}" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "from qkeras.utils import model_quantize\n", "\n", "qmodel = model_quantize(model, config, 4, transfer_weights=True)\n", "\n", "for layer in qmodel.layers:\n", " if hasattr(layer, \"kernel_quantizer\"):\n", " print(layer.name, \"kernel:\", str(layer.kernel_quantizer_internal), \"bias:\", str(layer.bias_quantizer_internal))\n", " elif hasattr(layer, \"quantizer\"):\n", " print(layer.name, \"quantizer:\", str(layer.quantizer))\n", "\n", "print()\n", "qmodel.summary()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [], "source": [ "qmodel.compile(\n", " loss=\"categorical_crossentropy\",\n", " optimizer=Adam(0.001),\n", " metrics=[\"accuracy\"])" ] }, { "cell_type": 
"code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "qmodel.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test), verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In addition to __`model_quantize`__, __QKeras__ offers the following additional utility functions.\n", "\n", "__`BinaryToThermometer(x, classes, value_range, with_residue=False, merge_with_channels, use_two_hot_encoding=False)`__\n", "\n", "This function converts a dense binary encoding of inputs to one-hot (with scales).\n", "\n", "Given input matrix `x` with values (for example) 0, 1, 2, 3, 4, 5, 6, 7, create a number of classes as follows:\n", "\n", "If classes=2, value_range=8, with_residue=0, a true one-hot representation is created, and the remaining bits are truncated, using one bit representation.\n", "\n", "`\n", "0 - [1,0] 1 - [1,0] 2 - [1,0] 3 - [1,0]\n", "4 - [0,1] 5 - [0,1] 6 - [0,1] 7 - [0,1]\n", "`\n", "\n", "If classes=2, value_range=8, with_residue=1, the residue is added to the one-hot class, and the class will use 2 bits (for the remainder) + 1 bit (for the one hot)\n", "\n", "`\n", "0 - [1,0] 1 - [1.25,0] 2 - [1.5,0] 3 - [1.75,0]\n", "4 - [0,1] 5 - [0,1.25] 6 - [0,1.5] 7 - [0,1.75]\n", "`\n", "\n", "The arguments of this function are as follows:\n", "\n", "`\n", "x: the input vector we want to convert. typically its dimension will be\n", " (B,H,W,C) for an image, or (B,T,C) or (B,C) for a 1D signal, where\n", " B=batch, H=height, W=width, C=channels or features, T=time for time\n", " series.\n", "classes: the number of classes to (or log2(classes) bits) to use of the\n", " values.\n", "value_range: max(x) - min(x) over all possible x values (e.g. 
for 8 bits,\n", " we would use 256 here).\n", "with_residue: if true, we split the value range into two sets and add\n", " the decimal fraction of the set to the one-hot representation for partial\n", " thermometer representation.\n", "merge_with_channels: if True, we will not create a separate dimension\n", " for the resulting matrix, but we will merge this dimension with\n", " the last dimension.\n", "use_two_hot_encoding: if true, we will distribute the weight between\n", " the current value and the next one to make sure the numbers will always\n", " be < 1.\n", "`\n", "\n", "__`model_save_quantized_weights(model, filename)`__\n", "\n", "This function saves the quantized weights in the model or writes the quantized weights in the file `filename` for production, as the weights during training are maintained non-quantized because of training. Typically, you should call this function before productizing the final model. The saved model is compatible with Keras for inference, so for power-of-2 quantization, we will not return `(sign, round(log2(weights)))`, but rather `(-1)**sign*2**(round(log2(weights)))`. We also return a dictionary containing the name of the layer and the quantized weights, and for power-of-2 quantizations, we will return `sign` and `round(log2(weights))` so that other tools can properly process that.\n", "\n", "__`load_qmodel(filepath, custom_objects=None, compile=True)`__\n", "\n", "Load quantized model from Keras's model.save() h5 file, where filepath is the path to the filename, custom_objects is an optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization, and compile instructs __QKeras__ to compile the model after reading it. If an optimizer was found as part of the saved model, the model is already compiled. Otherwise, the model is uncompiled and a warning will be displayed. 
When compile is set to `False`, the compilation is omitted without any warning.\n", "\n", "__`print_model_sparsity(model)`__\n", "\n", "Prints sparsity for the pruned layers in the model.\n", "\n", "__`quantized_model_debug(model, X_test, plot=False)`__\n", "\n", "Debugs and plots model weights and activations. It is usually useful to print weights, biases and activations for inputs and outputs when debugging a model. model contains the mixed quantized/unquantized layers for a model. We only print/plot activations and weights/biases for quantized models with the exception of Activation. X_test is the set of inputs we will use to compute activations, and we recommend that the user uses a subsample from the entire set he/she wants to debug. if plot is True, we also plot weights and activations (inputs/outputs) for each layer.\n", "\n", "__`extract_model_operations(model)`__\n", "\n", "As each operation depends on the quantization method for the weights/bias and on the quantization of the inputs, we estimate which operations are required for each layer of the quantized model. For example, inputs of a __`QDense`__ layer are quantized using __`quantized_relu_po2`__ and weights are quantized using __`quantized_bits`__, the matrix multiplication can be implemented as a barrel shifter + accumulator without multiplication operations. Right now, we return for each layer one of the following operations: `mult`, `barrel`, `mux`, `adder`, `xor`, and the sizes of the operator.\n", "\n", "We are currently refactoring this function and it may be substantially changed in the future.\n", "\n", "__`print_qstats(model)`__\n", "\n", "Prints statistics of number of operations per operation type and layer so that user can see how big the model is. 
This function utilizes __`extract_model_operations`__.\n", "\n", "An example of the output is presented below.\n", "\n", "`Number of operations in model:\n", " conv2d_0_m : 25088 (smult_4_8)\n", " conv2d_1_m : 663552 (smult_4_4)\n", " conv2d_2_m : 147456 (smult_4_4)\n", " dense : 5760 (smult_4_4)\n", "\n", "Number of operation types in model:\n", " smult_4_4 : 816768\n", " smult_4_8 : 25088`\n", "\n", "In this example, smult_4_4 stands for 4x4 bit signed multiplication and smult_4_8 stands for 8x4 signed multiplication.\n", "\n", "We are currently refactoring this function and it may be substantially changed in the future.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In the quantized network `qmodel`, let's print the statistics of the model and weights." ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "print_qstats(qmodel)" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "from qkeras.utils import quantized_model_debug\n", "\n", "quantized_model_debug(qmodel, x_test, plot=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Where the values in `conv2d_1 -4.6218 4.0295 ( -1.0000 1.0000) ( -0.5000 0.5000) a( 0.125000 0.500000)` correspond to min and max values of the output of the convolution layer, weight ranges (min and max), bias (min and max) and alpha (min and max)."
] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: notebook/QRNNTutorial.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##### Copyright 2020 Google LLC\n", "#\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "import tempfile\n", "\n", "import numpy as np\n", "import tensorflow.compat.v2 as tf\n", "tf.enable_v2_behavior()\n", "\n", "from tensorflow.keras.layers import Input, Dense, Embedding, SimpleRNN, GRU, LSTM, Bidirectional\n", "from tensorflow.keras.optimizers import *\n", "from tensorflow.keras.datasets import imdb\n", "from tensorflow.keras.preprocessing import sequence\n", "\n", "from qkeras.autoqkeras import *\n", "from qkeras import *\n", "\n", "print(\"using tensorflow\", tf.__version__)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "physical_devices = tf.config.list_physical_devices()\n", "for d in physical_devices:\n", " print(d)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "try:\n", " device_name = os.environ['COLAB_TPU_ADDR']\n", " TPU_ADDRESS = 'grpc://' + device_name\n", " print('Found TPU at: {}'.format(TPU_ADDRESS))\n", " resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)\n", " tf.config.experimental_connect_to_cluster(resolver)\n", " # This is the TPU initialization code that has to be at the beginning.\n", " tf.tpu.experimental.initialize_tpu_system(resolver)\n", " print(\"All devices: \", tf.config.list_logical_devices('TPU'))\n", " strategy = tf.distribute.experimental.TPUStrategy(resolver) \n", "except KeyError:\n", " print('TPU not found')\n", " strategy = tf.distribute.get_strategy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.random.seed(12)\n", "tf.random.set_seed(12)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "max_features = 10000\n", "# cut texts after this number of words\n", "# (among top max_features most common words)\n", "maxlen 
= 100\n", "BATCH_SIZE = 1000\n", "SHUFFLE_BUFFER_SIZE = 25000\n", "\n", "print('Loading data...')\n", "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)\n", "print(len(x_train), 'train sequences')\n", "print(len(x_test), 'test sequences')\n", "\n", "print('Pad sequences (samples x time)')\n", "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)\n", "print('x_train shape:', x_train.shape)\n", "print('x_test shape:', x_test.shape)\n", "y_train = np.array(y_train)\n", "y_test = np.array(y_test)\n", "\n", "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n", "test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", "\n", "train_dataset = train_dataset.batch(BATCH_SIZE).shuffle(SHUFFLE_BUFFER_SIZE)\n", "test_dataset = test_dataset.batch(BATCH_SIZE)\n", "\n", "train_dataset, test_dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using QKeras\n", "\n", "__QKeras__ works by tagging all variables and weights/bias created by Keras as well as output of arithmetic layers by quantized functions. 
Quantized functions can be instantiated directly in __`QSimpleRNN`__/__`QLSTM`__/__`QGRU`__/__`QBidirectional`__/__`QDense`__/__`QConv2D`__/__`QSeparableConv2D`__ functions, and they can be passed to __`QActivation`__, which act as a merged quantization and activation function.\n", "\n", "In order to successfully quantize a model, users need to replace layers that create variables (trainable or not) (`LSTM`, `Conv2D`, etc) by their equivalent ones in __QKeras__ (__`QLSTM`__/__`QDense`__, etc), and any layers that perform math operations need to be quantized afterwards.\n", "\n", "Quantized values are clipped between their maximum and minimum quantized representation (which may be different than $[-1.0, 1.0]$), although for `po2` type of quantizers, we still recommend the users to specify the parameter for `max_value`.\n", "\n", "An example of a very simple recurrent network is given below in Keras." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "units = 64\n", "embedding_dim = 64\n", "loss = 'binary_crossentropy'\n", "\n", "def create_model(batch_size=None):\n", " x = x_in = Input(shape=(maxlen,), batch_size=batch_size, dtype=tf.int32)\n", " x = Embedding(input_dim=max_features, output_dim=embedding_dim)(x)\n", " x = Activation('linear', name='embedding_act')(x)\n", " x = Bidirectional(LSTM(units))(x)\n", " x = Dense(1)(x)\n", " x = Activation('sigmoid')(x)\n", " model = tf.keras.Model(inputs=[x_in], outputs=[x])\n", " return model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " model = create_model(BATCH_SIZE)\n", " custom_objects = {}\n", " model.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])\n", "\n", "model.summary()\n", "print('Train...')\n", "model.fit(\n", " train_dataset,\n", " epochs=10,\n", " batch_size=BATCH_SIZE,\n", " 
validation_data=test_dataset,\n", " verbose=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Replacing with quantized layers" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "def create_qmodel(batch_size=None):\n", " x = x_in = Input(shape=(maxlen,), batch_size=batch_size, dtype=tf.int32)\n", " x = Embedding(input_dim=max_features, output_dim=embedding_dim)(x)\n", " x = QActivation('binary', name='embedding_act')(x)\n", " x = QLSTM(\n", " units,\n", " activation='quantized_tanh(4)',\n", " recurrent_activation='quantized_relu(4,0,1)',\n", " kernel_quantizer='stochastic_ternary(\"auto\")',\n", " recurrent_quantizer='quantized_bits(2,1,1,alpha=1.0)',\n", " bias_quantizer='quantized_bits(4,0,1)')(x)\n", " x = QDense(\n", " 1, \n", " kernel_quantizer=\"quantized_bits(4,0,1)\",\n", " bias_quantizer='quantized_bits(4,0,1)')(x)\n", " x = QActivation('sigmoid')(x)\n", " model = tf.keras.Model(inputs=[x_in], outputs=[x])\n", " return model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " qmodel = create_qmodel(BATCH_SIZE)\n", " custom_objects = {}\n", " qmodel.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])\n", "\n", "qmodel.summary()\n", "print('Train...')\n", "qmodel.fit(train_dataset,\n", " batch_size=BATCH_SIZE,\n", " epochs=10,\n", " verbose=2,\n", " validation_data=test_dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Converting a Model Automatically\n", "\n", "In addition to the drop-in replacement of Keras functions, we have written the following function to assist anyone who wants to quantize a network.\n", "\n", "__`model_quantize(model, quantizer_config, activation_bits, custom_objects=None, transfer_weights=False)`__\n", "\n", "This function converts an non-quantized model (such as the one from `model` 
in the previous example) into a quantized version, by applying a configuration specified by the dictionary `quantizer_config`, and `activation_bits` specified for unnamed activation functions, with this parameter probably being removed in future versions.\n", "\n", "The parameter `custom_objects` specifies object dictionary unknown to Keras, required when you copy a model with lambda layers, or customized layer functions, for example, and if `transfer_weights` is `True`, the returned model will have as initial weights the weights from the original model, instead of using random initial weights.\n", "\n", "The dictionary specified in `quantizer_config` can be indexed by a layer name or layer class name. In the example below, conv2d_1 corresponds to the first convolutional layer of the example, while QConv2D corresponds to the default behavior of two dimensional convolutional layers. The reader should note that right now we recommend using __`QActivation`__ with a dictionary to avoid the conversion of activations such as `softmax` and `linear`. In addition, although we could use `activation` field in the layers, we do not recommend that. 
\n", "\n", "`{\n", " \"conv2d_1\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QConv2D\": {\n", " \"kernel_quantizer\": \"stochastic_ternary\",\n", " \"bias_quantizer\": \"quantized_po2(4)\"\n", " },\n", " \"QDense\": {\n", " \"kernel_quantizer\": \"quantized_bits(3,0,1)\",\n", " \"bias_quantizer\": \"quantized_bits(3)\"\n", " },\n", " \"act_1\": \"quantized_relu(2)\",\n", " \"QActivation\": { \"relu\": \"quantized_relu(2)\" }\n", "}`\n", "\n", "In the following example, we will quantize the model using a different strategy.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bits = 4\n", "quantizer_config = {\n", " \"bidirectional\": {\n", " 'activation' : f\"quantized_tanh({bits})\",\n", " 'recurrent_activation' : f\"quantized_relu(4,0,1)\",\n", " 'kernel_quantizer' : f\"quantized_bits({bits}, alpha='auto')\",\n", " 'recurrent_quantizer' : f\"quantized_bits({bits}, alpha='auto')\",\n", " 'bias_quantizer' : f\"quantized_bits({bits}, alpha='auto')\",\n", " },\n", " \"dense\": {\n", " 'kernel_quantizer' : f\"quantized_bits({bits}), alpha='auto'\",\n", " 'bias_quantizer' : f\"quantized_bits({bits}), alpha='auto'\"\n", " },\n", " \"embedding_act\": f\"quantized_bits({bits}), alpha='auto'\",\n", "}\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " model = create_model(BATCH_SIZE)\n", " custom_objects = {}\n", " \n", " qmodel = model_quantize(model, quantizer_config, bits, custom_objects)\n", " qmodel.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])\n", " \n", "qmodel.summary()\n", "print('Train...')\n", "qmodel.fit(train_dataset,\n", " batch_size=BATCH_SIZE,\n", " epochs=10,\n", " verbose=2,\n", " validation_data=test_dataset)" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "## Quantizing a Model With `AutoQKeras`\n", "\n", "To quantize this model with `AutoQKeras`, we need to define the quantization for kernels, biases and activations; forgiving factors and quantization strategy.\n", "\n", "Below we define which quantizers are allowed for kernel, bias, activations and linear. Linear is a proxy that we use to capture `Activation(\"linear\")` to apply quantization without applying a non-linear operation. In some networks, we found that this trick may be necessary to better represent the quantization space.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tf.keras.backend.clear_session()\n", "with strategy.scope():\n", " model = create_model(BATCH_SIZE)\n", " custom_objects = {}\n", " model.compile(\n", " optimizer=Adam(learning_rate=0.01),\n", " loss=loss,\n", " metrics=['acc'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "quantization_config = {\n", " \"kernel\": {\n", " \"stochastic_binary\": 1,\n", " \"stochastic_ternary\": 2,\n", " \"quantized_bits(4,0,1,alpha=1.0)\": 4,\n", " \"quantized_po2(4,1)\": 4\n", " },\n", " \"recurrent_kernel\": {\n", " \"stochastic_binary\": 1,\n", " \"stochastic_ternary\": 2,\n", " \"quantized_bits(4,0,1,alpha=1.0)\": 4,\n", " \"quantized_po2(4,1)\": 4\n", " \n", " },\n", " \"recurrent_activation\": {\n", " \"quantized_relu(4,0,1)\": 4 \n", " },\n", " \"bias\": {\n", " \"quantized_bits(4,0,1)\": 4,\n", " \"quantized_po2(4,1)\": 4\n", " },\n", " \"activation\" : {\n", " \"stochastic_ternary('auto')\": 2,\n", " \"quantized_tanh(4)\" : 4, \n", " \"quantized_relu_po2(4,1)\": 4,\n", " \"quantized_relu(4,2)\": 4,\n", " },\n", " \"linear\": { \n", " \"stochastic_ternary('auto')\" : 2,\n", " \"quantized_tanh(4)\" : 4, \n", " \"quantized_relu_po2(4,1)\": 4,\n", " \"quantized_relu(3,1)\": 3,\n", " \"quantized_relu(4,2)\": 4,\n", " }\n", "}\n", "\n", "limit = {\n", " \"Dense\": [4],\n", " 
\"Bidirectional\": [4],\n", " \"Activation\": [4],\n", " \"default\" : [4]*4\n", "}\n", "\n", "goal = {\n", " \"type\": \"bits\",\n", " \"params\": {\n", " \"delta_p\": 8.0,\n", " \"delta_n\": 8.0,\n", " \"rate\": 2.0,\n", " \"stress\": 1.0,\n", " \"input_bits\": 4,\n", " \"output_bits\": 4,\n", " \"ref_bits\": 4,\n", " \"config\": {\n", " \"default\": [\"parameters\", \"activations\"]\n", " }\n", " }\n", "}\n", "\n", "run_config = {\n", " \"output_dir\": tempfile.mkdtemp(),\n", " \"goal\": goal,\n", " \"quantization_config\": quantization_config,\n", " \"learning_rate_optimizer\": False,\n", " \"transfer_weights\": False,\n", " \"mode\": \"random\",\n", " \"seed\": 42,\n", " \"limit\": limit,\n", " \"tune_filters\": \"layer\",\n", " \"tune_filters_exceptions\": \"^dense\",\n", " \"distribution_strategy\": strategy,\n", "\n", " \"layer_indexes\": range(2, len(model.layers) - 1),\n", " \"max_trials\": 1000\n", "}\n", "\n", "print(\"quantizing layers:\", [model.layers[i].name for i in run_config[\"layer_indexes\"]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "autoqk = AutoQKeras(model, metrics=[\"acc\"], custom_objects={}, **run_config)\n", "autoqk.fit(\n", " train_dataset, \n", " validation_data=test_dataset, \n", " batch_size=BATCH_SIZE, \n", " epochs=10,\n", " verbose=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "qmodel = autoqk.get_best_model()\n", "qmodel.save_weights(\"qmodel.h5\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print_qmodel_summary(qmodel)\n", "print(get_quantization_dictionary(qmodel))" ] } ], "metadata": { "kernelspec": { "display_name": "Py3", "language": "python", "name": "py3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": 
"python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: qkeras/__init__.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Exports qkeras modules to quantizer package.""" # We use wildcard import for convenience at this moment, which will be later # refactored and removed. 
import tensorflow as tf from .b2t import * # pylint: disable=wildcard-import from .estimate import * # pylint: disable=wildcard-import from .qconv2d_batchnorm import QConv2DBatchnorm from .qconvolutional import * # pylint: disable=wildcard-import from .qdepthwise_conv2d_transpose import QDepthwiseConv2DTranspose from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm from .qlayers import * # pylint: disable=wildcard-import from .qmac import * # pylint: disable=wildcard-import from .qnormalization import * # pylint: disable=wildcard-import from .qoctave import * # pylint: disable=wildcard-import from .qpooling import * # pylint: disable=wildcard-import from .qrecurrent import * # pylint: disable=wildcard-import from .qseparable_conv2d_transpose import QSeparableConv2DTranspose #from .qtools.run_qtools import QTools #from .qtools.settings import cfg from .quantizers import * # pylint: disable=wildcard-import from .registry import * # pylint: disable=wildcard-import from .safe_eval import * # pylint: disable=wildcard-import assert tf.executing_eagerly(), "QKeras requires TF with eager execution mode on" __version__ = "0.9.0" ================================================ FILE: qkeras/autoqkeras/__init__.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Exports autoqkeras as a package.""" # We use wildcard import for convenience at this moment, which will be later # refactored and removed. from .autoqkeras_internal import * # pylint: disable=wildcard-import from .quantization_config import default_quantization_config # pylint: disable=line-too-long from .utils import * # pylint: disable=wildcard-import ================================================ FILE: qkeras/autoqkeras/autoqkeras_internal.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Implements support for auto-quantization.""" import collections import json import os import re import copy from absl import logging import keras_tuner as kt from keras_tuner import HyperModel from keras_tuner import BayesianOptimization from keras_tuner import Hyperband from keras_tuner import RandomSearch import numpy as np import six import tensorflow as tf import tensorflow.keras.backend as K from tensorflow.keras.metrics import binary_accuracy from tensorflow.keras.metrics import categorical_accuracy from tensorflow.keras.metrics import sparse_categorical_accuracy from qkeras.autoqkeras.forgiving_metrics import forgiving_factor # pylint: disable=line-too-long from qkeras.autoqkeras.forgiving_metrics import ForgivingFactor # pylint: disable=line-too-long from qkeras.autoqkeras.quantization_config import default_quantization_config # pylint: disable=line-too-long from qkeras.autoqkeras.utils import print_qmodel_summary from qkeras.utils import clone_model from qkeras.utils import model_quantize # AutoQKHyperModel is implemented on top of keras_tuner # It basically creates a quantized model based on some rules # and it computes a acc_delta that boosts the accuracy when # choosing smaller models. # Boosting function behaves like this. # We use the following formula to compute the decrease factor: # reference_size: number of parameters + activations of the model, # assuming an 8-bit implementation. # trial_size: number of parameters + activations of trial. # # 1) First, we compute how many times we decresed/increased the model # i = log(reference_size / trial_size) / log(rate) # # 2) Then, we use delta_p / delta_n if model is smaller/bigger # than reference model. 
# # delta = i * ( # (i < 0) * delta_n + (i >= 0) * delta_p # ) # # 3) the accuracy of the model (score) is adjusted by acc * delta # # The delta "boosts" the accuracy to allow worse model to be # chosen by hypermodel tuner. # REGISTERED_LAYERS = ["Dense", "Conv1D", "Conv2D", "DepthwiseConv2D", "SimpleRNN", "LSTM", "GRU", "Bidirectional", "Conv2DTranspose", "SeparableConv1D", "SeparableConv2D"] Q_LAYERS = list(map(lambda x : 'Q' + x, REGISTERED_LAYERS)) SEQUENCE_LAYERS = ["SimpleRNN", "LSTM", "GRU", "Bidirectional"] class AutoQKHyperModel(HyperModel): """Creates an hypermodel to attempt to quantize a reference model. Arguments: model: Model to be quantized. metrics: List of metrics to be used. custom_objects: Custom objects used by Keras during quantization. target: Secondary metric to chase during search ("bits" or "energy"). transfer_weights: if true, transfer weights from unquantized model. frozen_layers: if true, these layers will not be quantized but weights transferred from original model. activation_bits: parameter to be used by 'model_quantize'. limit: limit the number of bits in quantizers, specified as dictionary. tune_filters: one of "block", "layer", "none" for tuning entire network, each layer separately, or no tuning. tune_filters_exceptions: name of layers that will not be tuned. layer_indexes: we only quantize layers whose ids are in layer_indexes. learning_rate_optimizer: if true, we optimize learning rate along with other parameters. head_name: specify which head to calcuate score/trial-size from in autoqkeras quantization_config: dictionary containing configuration of quantizers for kernel, bias and activation. extend_model_metrics: If to append the trial size and score metrics to model metrics, which are used for AutoQKeras to determine the quality of a model. Returns: quantized model in trial and boosted accuracy function compiled into quantized model. 
""" def __init__( self, model, metrics, custom_objects=None, target=None, transfer_weights=False, frozen_layers=None, activation_bits=4, limit=None, tune_filters="none", tune_filters_exceptions=None, layer_indexes=None, learning_rate_optimizer=False, head_name=None, quantization_config=None, extend_model_metrics=True, ): self.model = model self.metrics = metrics self.custom_objects = custom_objects if custom_objects else {} self.target = target self.reference_size = self.target.get_reference(model) self.transfer_weights = transfer_weights self.frozen_layers = frozen_layers if frozen_layers else [] self.activation_bits = activation_bits self.head_name = head_name self.extend_model_metrics = extend_model_metrics # make sure we have at least 3 elements in list # first one for kernel, second one for bias and thid one for activations. # # limit is in the format, where default replaces missing values: # '{ # "Conv2D":[weight,bias,activation], # "RNN":[weight,bias,recurrent,activation], # "Dense":[weight,bias,activation], # "Activation":[activation] # "default": value # }' if limit is None: self.limit = {} else: self.limit = limit self.groups = {} assert isinstance(self.limit, dict) if self.limit.get("default", None) is None: default = 8 else: default = self.limit["default"] # make sure we have entries for every type of layer we process self._adjust_limit(default) print("Limit configuration:" + json.dumps(self.limit)) assert tune_filters in ["block", "layer", "none"] self.tune_filters = tune_filters self.tune_filters_exceptions = re.compile(tune_filters_exceptions) self.layer_indexes = layer_indexes self.learning_rate_optimizer = learning_rate_optimizer # load quantizer types for each type of quantizer if quantization_config is None: self.quantization_config = default_quantization_config else: self.quantization_config = quantization_config def _adjust_limit(self, default): """Makes sure limit has all the fields required.""" if isinstance(default, list): assert 3 <= 
len(default) <= 4 else: default = [default] * 3 # we consider that if name is not there, we will ignore the layer for name in REGISTERED_LAYERS: if name in self.limit: length = len(self.limit[name]) if length < 4 and name in SEQUENCE_LAYERS: assert len(default) == 4 self.limit[name] = self.limit[name] + default[length:] elif length < 3: # No recurrent limit needed for non recurrent layers self.limit[name] = self.limit[name] + default[length:2] + default[-1:] def _n(self, name, s_list): """Creates a unique name for the tuner.""" return name + "_".join([str(v) for v in s_list]) def _get_quantizer(self, hp, head, layer_name, layer_class_name, i_list=None, is_kernel=True, is_linear=False): """Gets a quantizer randomly for kernels/bias/activations.""" # first pick up which group we belong to. if not i_list: i_list = [] if is_linear: # linear quantizers field_name = "linear" kq = self.quantization_config["linear"] index = 0 q_list = list(kq.keys()) q_dict = kq elif "kernel" in head: # kernel quantizers field_name = "kernel" kq = self.quantization_config["kernel"] index = 0 q_list = list(kq.keys()) q_dict = kq elif "bias" in head: # bias quantizers field_name = "bias" bq = self.quantization_config["bias"] index = 1 q_list = list(bq.keys()) q_dict = bq elif "pointwise_kernel" in head: # limit is same as kernel # pointwise kernel quantizers field_name = "pointwise_kernel" kq = self.quantization_config["pointwise_kernel"] index = 2 q_list = list(kq.keys()) q_dict = kq elif "recurrent_kernel" in head: # limit is same as kernel # recurrent kernel quantizers field_name = "recurrent_kernel" kq = self.quantization_config["recurrent_kernel"] index = 2 q_list = list(kq.keys()) q_dict = kq elif "recurrent_activation" in head: # limit is same as kernel # recurrent activation quantizers field_name = "recurrent_activation" raq = self.quantization_config["recurrent_activation"] index = -1 q_list = list(raq.keys()) q_dict = raq else: # activation quantizers field_name = "activation" aq = 
self.quantization_config["activation"] index = -1 q_list = list(aq.keys()) q_dict = aq # we first we search for layer name. If it is not there, we switch to # layer class name. found_pattern = False name = layer_class_name count = -1 for i, pattern in enumerate(self.limit): if re.match(pattern, layer_name): found_pattern = True name = pattern count = i break # for partially quantized networks we may not have # the layer class name in the set. if name == layer_class_name and name not in self.limit: return None, -1 # groups is a dictionary that contains dictionary of the # patterns so that we can group everything together if found_pattern: if name in self.groups and index in self.groups[name]: return self.groups[name][index] # not there, let's use a different name for # the head and field head = "qk_group_" + str(count) + "_" + field_name head = name + "_" + field_name # limit group can be a list of quantizers or a # number that tells us maximum number of bits if isinstance(self.limit[name][index], list): # we assume this is a subset of the q_keys # entry in quantization_config will be like: # "Conv2D": [ ["q1", "q2", "q3"], ... ] # # we always assume this list is a subset of # the original list or we will raise an # error. q_list = self.limit[name][index] q_dict = { key: q_dict[key] for key in q_list } else: q_dict = { key: value for (key, value) in q_dict.items() if value <= self.limit[name][index] } q_list = list(q_dict.keys()) # didn't found a match in groups, create one. if len(q_list) == 1: q_name = hp.Fixed(self._n(head + "_quantizer", i_list), q_list[0]) else: q_name = hp.Choice(self._n(head + "_quantizer", i_list), q_list) if found_pattern: if name not in self.groups: self.groups[name] = {index: (q_name, q_dict[q_name])} else: self.groups[name][index] = (q_name, q_dict[q_name]) return (q_name, q_dict[q_name]) def quantize_model(self, hp): """Quantize model by hyperparameter search and extracting size schema.""" # configuration for quantization. 
q_dict = {} model = clone_model(self.model, self.custom_objects) fanin = [] filter_range = [0.5, 0.75, 1.0, 1.5, 2.0] # network_filters=hp.Choice(...) should only be defined if we are sure # current blocks has any layer that need filter sweep. # Otherwise, when no layer needs filter sweep and a hp variable is defined, # there will be uneffective trials that loop around the network # filter range, even though none of the filter sweep was ever applied to # any layers. Therfore, we use filter_sweep_enabled to mark if any layer # in current block needs filter sweep. kernel_quantizer_dict = {} filter_sweep_enabled = False for layer in model.layers: if layer.__class__.__name__ in REGISTERED_LAYERS: kernel_quantizer, bits = self._get_quantizer( hp, layer.name + "_kernel", layer.name, layer.__class__.__name__, is_kernel=True) kernel_quantizer_dict[layer.name] = (kernel_quantizer, bits) # kernel_quantizer is not None -> layer in the current block need # to be quantized if kernel_quantizer: if ( not filter_sweep_enabled and self.tune_filters in ["layer", "block"] and not self.tune_filters_exceptions.search(layer.name) and layer.__class__.__name__ in ["Dense", "Conv1D", "Conv2D", "Conv2DTranspose"] ): filter_sweep_enabled = True if layer.__class__.__name__ in SEQUENCE_LAYERS: recurrent_quantizer, _ = self._get_quantizer( hp, layer.name + "_recurrent_kernel", layer.name, layer.__class__.__name__, is_kernel=True) if layer.__class__.__name__ in ["SeparableConv1D", "SeparableConv2D"]: pointwise_quantizer, _ = self._get_quantizer( hp, layer.name + "_pointwise_kernel", layer.name, layer.__class__.__name__, is_kernel=True) if self.tune_filters == "block" and filter_sweep_enabled: network_filters = hp.Choice( "network_filters", values=filter_range, default=1.0 ) else: network_filters = 1.0 for layer_id, layer in enumerate(model.layers): # we can use these indexes to disable some layers, like the last # layer if self.layer_indexes is not None and layer_id not in self.layer_indexes: 
continue layer_d = {} if layer.__class__.__name__ in Q_LAYERS: weights = layer.get_weights()[0] if ( layer.get_quantizers()[0] and hasattr(layer.get_quantizers()[0], "bits") ): bits = layer.get_quantizers()[0].bits else: bits = 8 fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.) if layer.__class__.__name__ in REGISTERED_LAYERS: # difference between depthwise and the rest is just the name # of the kernel. if layer.__class__.__name__ in [ "DepthwiseConv2D", "SeparableConv1D", "SeparableConv2D" ]: kernel_name = "depthwise_quantizer" else: kernel_name = "kernel_quantizer" # sample kernel quantizer. (kernel_quantizer, bits) = kernel_quantizer_dict[layer.name] if not kernel_quantizer: continue # process fanin here if bits < 8: weights = layer.get_weights()[0] fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.) # we only want to do that if we are going to quantize layer if ( self.tune_filters in ["layer", "block"] and not self.tune_filters_exceptions.search(layer.name) and layer.__class__.__name__ in [ "Dense", "Conv1D", "Conv2D", "Conv2DTranspose", "SeparableConv1D", "SeparableConv2D" ] ): if self.tune_filters == "layer": layer_filters = hp.Choice( "network_filters_" + layer.name, values=filter_range, default=1.0 ) else: layer_filters = network_filters if layer.__class__.__name__ == "Dense": layer.units = max(int(layer.units * layer_filters), 1) elif layer.__class__.__name__ in [ "Conv1D", "Conv2D", "Conv2DTranspose", "SeparableConv1D", "SeparableConv2D" ]: layer.filters = max(int(layer.filters * layer_filters), 1) layer_d[kernel_name] = kernel_quantizer if layer.__class__.__name__ in SEQUENCE_LAYERS: layer_d['recurrent_quantizer'] = recurrent_quantizer if layer.__class__.__name__ in ["SeparableConv1D", "SeparableConv2D"]: layer_d['pointwise_quantizer'] = pointwise_quantizer if layer.__class__.__name__ in ["LSTM", "GRU", "Bidirectional"]: layer_d['recurrent_activation'], _ = self._get_quantizer( hp, layer.name + "_recurrent_activation", layer.name, 
layer.__class__.__name__, is_kernel=False) # if we use bias, sample quantizer. if layer.__class__.__name__ == "Bidirectional": layer_d["bias_quantizer"], bits = self._get_quantizer( hp, layer.name + "_bias", layer.name, layer.__class__.__name__, is_kernel=False) layer_d["activation"], bits = self._get_quantizer( hp, layer.name + "_activation", layer.name, layer.__class__.__name__, is_kernel=False) q_dict[layer.name] = layer_d else: if layer.use_bias: layer_d["bias_quantizer"], bits = self._get_quantizer( hp, layer.name + "_bias", layer.name, layer.__class__.__name__, is_kernel=False) # if activation is not linear/softmax we need to process it. if layer.activation is None: is_softmax = False is_linear = False else: if isinstance(layer.activation, six.string_types): is_softmax = layer.activation == "softmax" is_linear = layer.activation == "linear" else: is_softmax = layer.activation.__name__ == "softmax" is_linear = layer.activation.__name__ == "linear" if not is_softmax and not is_linear: layer_d["activation"], bits = self._get_quantizer( hp, layer.name + "_activation", layer.name, layer.__class__.__name__, is_kernel=False) q_dict[layer.name] = layer_d elif layer.__class__.__name__ in ["Reshape"]: # we cannot handle fine tuning filters per layer right now. assert self.tune_filters in ["none", "block"] # we need to make sure this pattern exists, this should only occur for # "scheduler", so the name will be complete and not a pattern. 
if ( self.tune_filters == "none" or layer.name not in self.limit or self.tune_filters_exceptions.search(layer.name) ): continue if K.image_data_format() == "channels_last": layer.target_shape = layer.target_shape[:-1] + ( min(int(layer.target_shape[-1] * network_filters), 1),) else: layer.target_shape = (int(layer.target_shape[0] * network_filters), ) + layer.target_shape[1:] elif layer.__class__.__name__ in ["Activation"]: if isinstance(layer.activation, six.string_types): is_linear = layer.activation == "linear" is_softmax = layer.activation == "softmax" else: is_linear = layer.activation.__name__ == "linear" is_softmax = layer.activation.__name__ == "softmax" # if it is a linear activation, we will notify the # quantizer we are searching for linear type of # quantizers if not is_softmax: activation, bits = self._get_quantizer( hp, layer.name + "_activation", layer.name, layer.__class__.__name__, is_kernel=False, is_linear=is_linear) if not activation: continue # look at documentation on model_quantize q_dict[layer.name] = activation elif layer.__class__.__name__ in self.limit: # mark it for conversion q_dict[layer.name] = {} else: for pattern in self.limit: if re.match(pattern, layer.name): q_dict[layer.name] = {} break q_model = model_quantize( model, q_dict, self.activation_bits, custom_objects=self.custom_objects, transfer_weights=self.transfer_weights) return q_model, fanin def build(self, hp): """Builds hyperparameterized quantized model.""" self.groups = {} # we are not using the fanin right now. q_model, _ = self.quantize_model(hp) # transfer weights from previous run as we know we will not if self.learning_rate_optimizer: # if learning_rate_optimizer, we try to transfer weights from previous run print("... 
freezing layers {}.".format(", ".join(self.frozen_layers))) for layer_name in self.frozen_layers: o_weights = self.model.get_layer(layer_name).get_weights() layer = q_model.get_layer(layer_name) # don't know if setting trainable to False is good or not yet # try to do "soft-freeze" by transferring weights. More experiments # needed before we decide what to do. # layer.trainable = False weights = layer.get_weights() # because we can be changing number of layers, we do not know # if we can really use some of the weights or not. equal_layer = True for w in range(len(o_weights)): if o_weights[w].shape != weights[w].shape: equal_layer = False break if equal_layer: layer.set_weights(o_weights) self.trial_size = self.target.get_trial(q_model) # we will use a boosted accuracy computation delta = self.target.delta() # by default, we use the first metric specified by the # user to be the target metric. if not self.metrics: score_metric = None elif isinstance(self.metrics, dict): if not self.head_name: # if head_name not provided, find the first metric from the dict score_key = list(self.metrics.keys())[0] else: # find the metric assoicated with the head_name score_key = self.head_name score_metric = self.metrics[score_key] if isinstance(score_metric, list): score_metric = score_metric[0] elif isinstance(self.metrics, list): score_metric = self.metrics[0] self.score = AutoQKHyperModel.adjusted_score( self, delta, score_metric) # some papers suggest that we use learning_rate * sqrt(fanin) / layer # we cannot do that right now, but we can definitely do that # if we are quantizing one layer at a time # # https://arxiv.org/pdf/1511.00363.pdf # we use the magic number to smooth out the average total_factor = self.target.get_total_factor() delta_lr = 1.0 + (total_factor < 0) * total_factor # we assume model has been compiled at least. 
lr = float(self.model.optimizer.lr.numpy()) # we assume that delta_lr can lower lr to accommodate # for more quantization # # if learning rate scheduler is used, we assume the callback to manage # learning rate. Just set it to constant. if self.learning_rate_optimizer: lr_range = list(lr * np.linspace(delta_lr, 1.1, 5)) lr_choice = hp.Choice("learning_rate", lr_range) self.model.optimizer.learning_rate = lr_choice else: lr_choice = lr print("learning_rate: {}".format(lr)) optimizer = self.model.optimizer q_model.summary() metrics = self.metrics # extend metrics by including score and trial_size metrics if self.extend_model_metrics: ext_metrics = copy.deepcopy(metrics) if isinstance(ext_metrics, dict): # for dict, add trial_size_metric and score metric to target output if not self.head_name: # if head_name not provided, find the first metric from the dict score_key = list(ext_metrics.keys())[0] else: # find the metric assoicated with the head_name score_key = self.head_name score_metric = ext_metrics[score_key] if isinstance(score_metric, list): score_metric += [self.trial_size_metric(self.trial_size), self.score] else: score_metric = [score_metric] score_metric += [self.trial_size_metric(self.trial_size), self.score] ext_metrics[score_key] = score_metric else: ext_metrics += [ self.trial_size_metric(self.trial_size), self.score] metrics = ext_metrics q_model.compile( optimizer=optimizer, loss=self.model.loss, metrics=metrics ) self.q_model = q_model # this just prints a summary of the quantization for debugging # purposes self.target.print_stats() print_qmodel_summary(q_model) return q_model @staticmethod def adjusted_score(hyper_model, delta, metric_function=None): def score(y_true, y_pred): y_t_rank = len(y_true.shape.as_list()) y_p_rank = len(y_pred.shape.as_list()) y_t_last_dim = y_true.shape.as_list()[-1] y_p_last_dim = y_pred.shape.as_list()[-1] is_binary = y_p_last_dim == 1 is_sparse_categorical = ( y_t_rank < y_p_rank or y_t_last_dim == 1 and y_p_last_dim > 
1) if isinstance(metric_function, six.string_types): if metric_function in ["accuracy", "acc"]: if is_binary: metric = binary_accuracy(y_true, y_pred) elif is_sparse_categorical: metric = sparse_categorical_accuracy(y_true, y_pred) else: metric = categorical_accuracy(y_true, y_pred) else: metric = categorical_accuracy(y_true, y_pred) else: metric = metric_function(y_true, y_pred) return K.cast(metric * (1.0 + delta), K.floatx()) if not metric_function: metric_function = "accuracy" return score @staticmethod def trial_size_metric(trial_size): def trial(y_true, y_pred): # pylint: disable=unused-argument return K.cast(trial_size, K.floatx()) return trial class AutoQKeras: """Performs autoquantization in Keras model. Arguments: model: Model to be quantized. metrics: List of metrics to be used. custom_objects: Custom objects used by Keras during quantization. goal: Metric to compute secondary goal of search (bits or energy) output_dir: name of output directory to store results. mode: random, hyperband or bayesian used by keras_tuner. custom_tuner: The Keras Tuner class to use to search hyperparams transfer_weights: if true, transfer weights from unquantized model. frozen_layers: if true, these layers will not be quantized but weights transferred from original model. activation_bits: parameter to be used by 'model_quantize'. limit: limit the number of bits in quantizers specified as a dictionary. tune_filters: one of "block", "layer", "none" for tuning entire network, each layer separately, or no tuning. tune_filters_exceptions: name of layers that will not be tuned. layer_indexes: indexes of layers we will quantize. learning_rate_optimizer: if true, user will provide lr scheduler callback. quantization_config: file name of dictionary containing configuration of quantizers for kernel, bias and activation. head_name: specify which head to calcuate score/trial-size from in autoqkeras score_metric: Str. Optional metric name to use to evaluate the trials. 
Defaults to val_score tuner_kwargs: parameters for keras_tuner depending on whether mode is random, hyperband or baeysian. Please refer to the documentation of kerstuner Tuners. """ def __init__( self, model, metrics=None, custom_objects=None, goal=None, output_dir="result", mode="random", custom_tuner=None, transfer_weights=False, frozen_layers=None, activation_bits=4, limit=None, tune_filters="none", tune_filters_exceptions=None, learning_rate_optimizer=False, layer_indexes=None, quantization_config=None, overwrite=True, head_name=None, score_metric=None, **tuner_kwargs): # Collect input arguments to AutoQKeras for usage by custom tuner autoqkeras_input_args = locals() if not metrics: metrics = [] if not custom_objects: custom_objects = {} # goal: { "type": ["bits", "energy"], "params": {...} } or ForgivingFactor # type # For type == "bits": # delta_p: increment (in %) of the accuracy if trial is smaller. # delta_n: decrement (in %) of the accuracy if trial is bigger. # rate: rate of decrease/increase in model size in terms of bits. # input_bits; size of input tensors. # output_bits; size of output tensors. # stress: parameter to reduce reference size to force tuner to # choose smaller models. # config: configuration on what to compute for each layer # minimum configuration is { "default": ["parameters", "activations"] } # use simplest one - number of bits if not goal: goal = { "type": "bits", "params": { "delta_p": 8.0, "delta_n": 8.0, "rate": 2.0, "stress": 1.0, "input_bits": 8, "output_bits": 8, "ref_bits": 8, "config": { "default": ["parameters", "activations"] } } } self.overwrite = overwrite # for multi-head model, we need to specify which head(/output) that # score and trial metric needs to calculate from self.head_name = head_name # if we have not created it already, create new one. 
if not isinstance(goal, ForgivingFactor): target = forgiving_factor[goal["type"]](**goal["params"]) else: target = goal # if no metrics were specified, we want to make sure we monitor at least # accuracy. if not metrics: metrics = ["acc"] self.hypermodel = AutoQKHyperModel( model, metrics, custom_objects, target, transfer_weights=transfer_weights, frozen_layers=frozen_layers, activation_bits=activation_bits, limit=limit, tune_filters=tune_filters, tune_filters_exceptions=tune_filters_exceptions, layer_indexes=layer_indexes, learning_rate_optimizer=learning_rate_optimizer, head_name=head_name, quantization_config=quantization_config ) # right now we create unique results directory idx = 0 name = output_dir if self.overwrite: while os.path.exists(name): idx += 1 name = output_dir + "_" + str(idx) output_dir = name self.output_dir = output_dir if score_metric is None: if self.head_name: score_metric = "val_" + self.head_name + "_score" else: score_metric = "val_score" assert mode in ["random", "bayesian", "hyperband"] if custom_tuner is not None: self.tuner = custom_tuner( self.hypermodel, autoqkeras_config=autoqkeras_input_args, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) elif mode == "random": self.tuner = RandomSearch( self.hypermodel, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) elif mode == "bayesian": self.tuner = BayesianOptimization( self.hypermodel, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) elif mode == "hyperband": self.tuner = Hyperband( self.hypermodel, objective=kt.Objective(score_metric, "max"), project_name=output_dir, **tuner_kwargs) else: pass self.tuner.search_space_summary() def _has_earlystopping(self, callbacks): """Check if EarlyStopping has been defined or not.""" if callbacks is None: return False for callback in callbacks: if isinstance(callback, tf.keras.callbacks.EarlyStopping): return True return False def 
history(self, number_of_trials=-1): """Returns the history of the model search.""" trials = self.tuner.oracle.get_best_trials(number_of_trials) state = [trial.get_state() for trial in trials] result = {} result["score"] = [ state[i]["score"] for i in range(len(state)) if trials[i].score is not None ] for i in range(len(state)): if trials[i].score is not None: keys = state[i]["metrics"]["metrics"].keys() for key in keys: if key != "score" and not key.startswith( "val_") and key != "loss" and key != "trial": cur_accuracy = state[i]["metrics"]["metrics"][key][ "observations"][0]["value"][0] if "val_" + key in state[i]["metrics"]["metrics"].keys(): cur_val_accuracy = state[i]["metrics"]["metrics"]["val_" + key][ "observations"][0]["value"][0] else: cur_val_accuracy = None # only update result if both key and val_key exist if cur_val_accuracy: if key not in result.keys(): result[key] = [cur_accuracy] result["val_" + key] = [cur_val_accuracy] else: result[key].append(cur_accuracy) result["val_" + key].append(cur_val_accuracy) if self.head_name: trial_from_output = self.head_name + "_trial" else: trial_from_output = "trial" result["trial_size"] = [ state[i]["metrics"]["metrics"][trial_from_output]["observations"][0] ["value"][0] for i in range(len(state)) if trials[i].score is not None ] return result def fit(self, *fit_args, **fit_kwargs): """Invokes tuner fit algorithm.""" callbacks = fit_kwargs.get("callbacks", None) if callbacks is None: callbacks = [] epochs = fit_kwargs.get("epochs", None) if epochs is None: epochs = 10 if not self._has_earlystopping(callbacks): callbacks = callbacks + [ tf.keras.callbacks.EarlyStopping( "val_loss", patience=min(20, epochs//5)) ] fit_kwargs["callbacks"] = callbacks self.tuner.search(*fit_args, **fit_kwargs) @staticmethod def get_best_lr(qmodel): """Extracts best lr of model.""" return qmodel.optimizer.lr.numpy() def get_best_model(self): params = self.tuner.get_best_hyperparameters()[0] q_model = self.tuner.hypermodel.build(params) 
self.learning_rate = q_model.optimizer.lr.numpy() return q_model def get_learning_rate(self): return self.learning_rate class AutoQKerasScheduler: """Performs autoquantization one layer/group at a time. Arguments: model: Model to be quantized. metrics: List of metrics to be monitored. custom_objects: Custom objects used by Keras during quantization. goal: Metric to compute secondary goal of search (bits or energy) output_dir: name of output directory to store results. mode: random, hyperband or bayesian used by keras_tuner. transfer_weights: if true, transfer weights from unquantized model. activation_bits: parameter to be used by 'model_quantize'. limit: limit the number of bits in quantizers specified as a dictionary. tune_filters: one of "block", "layer", "none" for tuning entire network, each layer separately, or no tuning. tune_filters_exceptions: name of layers that will not be tuned. layer_indexes: indexes of layer to be quantized. learning_rate_optimizer: if true, user will provide lr scheduler callback. blocks: list of re patterns specifygin group configuration for layers. schedule_block: "sequential" or "cost". Schedule blocks using the order of the groups or decreasing cost (energy or bits). quantization_config: file name of dictionary containing configuration of quantizers for kernel, bias and activation. debug: if True, fit will just print the groups for debugging purposes. head_name: specify which head to calcuate score/trial-size from in autoqkeras tuner_kwargs: parameters for keras_tuner depending on whether mode is random, hyperband or baeysian. Please refer to the documentation of kerstuner Tuners. 
""" def __init__( self, model, metrics=None, custom_objects=None, goal=None, output_dir="result", mode="random", transfer_weights=False, activation_bits=4, limit=None, tune_filters="none", tune_filters_exceptions=None, layer_indexes=None, learning_rate_optimizer=False, blocks=None, schedule_block="sequential", quantization_config=None, overwrite=True, debug=False, head_name=None, **tuner_kwargs): if not metrics: metrics = [] if not custom_objects: custom_objects = {} # goal: { "type": ["bits", "energy"], "params": {...} } # For type == "bits": # delta_p: increment (in %) of the accuracy if trial is smaller. # delta_n: decrement (in %) of the accuracy if trial is bigger. # rate: rate of decrease/increase in model size in terms of bits. # input_bits; size of input tensors. # output_bits; size of output tensors. # stress: parameter to reduce reference size to force tuner to # choose smaller models. # config: configuration on what to compute for each layer # minimum configuration is { "default": ["parameters", "activations"] } # use simplest one - number of bits if not goal: goal = { "type": "bits", "params": { "delta_p": 8.0, "delta_n": 8.0, "rate": 2.0, "stress": 1.0, "input_bits": 8, "output_bits": 8, "ref_bits": 8, "config": { "default": ["parameters", "activations"] } } } self.target = forgiving_factor[goal["type"]](**goal["params"]) self.model = model self.metrics = metrics self.custom_objects = custom_objects self.mode = mode self.transfer_weights = transfer_weights self.activation_bits = activation_bits self.limit = limit self.tune_filters = tune_filters self.tune_filters_exceptions = tune_filters_exceptions self.layer_indexes = layer_indexes self.learning_rate_optimizer = learning_rate_optimizer self.blocks = blocks self.schedule_block = schedule_block self.quantization_config = quantization_config self.tuner_kwargs = tuner_kwargs self.debug = debug self.head_name = head_name self.autoqk = None self.learning_rate = model.optimizer.lr.numpy() self.overwrite = 
overwrite assert self.schedule_block in ["sequential", "cost"] # right now we create unique results directory idx = 0 name = output_dir if self.overwrite: while os.path.exists(name): idx += 1 name = output_dir + "_" + str(idx) output_dir = name self.output_dir = output_dir self.next_block = self.get_next_block(overwrite) if self.next_block > 0: strategy = self.tuner_kwargs.get("distribution_strategy", None) if strategy: with strategy.scope(): self.model = tf.keras.models.load_model( os.path.join( self.output_dir, "model_block_" + str(self.next_block - 1)), custom_objects=self.custom_objects) else: self.model = tf.keras.models.load_model( os.path.join( self.output_dir, "model_block_" + str(self.next_block - 1)), custom_objects=self.custom_objects) print("Load model completed") def get_next_block(self, overwrite): """Get the next block id to be worked on.""" if overwrite: return 0 else: try: with tf.io.gfile.GFile(os.path.join(self.output_dir, "scheduler.json"), "r") as f: scheduler_json = f.read() scheduler = json.loads(scheduler_json) return scheduler["next_block"] except: # pylint: disable=bare-except return 0 def get_limit(self, model, pattern): """Apply patterned group to limit to obtain new limit set.""" limit = self.limit new_limit = {} new_pattern = collections.defaultdict(list) for layer_name in self.grouped_patterns[pattern]: layer = model.get_layer(layer_name) layer_class_name = layer.__class__.__name__ target_quantizers = limit.get(layer_class_name, -1) for limit_pattern in limit: if re.match(limit_pattern, layer_name): target_quantizers = limit[limit_pattern] new_pattern[limit_pattern].append(layer_name) layer_name = limit_pattern break if target_quantizers != -1: new_limit[layer_name] = target_quantizers for key in new_pattern: # grouped pattern in regex need to be ^(word1|word2|...)$ instead of # ^word1|word2|...$; otherwise it cause non-exact match, # e.g., fc.*_0 and fc.*_0_relu were miss-matched new_key = "^" + "(" + "|".join(new_pattern[key]) + ")" 
+ "$" new_limit[new_key] = new_limit[key] if new_key != key: del new_limit[key] return new_limit def fit(self, *fit_args, **fit_kwargs): """Invokes tuner fit algorithm.""" self.history = [] self.compute_block_costs(self.blocks, self.model) if self.tuner_kwargs.get("max_trials", None): max_trials = float(self.tuner_kwargs["max_trials"]) lr = self.model.optimizer.lr.numpy() model = self.model frozen_layers = [] for i, (pattern, cost) in enumerate(self.retrieve_max_block()): # now create new limit pattern if not self.overwrite: if i < self.next_block: print("Resume tuning. Skipping block ", i) continue print("... block cost: {:.0f} / {:.0f}".format(cost, self.reference_size)) if self.tuner_kwargs.get("max_trials", None): self.tuner_kwargs["max_trials"] = int( max(10, max_trials * cost / self.reference_size)) print("... adjusting max_trials for this block to {}".format( self.tuner_kwargs["max_trials"])) limit = self.get_limit(model, pattern) new_frozen_layers = self.grouped_patterns[pattern] # if dictionary is empty we did not match anything. # we have a bug in the patterns specified by the # user. 
assert limit print("Pattern {} is : {}".format(i, limit)) if self.debug: frozen_layers = frozen_layers + new_frozen_layers continue self.autoqk = AutoQKeras( model, self.metrics, custom_objects=self.custom_objects, goal=self.target, output_dir=self.output_dir + "/" + str(i), mode=self.mode, transfer_weights=self.transfer_weights, frozen_layers=frozen_layers, activation_bits=self.activation_bits, limit=limit, tune_filters=self.tune_filters, tune_filters_exceptions=self.tune_filters_exceptions, layer_indexes=self.layer_indexes, learning_rate_optimizer=self.learning_rate_optimizer, quantization_config=self.quantization_config, overwrite=self.overwrite, head_name=self.head_name, **self.tuner_kwargs) self.autoqk.fit(*fit_args, **fit_kwargs) self.autoqk.tuner.results_summary() self.history.append(self.autoqk.history()) model = self.autoqk.get_best_model() self.learning_rate = model.optimizer.lr.numpy() # restore learning rate # this is just a placeholder for the optimizer. model.compile( model.optimizer, loss=self.model.loss, metrics=self.model.metrics) frozen_layers = frozen_layers + new_frozen_layers filename = self.output_dir + "/model_block_" + str(i) model.save(filename) self.next_block = i + 1 # update scheduler json with tf.io.gfile.GFile(os.path.join(self.output_dir, "scheduler.json"), "w") as f: f.write(json.dumps({"next_block": self.next_block})) if self.debug: return self.best_model = model # make all layers trainable again for layer_name in frozen_layers: layer = model.get_layer(layer_name) layer.trainable = True def compute_block_costs(self, patterns, model): """Computes costs for each block.""" # get block cost for original model self.reference_size = self.target.get_reference(model) self.model_size = self.target.get_reference_stats() # first group layers into the patterns groups = {pattern: [] for pattern in patterns} for layer_id, layer in enumerate(model.layers): if ( self.layer_indexes is not None and layer_id not in self.layer_indexes ): continue for 
pattern in groups: if re.match(pattern, layer.name): groups[pattern].append(layer.name) self.grouped_patterns = groups # now compute cost for each group self.costs = [] for pattern in patterns: # self.grouped_patterns: total = 0 for layer in self.grouped_patterns[pattern]: if layer in self.model_size: total += self.model_size[layer]["total"] self.costs.append((pattern, total)) # the costs will be sorted by the total cost of the group if self.schedule_block == "cost": self.costs = sorted(self.costs, key=lambda cost_tuple: -cost_tuple[1]) def retrieve_max_block(self): for cost in self.costs: yield cost def get_history(self): """Returns the history of the model search.""" return self.history def get_best_model(self): """Returns the best model.""" # check if we have run fit first. if not self.autoqk: return None self.autoqk.hypermodel.target.print_stats() print_qmodel_summary(self.best_model) return self.best_model def get_learning_rate(self): return self.learning_rate ================================================ FILE: qkeras/autoqkeras/examples/run/get_data.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Extracts sample dataset from tfds.""" import numpy as np from tensorflow.keras.utils import to_categorical import tensorflow_datasets as tfds def get_data(dataset_name, fast=False): """Returns dataset from tfds.""" ds_train = tfds.load(name=dataset_name, split="train", batch_size=-1) ds_test = tfds.load(name=dataset_name, split="test", batch_size=-1) dataset = tfds.as_numpy(ds_train) x_train, y_train = dataset["image"].astype(np.float32), dataset["label"] dataset = tfds.as_numpy(ds_test) x_test, y_test = dataset["image"].astype(np.float32), dataset["label"] if len(x_train.shape) == 3: x_train = x_train.reshape(x_train.shape + (1,)) x_test = x_test.reshape(x_test.shape + (1,)) x_train /= 256.0 x_test /= 256.0 x_mean = np.mean(x_train, axis=0) x_train -= x_mean x_test -= x_mean nb_classes = np.max(y_train) + 1 y_train = to_categorical(y_train, nb_classes) y_test = to_categorical(y_test, nb_classes) print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") if fast: i_train = np.arange(x_train.shape[0]) np.random.shuffle(i_train) i_test = np.arange(x_test.shape[0]) np.random.shuffle(i_test) s_x_train = x_train[i_train[0:fast]] s_y_train = y_train[i_train[0:fast]] s_x_test = x_test[i_test[0:fast]] s_y_test = y_test[i_test[0:fast]] return ((s_x_train, s_y_train), (x_train, y_train), (s_x_test, s_y_test), (x_test, y_test)) else: return (x_train, y_train), (x_test, y_test) if __name__ == "__main__": get_data("mnist") get_data("fashion_mnist") get_data("cifar10", fast=1000) get_data("cifar100") ================================================ FILE: qkeras/autoqkeras/examples/run/get_model.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with 
the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from qkeras.autoqkeras.examples.run.networks import ConvBlockNetwork # pylint: disable=line-too-long def get_model(dataset): """Returns a model for the demo of AutoQKeras.""" if dataset == "mnist": model = ConvBlockNetwork( shape=(28, 28, 1), nb_classes=10, kernel_size=3, filters=[16, 32, 48, 64, 128], dropout_rate=0.2, with_maxpooling=False, with_batchnorm=True, kernel_initializer="he_uniform", bias_initializer="zeros", ).build() elif dataset == "fashion_mnist": model = ConvBlockNetwork( shape=(28, 28, 1), nb_classes=10, kernel_size=3, filters=[16, [32]*3, [64]*3], dropout_rate=0.2, with_maxpooling=True, with_batchnorm=True, use_separable="mobilenet", kernel_initializer="he_uniform", bias_initializer="zeros", use_xnornet_trick=True ).build() elif dataset == "cifar10": model = ConvBlockNetwork( shape=(32, 32, 3), nb_classes=10, kernel_size=3, filters=[16, [32]*3, [64]*3, [128]*3], dropout_rate=0.2, with_maxpooling=True, with_batchnorm=True, use_separable="mobilenet", kernel_initializer="he_uniform", bias_initializer="zeros", use_xnornet_trick=True ).build() elif dataset == "cifar100": model = ConvBlockNetwork( shape=(32, 32, 3), nb_classes=100, kernel_size=3, filters=[16, [32]*3, [64]*3, [128]*3, [256]*3], dropout_rate=0.2, with_maxpooling=True, with_batchnorm=True, use_separable="mobilenet", kernel_initializer="he_uniform", bias_initializer="zeros", use_xnornet_trick=True ).build() model.summary() return model ================================================ FILE: 
qkeras/autoqkeras/examples/run/networks/__init__.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from .conv_block import ConvBlockNetwork ================================================ FILE: qkeras/autoqkeras/examples/run/networks/conv_block.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import os

from tensorflow.initializers import *  # pylint: disable=wildcard-import
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import DepthwiseConv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *  # pylint: disable=wildcard-import
from qkeras import *  # pylint: disable=wildcard-import


class ConvBlockNetwork(object):
  """Creates Convolutional block type of network."""

  def __init__(
      self,
      shape,
      nb_classes,
      kernel_size,
      filters,
      dropout_rate=0.0,
      with_maxpooling=True,
      with_batchnorm=True,
      kernel_initializer="he_normal",
      bias_initializer="zeros",
      use_separable=False,
      use_xnornet_trick=False
  ):
    """Creates class.

    Args:
      shape: shape of inputs.
      nb_classes: number of output classes.
      kernel_size: kernel_size of network.
      filters: sizes of filters (if entry is a list, we create a block).
      dropout_rate: dropout rate if > 0.
      with_maxpooling: if true, use maxpooling.
      with_batchnorm: with BatchNormalization.
      kernel_initializer: kernel_initializer.
      bias_initializer: bias and beta initializer.
      use_separable: if "dsp", do conv's 1x3 + 3x1. If "mobilenet", use
        MobileNet separable convolution. If False or "none", perform single
        conv layer.
      use_xnornet_trick: use bn+act after max pool to enable binary to
        avoid saturation to largest value.
    """
    # All parameters are stored verbatim; they are only consumed by build().
    self.shape = shape
    self.nb_classes = nb_classes
    self.kernel_size = kernel_size
    self.filters = filters
    self.dropout_rate = dropout_rate
    self.with_maxpooling = with_maxpooling
    self.with_batchnorm = with_batchnorm
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.use_separable = use_separable
    self.use_xnornet_trick = use_xnornet_trick

  def build(self):
    """Builds model."""
    x = x_in = Input(self.shape, name="input")

    # Outer loop: one iteration per entry of self.filters; a list entry
    # expands into a block of several conv layers sharing one maxpool.
    for i in range(len(self.filters)):
      # Layer-name suffixes encode (block index, layer-in-block index) so
      # names stay unique; suffix parts are omitted when there is only one.
      if len(self.filters) > 1:
        name_suffix_list = [str(i)]
      else:
        name_suffix_list = []

      # Normalize a scalar entry to a single-layer block.
      if not isinstance(self.filters[i], list):
        filters = [self.filters[i]]
      else:
        filters = self.filters[i]

      for j in range(len(filters)):
        if len(filters) > 1:
          name_suffix = "_".join(name_suffix_list + [str(j)])
        else:
          name_suffix = "_".join(name_suffix_list)

        # "dsp" mode splits the KxK kernel into a 1xK followed by Kx1 pair.
        if self.use_separable == "dsp":
          kernels = [(1, self.kernel_size), (self.kernel_size, 1)]
        else:
          kernels = [(self.kernel_size, self.kernel_size)]

        for k, kernel in enumerate(kernels):
          strides = 1
          # Without maxpooling, the last conv of the block downsamples by
          # using stride 2 instead.
          if (
              not self.with_maxpooling and j == len(filters)-1 and
              k == len(kernels)-1
          ):
            strides = 2

          if self.use_separable == "dsp":
            kernel_suffix = (
                "".join([str(k) for k in kernel]) + "_" + name_suffix)
          elif self.use_separable == "mobilenet":
            depth_suffix = (
                "".join([str(k) for k in kernel]) + "_" + name_suffix)
            kernel_suffix = "11_" + name_suffix
          else:
            kernel_suffix = name_suffix

          if self.use_separable == "mobilenet":
            # MobileNet-style separable conv: depthwise KxK (optionally
            # followed by BN+ReLU), then the pointwise 1x1 conv below.
            x = DepthwiseConv2D(
                kernel,
                padding="same", strides=strides,
                use_bias=False,
                name="conv2d_dw_" + depth_suffix)(x)
            if self.with_batchnorm:
              x = BatchNormalization(name="conv2d_dw_bn_" + depth_suffix)(x)
            x = Activation("relu", name="conv2d_dw_act_" + depth_suffix)(x)
            # The remaining Conv2D becomes the pointwise 1x1 step; the
            # depthwise layer already applied the stride.
            kernel = (1, 1)
            strides = 1

          # When BN follows, the conv bias is redundant (absorbed by beta).
          x = Conv2D(
              filters[j], kernel, strides=strides,
              use_bias=not self.with_batchnorm,
              padding="same",
              kernel_initializer=self.kernel_initializer,
              bias_initializer=self.bias_initializer,
              name="conv2d_" + kernel_suffix)(x)

          # Skip BN+act here for the last conv when the xnornet trick is on
          # and maxpooling follows: BN+act are emitted after the maxpool
          # instead (see below).
          if not (
              self.with_maxpooling and self.use_xnornet_trick and
              j == len(filters)-1 and k == len(kernels)-1
          ):
            if self.with_batchnorm:
              x = BatchNormalization(
                  beta_initializer=self.bias_initializer,
                  name="bn_" + kernel_suffix)(x)
            x = Activation("relu", name="act_" + kernel_suffix)(x)

      if self.with_maxpooling:
        x = MaxPooling2D(2, 2, name="mp_" + name_suffix)(x)
        # this is a trick from xnornet to enable full binary or ternary
        # networks to be after maxpooling.
        if self.use_xnornet_trick:
          x = BatchNormalization(
              beta_initializer=self.bias_initializer,
              name="mp_bn_" + name_suffix)(x)
          x = Activation("relu", name="mp_act_" + name_suffix)(x)

      if self.dropout_rate > 0:
        x = Dropout(self.dropout_rate, name="drop_" + name_suffix)(x)

    # If spatial size is already 1x1, a 1x1 conv acts as the classifier;
    # otherwise flatten and use a Dense head.
    if x.shape.as_list()[1] > 1:
      x = Flatten(name="flatten")(x)
      x = Dense(
          self.nb_classes,
          kernel_initializer=self.kernel_initializer,
          bias_initializer=self.bias_initializer,
          name="dense")(x)
      x = Activation("softmax", name="softmax")(x)
    else:
      x = Conv2D(
          self.nb_classes, 1, strides=1, padding="same",
          kernel_initializer=self.kernel_initializer,
          bias_initializer=self.bias_initializer,
          name="dense")(x)
      x = Activation("softmax", name="softmax")(x)
      x = Flatten(name="flatten")(x)

    model = Model(inputs=[x_in], outputs=[x])

    return model


if __name__ == "__main__":
  import os
  # Force CPU execution for this smoke test.
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  os.environ["CUDA_VISIBLE_DEVICES"] = ""

  model = ConvBlockNetwork(
      shape=(64, 64, 1),
      nb_classes=10,
      kernel_size=3,
      filters=[16, [32]*3, 48, 64, 128],
      dropout_rate=0.0,
      with_maxpooling=False,
      with_batchnorm=True,
      use_separable="mobilenet",
      use_xnornet_trick=True
  ).build()
  model.summary()
"""Plots history of runs when running in scheduler mode."""

import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One log_<block>.csv file is produced per quantized block; sort so that
# subplot i corresponds to block i.
filenames = sorted(glob.glob("log_*.csv"))

# Arrange subplots in a near-square grid large enough for all files.
grid_size = int(np.ceil(np.sqrt(len(filenames))))

for i, filename in enumerate(filenames):
  history = pd.read_csv(filename)

  title = "block_" + str(i)
  # plt.subplot returns an Axes; score/accuracy share the left y-axis.
  ax1 = plt.subplot(grid_size, grid_size, i + 1, title=title)

  ax1.set_xlabel("trial")
  ax1.set_ylabel("score / accuracy")
  plt1 = ax1.plot(history["score"], "ro-", label="score")
  plt2 = ax1.plot(history["accuracy"], "go-", label="accuracy")
  plt3 = ax1.plot(history["val_accuracy"], "bo-", label="val_accuracy")

  # Energy (trial_size) uses a twin y-axis on the right.
  ax2 = ax1.twinx()
  ax2.set_ylabel("energy", color="m")
  plt4 = ax2.plot(history["trial_size"], "mo-", label="trial_size")

  # Merge the line handles from both axes into a single legend.
  plots = plt1 + plt2 + plt3 + plt4
  labels = [plot.get_label() for plot in plots]
  ax1.legend(plots, labels, loc=0)

plt.show()
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from .forgiving_bits import ForgivingFactorBits from .forgiving_energy import ForgivingFactorPower from .forgiving_factor import ForgivingFactor forgiving_factor = { "bits": ForgivingFactorBits, "energy": ForgivingFactorPower } ================================================ FILE: qkeras/autoqkeras/forgiving_metrics/forgiving_bits.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Implements forgiving factor metrics bit model size in bits."""

import numpy as np
import six

from qkeras.autoqkeras.forgiving_metrics.forgiving_factor import ForgivingFactor  # pylint: disable=line-too-long
from qkeras import get_quantizer


class ForgivingFactorBits(ForgivingFactor):
  """Implements forgiving factor with target as number of bits."""

  def __init__(
      self, delta_p, delta_n, rate, stress=1.0, input_bits=8, output_bits=8,
      ref_bits=8, config=None):
    """Creates the bit-size forgiving factor.

    Args:
      delta_p: forgiveness percentage (passed to ForgivingFactor).
      delta_n: penalty percentage (passed to ForgivingFactor).
      rate: log base for the size ratio (passed to ForgivingFactor).
      stress: multiplier applied to the reference size.
      input_bits: bits assumed for model inputs.
      output_bits: bits assumed for model outputs (softmax/sigmoid).
      ref_bits: bits assumed for unquantized weights/activations.
      config: dict mapping layer class name to the list of cost components
        ("parameters", "activations") that should be counted for it.
    """
    self.stress = stress
    self.input_bits = input_bits
    self.output_bits = output_bits
    self.ref_bits = ref_bits
    # NOTE(review): ref_size is initialized but never read in this class —
    # caching is done via the reference_size attribute instead.
    self.ref_size = {}
    self.config = config if config else {}
    super().__init__(delta_p, delta_n, rate)

  def _param_size(self, layer):
    """Computes size of parameters of a layer in bits."""
    t_size = self.ref_bits
    parameter_size = 0

    # we only compute parameter sizes for these layers, and BatchNormalization
    # is a special case because it exports mean and beta that is absorbed by
    # previous or next layer. As mean and beta will be compressed into a single
    # value, we actually only need to take care of the shape.
    if layer.__class__.__name__ in [
        "Dense", "Conv2D", "Conv1D", "DepthwiseConv2D"]:
      # Unquantized layers: every weight tensor counted at ref_bits.
      for w in layer.get_weights():
        parameter_size += t_size * np.prod(w.shape)
    elif layer.__class__.__name__ in [
        "QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D"]:
      # Quantized layers: use each weight's quantizer bit width when present,
      # falling back to ref_bits for unquantized slots.
      for i, w in enumerate(layer.get_weights()):
        if layer.get_quantizers()[i]:
          bits = layer.get_quantizers()[i].bits
        else:
          bits = t_size
        parameter_size += bits * np.prod(w.shape)
    elif layer.__class__.__name__ in ["BatchNormalization"]:
      # scale
      # (index -1 is used only for its shape, per the note above)
      index = -1
      parameter_size += t_size * np.prod(layer.get_weights()[index].shape)
      # center (bias)
      if layer.center:
        index = int(bool(layer.scale))
        parameter_size += t_size * np.prod(layer.get_weights()[index].shape)
    elif layer.__class__.__name__ in ["QBatchNormalization"]:
      # scale
      # NOTE(review): 6/5 bit widths below are hard-coded assumptions for
      # QBatchNormalization scale/center quantizers — confirm against the
      # quantizers actually configured on the layer.
      index = -1
      bits = 6
      parameter_size += bits * np.prod(layer.get_weights()[index].shape)
      # center (bias)
      if layer.center:
        bits = 5
        index = int(bool(layer.scale))
        parameter_size += bits * np.prod(layer.get_weights()[index].shape)
    return parameter_size

  def _act_size(self, layer):
    """Computes size of activations of a layer in bits."""
    i_size = self.input_bits
    o_size = self.output_bits
    t_size = self.ref_bits

    # Number of activation values per example (batch dim excluded).
    output_size = np.prod(layer.output.shape[1:])

    # we compute activation sizes for inputs and outputs
    if layer.__class__.__name__ in ["InputLayer"]:
      return i_size * output_size
    elif layer.__class__.__name__ in [
        "Dense", "Conv2D", "Conv1D", "DepthwiseConv2D"]:
      # Unquantized layers with a non-linear fused activation cost ref_bits;
      # a linear (or absent) activation is free.
      if layer.activation is not None and layer.activation.__name__ != "linear":
        return t_size * output_size
      else:
        return 0
    elif layer.__class__.__name__ in [
        "QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D"]:
      # The fused activation may be None, a string, or a quantizer object.
      if layer.activation is None:
        is_softmax = False
        is_linear = False
      else:
        if isinstance(layer.activation, six.string_types):
          is_softmax = layer.activation == "softmax"
          is_linear = layer.activation == "linear"
        elif hasattr(layer.activation, "__name__"):
          is_softmax = layer.activation.__name__ == "softmax"
          is_linear = layer.activation.__name__ == "linear"
        else:
          is_softmax = False
          is_linear = False

      if is_softmax:
        # Softmax outputs are counted at the model output bit width.
        bits = o_size
      elif is_linear:
        bits = 0
      else:
        assert not isinstance(layer.activation, six.string_types)
        # Quantized activations report their own bit width.
        if hasattr(layer.activation, "bits"):
          bits = layer.activation.bits
        else:
          bits = t_size
      return bits * np.prod(layer.output.shape.as_list()[1:])
    elif layer.__class__.__name__ in ["QActivation", "Activation"]:
      if isinstance(layer.activation, six.string_types):
        is_linear = layer.activation == "linear"
        is_softmax = layer.activation == "softmax"
        is_sigmoid = layer.activation == "sigmoid"
      else:
        is_linear = layer.activation.__name__ == "linear"
        is_softmax = layer.activation.__name__ == "softmax"
        is_sigmoid = layer.activation.__name__ == "sigmoid"

      if is_linear:
        bits = 0
      elif is_softmax or is_sigmoid:
        bits = o_size
      else:
        # Resolve a quantizer string (e.g. "quantized_relu(4,2)") to an
        # object so its bit width can be read.
        if isinstance(layer.activation, six.string_types):
          activation = get_quantizer(layer.activation)
        else:
          activation = layer.activation
        if hasattr(activation, "bits"):
          bits = activation.bits
        else:
          bits = t_size
      return bits * output_size
    # Any other layer type contributes no activation cost.
    return 0

  def compute_model_size(self, model):
    """Computes size of model."""
    a_size = 0
    p_size = 0
    total_size = 0
    model_size_dict = {}

    for layer in model.layers:
      layer_name = layer.__class__.__name__
      # Per-layer-class config selects which components count; falls back to
      # the "default" entry when the class is not listed.
      layer_config = self.config.get(
          layer_name, self.config.get("default", None))
      if layer_config:
        parameters = self._param_size(layer)
        activations = self._act_size(layer)
        # Booleans act as 0/1 weights on each component.
        p_weight = ("parameters" in layer_config)
        a_weight = ("activations" in layer_config)
        total = p_weight * parameters + a_weight * activations
        model_size_dict[layer.name] = {
            "parameters": parameters,
            "activations": activations,
            "total": total
        }
        a_size += a_weight * activations
        p_size += p_weight * parameters
        total_size += total

    return (total_size, p_size, a_size, model_size_dict)

  def get_reference(self, model):
    # The reference is computed once and cached on the instance; stress
    # scales the cached total.
    if not hasattr(self, "reference_size"):
      cached_result = self.compute_model_size(model)
      self.reference_size = cached_result[0] * self.stress
      self.ref_p = cached_result[1]
      self.ref_a = cached_result[2]
      self.reference_size_dict = cached_result[3]
    return self.reference_size

  def get_reference_stats(self):
    # Per-layer breakdown computed by get_reference.
    return self.reference_size_dict

  def get_trial(self, model):
    """Computes size of quantization trial."""
    result = self.compute_model_size(model)
    self.trial_size = result[0]
    self.total_p_bits = result[1]
    self.total_a_bits = result[2]
    self.trial_size_dict = result[3]
    return self.trial_size

  def get_total_factor(self):
    """we adjust the learning rate by size reduction."""
    # Relative change of total bits: negative means the trial shrank.
    ref_total = self.ref_a + self.ref_p
    trial_total = self.total_a_bits + self.total_p_bits
    return (trial_total - ref_total) / ref_total

  def print_stats(self):
    """Prints statistics of current model."""
    str_format = (
        "stats: delta_p={} delta_n={} rate={} trial_size={} reference_size={}\n"
        "       delta={:.2f}%"
    )
    print(
        str_format.format(
            self.delta_p, self.delta_n, self.rate, self.trial_size,
            int(self.reference_size), 100*self.delta())
    )
    # Percentage change of activation, parameter, and total bits vs reference.
    a_percentage = np.round(
        100.0 * (self.total_a_bits - self.ref_a) / self.ref_a, 2)
    p_percentage = np.round(
        100.0 * (self.total_p_bits - self.ref_p) / self.ref_p, 2)
    ref_total = self.ref_a + self.ref_p
    trial_total = self.total_a_bits + self.total_p_bits
    total_percentage = np.round(
        100.0 * (trial_total - ref_total) / ref_total, 2)
    print(
        (
            "       a_bits={}/{} ({:.2f}%) p_bits={}/{} ({:.2f}%)\n"
            "       total={}/{} ({:.2f}%)"
        ).format(
            int(self.total_a_bits), int(self.ref_a), a_percentage,
            int(self.total_p_bits), int(self.ref_p), p_percentage,
            int(trial_total), int(ref_total), total_percentage
        ))
"""Implements forgiving factor metrics for energy consumption."""

import json
import numpy as np

from qkeras.autoqkeras.forgiving_metrics.forgiving_factor import ForgivingFactor  # pylint: disable=line-too-long
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings


class ForgivingFactorPower(ForgivingFactor):
  """Get Power cost of a given model."""

  def __init__(self, delta_p, delta_n, rate, stress=1.0, **kwargs):
    # input parameters:
    #   delta_p, delta_n, rate: same as parent class
    #   stress: stress level to shift reference curve
    #   process: technology process to use in configuration (horowitz, ...)
    #   parameters_on_memory: whether to store parameters in dram, sram, or
    #     fixed
    #   activations_on_memory: store activations in dram, sram
    #   min_sram_size: minimum sram size in number of bits
    #   rd_wr_on_io: whether load data from dram to sram (consider sram as a
    #     cache for dram. If false, we will assume data will be already in SRAM
    #   config_json: if None, use qtools/config_json by default
    #     define default source quantizers;
    #     default quantizers for intermediate variables if no quantizer
    #     provided parameters for energy calculation
    #   source_quantizers: quantizer for model input
    #   trained_model: whether model has been trained already, which is
    #     needed to compute tighter bounds for qBatchNorm Power estimation.
    #   reference_internal: size to use for weight/bias/activation in
    #     get_reference energy calculation (int8, fp16, fp32)
    #   reference_accumulator: accumulator and multiplier type in get_reference
    #     energy calculation
    #   keras_layer_quantizer: quantizer for keras layers in hybrid models
    super().__init__(delta_p, delta_n, rate)
    self.stress = stress

    # Each two-element list below holds [reference setting, trial setting]:
    # index 0 is used by get_reference, index 1 by get_trial.

    # process: horowitz... - must be present in config_json
    self.process = kwargs.get("process", "horowitz")

    # parameters_on_memory: fixed, sram, dram
    self.parameters_on_memory = kwargs.get(
        "parameters_on_memory", ["fixed"] * 2)

    # activations_on_memory: sram, dram
    self.activations_on_memory = kwargs.get(
        "activations_on_memory", ["dram"] * 2
    )

    self.min_sram_size = kwargs.get("min_sram_size", [0] * 2)

    # rd_wr_on_io: true/false
    self.rd_wr_on_io = kwargs.get("rd_wr_on_io", [True] * 2)

    self.config_json = kwargs.get("config_json", None)

    self.source_quantizers = kwargs.get("source_quantizers", None)

    # trained_model: true/false
    self.trained_model = kwargs.get("trained_model", False)

    # reference_internal: int8, fp16, fp32
    self.reference_internal = kwargs.get("reference_internal", "fp32")

    # reference_internal: int8, int16, int32, fp16, fp32
    self.reference_accumulator = kwargs.get("reference_accumulator", "fp32")

    # Cached by get_reference; None means "not computed yet".
    self.reference_size = None

    # energy_dict is a dictionary that lists energy consumption for each layer
    # format:
    # {
    #   "layer0_name":
    #   {
    #     "mem_cost": 148171,
    #     "op_cost": 0
    #   },
    #   "layer1_name":
    #   {
    #     "mem_cost": 179923,
    #     "op_cost": 34
    #   },
    #   ...
    #
    #   "total_cost": 328129
    # }
    self.ref_energy_dict = None
    self.trial_energy_dict = None

    # Validate memory placement and reference datatype options early.
    assert self.parameters_on_memory[0] in ["dram", "sram", "fixed"]
    assert self.parameters_on_memory[1] in ["dram", "sram", "fixed"]
    assert self.activations_on_memory[0] in ["dram", "sram", "fixed"]
    assert self.activations_on_memory[1] in ["dram", "sram", "fixed"]
    assert self.reference_internal in ["fp16", "fp32", "int8"]
    assert self.reference_accumulator in ["int16", "int32", "fp16", "fp32"]

  def get_reference(self, model):
    """Computes (and caches) the reference energy of the unquantized model."""
    # we only want to compute reference once
    if self.reference_size is not None:
      return self.reference_size * self.stress

    # for_reference=True makes qtools treat the model as unquantized,
    # using reference_internal/reference_accumulator datatypes throughout.
    q = run_qtools.QTools(
        model, process=self.process,
        source_quantizers=self.reference_internal,
        is_inference=self.trained_model, weights_path=None,
        keras_quantizer=self.reference_internal,
        keras_accumulator=self.reference_accumulator,
        for_reference=True)
    energy_dict = q.pe(
        weights_on_memory=self.parameters_on_memory[0],
        activations_on_memory=self.activations_on_memory[0],
        min_sram_size=self.min_sram_size[0],
        rd_wr_on_io=self.rd_wr_on_io[0])
    self.ref_energy_dict = energy_dict
    # Only the energy categories enabled in qtools settings are summed.
    self.reference_size = q.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    self.reference_energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)

    return self.reference_size * self.stress

  def get_trial(self, model):
    """Computes size of quantization trial."""
    # for_reference=False: use the model's actual quantizers.
    q = run_qtools.QTools(
        model, process=self.process,
        source_quantizers=self.source_quantizers,
        is_inference=self.trained_model, weights_path=None,
        keras_quantizer=self.reference_internal,
        keras_accumulator=self.reference_accumulator,
        for_reference=False)
    energy_dict = q.pe(
        weights_on_memory=self.parameters_on_memory[1],
        activations_on_memory=self.activations_on_memory[1],
        min_sram_size=self.min_sram_size[1],
        rd_wr_on_io=self.rd_wr_on_io[1])
    self.trial_energy_dict = energy_dict
    # self.trial_size = energy_dict["total_cost"]
    self.trial_size = q.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    self.trial_energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)

    return self.trial_size

  def get_total_factor(self):
    """we adjust the learning rate by size reduction."""
    # Relative energy change; negative means the trial consumes less.
    return (self.trial_size - self.reference_size) / self.reference_size

  def get_reference_stats(self):
    # Per-layer energy profile computed by get_reference.
    return self.reference_energy_profile

  def get_trial_stats(self):
    # Per-layer energy profile computed by get_trial.
    return self.trial_energy_profile

  def print_stats(self, verbosity=0):
    """Prints statistics of current model."""
    delta = self.delta()

    if (self.ref_energy_dict and self.trial_energy_dict):
      str_format = (
          "stats: delta_p={} delta_n={} rate={} trial_size={} "
          "reference_size={}\n"
          "       delta={:.2f}%"
      )
      print(
          str_format.format(
              self.delta_p, self.delta_n, self.rate, self.trial_size,
              int(self.reference_size), 100 * delta)
      )

    # verbosity > 0 dumps the full per-layer energy dictionaries as JSON.
    if verbosity > 0 and self.ref_energy_dict:
      print("Reference Cost Distribution:")
      dict_to_json = json.dumps(self.ref_energy_dict, indent=4)
      print(dict_to_json)

    if verbosity > 0 and self.trial_energy_dict:
      print("Trial Cost Distribution:")
      dict_to_json = json.dumps(self.trial_energy_dict, indent=4)
      print(dict_to_json)

    if (self.ref_energy_dict and self.trial_energy_dict):
      print("Total Cost Reduction:")
      reduction_percentage = np.round(
          100.0 * (self.trial_size - self.reference_size) /
          self.reference_size, 2)
      print(
          ("   {} vs {} ({:.2f}%)").format(
              int(self.trial_size), int(self.reference_size),
              reduction_percentage
          ))
"""Implements forgiving factor metrics."""

import numpy as np


class ForgivingFactor:
  """Base class for forgiving-factor metrics. Should never be invoked."""

  def __init__(self, delta_p, delta_n, rate):
    """Creates the forgiving factor.

    Args:
      delta_p: forgiveness percentage applied when the trial is smaller
        than the reference (stored internally as a fraction).
      delta_n: penalty percentage applied when the trial is bigger than
        the reference (stored internally as a fraction).
      rate: log base used to compress the reference/trial size ratio.
    """
    self.delta_p = np.float32(delta_p) / 100.0
    self.delta_n = np.float32(delta_n) / 100.0
    self.rate = np.float32(rate)

  def get_reference(self, model):
    """Computes reference size of model."""
    # Subclasses must implement and set self.reference_size.
    raise NotImplementedError("class not implemented.")

  def get_trial(self, model, schema):
    """Computes size of quantization trial."""
    # Subclasses must implement and set self.trial_size.
    raise NotImplementedError("class not implemented.")

  def delta(self):
    """Returns the forgiving factor for the last reference/trial pair.

    Positive (reward weighted by delta_p) when the trial is smaller than
    the reference, negative (penalty weighted by delta_n) otherwise.
    Requires get_reference and get_trial to have been called first.
    """
    # log(ref/trial)/log(rate) = number of factors of `rate` by which the
    # trial shrank (positive) or grew (negative); hoisted out of the two
    # np.where branches since both use the same expression.
    log_ratio = (np.log(self.reference_size / self.trial_size) /
                 np.log(self.rate))
    return np.where(self.trial_size < self.reference_size,
                    self.delta_p * log_ratio,
                    self.delta_n * log_ratio)
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Definition of default quantization configuration.""" default_quantization_config = { "kernel": { "binary": 1, "stochastic_binary": 1, "ternary": 2, "stochastic_ternary": 2, "quantized_bits(2,1,1,alpha=1.0)": 2, "quantized_bits(4,0,1)": 4, "quantized_bits(8,0,1)": 8, "quantized_po2(4,1)": 4 }, "bias": { "quantized_bits(4,0,1)": 4, "quantized_bits(8,3,1)": 8, "quantized_po2(4,8)": 4 }, "activation": { "binary": 1, "binary(alpha='auto_po2')": 1, "ternary": 2, "quantized_relu(3,1)": 3, "quantized_relu(4,2)": 4, "quantized_relu(8,2)": 8, "quantized_relu(8,4)": 8, "quantized_relu(16,8)": 16, "quantized_relu_po2(4,4)": 4 }, "linear": { "binary": 1, "ternary": 2, "quantized_bits(4,1)": 4, "quantized_bits(8,2)": 8, "quantized_bits(16,10)": 16, "quantized_po2(6,4)": 6 } } ================================================ FILE: qkeras/autoqkeras/tests/test_forgiving_factor.py ================================================ # ============================================================================== # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import pytest
from tensorflow.keras.layers import *  # pylint: disable=wildcard-import
from tensorflow.keras.models import Model

from qkeras import *  # pylint: disable=wildcard-import
from qkeras.autoqkeras.forgiving_metrics import ForgivingFactorBits  # pylint: disable=line-too-long
from qkeras.utils import model_quantize


def _get_layer_config():
  """Returns a fresh copy of the layer->cost-component config for tests.

  Previously this dict was duplicated verbatim in both tests; a fresh copy
  is returned each time so a test cannot mutate another test's config.
  """
  return {
      "QDense": ["parameters", "activations"],
      "Dense": ["parameters", "activations"],
      "QConv2D": ["parameters", "activations"],
      "Conv2D": ["parameters", "activations"],
      "DepthwiseConv2D": ["parameters", "activations"],
      "QDepthwiseConv2D": ["parameters", "activations"],
      "Activation": ["activations"],
      "QActivation": ["activations"],
      "QBatchNormalization": ["parameters"],
      "BatchNormalization": ["parameters"],
      "default": ["activations"],
  }


def get_model():
  """Returns sample model."""
  xi = Input((28, 28, 1), name="input")  # pylint: disable=undefined-variable
  x = Conv2D(32, 3, strides=1, padding="same", name="c1")(xi)  # pylint: disable=undefined-variable
  x = BatchNormalization(name="b1")(x)  # pylint: disable=undefined-variable
  x = Activation("relu", name="a1")(x)  # pylint: disable=undefined-variable
  x = MaxPooling2D(2, 2, name="mp1")(x)  # pylint: disable=undefined-variable
  x = QConv2D(32, 3, kernel_quantizer="binary", bias_quantizer="binary",  # pylint: disable=undefined-variable
              strides=1, padding="same", name="c2")(x)
  x = QBatchNormalization(name="b2")(x)  # pylint: disable=undefined-variable
  x = QActivation("binary", name="a2")(x)  # pylint: disable=undefined-variable
  x = MaxPooling2D(2, 2, name="mp2")(x)  # pylint: disable=undefined-variable
  x = QConv2D(32, 3, kernel_quantizer="ternary", bias_quantizer="ternary",  # pylint: disable=undefined-variable
              strides=1, padding="same", activation="binary", name="c3")(x)
  x = Flatten(name="flatten")(x)  # pylint: disable=undefined-variable
  x = Dense(1, name="dense", activation="softmax")(x)  # pylint: disable=undefined-variable

  model = Model(inputs=xi, outputs=x)

  return model


def test_forgiving_factor_bits():
  """Tests forgiving factor bits."""
  delta_p = 8.0
  delta_n = 8.0
  rate = 2.0
  stress = 1.0
  input_bits = 8
  output_bits = 8
  ref_bits = 8

  config = _get_layer_config()

  model = get_model()

  ffb = ForgivingFactorBits(
      delta_p, delta_n, rate, stress,
      input_bits, output_bits, ref_bits, config
  )

  cached_result = ffb.compute_model_size(model)
  ref_size = cached_result[0]
  ref_p = cached_result[1]
  ref_a = cached_result[2]

  # Expected totals for the fixed sample model built by get_model().
  assert ref_size == 258544
  assert ref_p == 43720
  assert ref_a == 214824


def test_new_forgiving_factor():
  """Tests forgiving factor."""
  delta_p = 8.0
  delta_n = 8.0
  rate = 2.0
  stress = 1.0
  input_bits = 8
  output_bits = 8
  ref_bits = 8

  config = _get_layer_config()

  model = get_model()
  model.use_legacy_config = True

  ffb = ForgivingFactorBits(
      delta_p, delta_n, rate, stress,
      input_bits, output_bits, ref_bits, config
  )

  cached_result = ffb.compute_model_size(model)
  ref_size = cached_result[0]
  ref_p = cached_result[1]
  ref_a = cached_result[2]
  ref_size_dict = cached_result[3]

  assert ref_size == 258544
  assert ref_p == 43720
  assert ref_a == 214824

  # Quantize only layer "c1"; every other layer's cost must be unchanged.
  q_dict = {
      "c1": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "quantized_bits(4)"
      }
  }

  q_model = model_quantize(model, q_dict, 4)

  cached_result = ffb.compute_model_size(q_model)
  trial_size_dict = cached_result[3]

  for name in trial_size_dict:
    if name != "c1":
      assert trial_size_dict[name] == ref_size_dict[name]
  # c1: 288 kernel weights * 1 bit + 32 biases * 4 bits = 416 bits.
  assert trial_size_dict["c1"]["parameters"] == 416


if __name__ == "__main__":
  pytest.main([__file__])
"""Implements utility functions for support of auto-quantization."""

import json


# Quantized sequence layer class names handled specially by AutoQKeras.
Q_SEQUENCE_LAYERS = ["QSimpleRNN", "QLSTM", "QGRU", "QBidirectional"]


def print_qmodel_summary(q_model):
  """Prints quantized model summary.

  One line per layer: layer name, relevant size (units/filters), the
  configured quantizers, and the activation when it is not linear.

  Args:
    q_model: quantized (or partially quantized) Keras model.
  """
  for layer in q_model.layers:
    if (layer.__class__.__name__ == "QActivation" or
        layer.__class__.__name__ == "QAdaptiveActivation"):
      print("{:20} {}".format(layer.name, str(layer.activation)))
    elif (
        hasattr(layer, "get_quantizers") and
        layer.__class__.__name__ != "QBatchNormalization"
    ):
      print("{:20} ".format(layer.name), end="")
      if "Dense" in layer.__class__.__name__:
        print("u={} ".format(layer.units), end="")
      elif layer.__class__.__name__ in [
          "Conv2D", "QConv2D", "Conv1D", "QConv1D", "QConv2DBatchnorm",
          "QDepthwiseConv2DBatchnorm"]:
        print("f={} ".format(layer.filters), end="")

      # Print every configured quantizer (iterate directly instead of
      # indexing by range(len(...))).
      for quantizer in layer.get_quantizers():
        if quantizer is not None:
          print("{} ".format(str(quantizer)), end="")

      if hasattr(layer, "recurrent_activation"):
        print("recurrent act={}".format(layer.recurrent_activation), end="")
      # Linear activations are the default and carry no information.
      if (
          layer.activation is not None and not (
              hasattr(layer.activation, "__name__") and
              layer.activation.__name__ == "linear"
          )
      ):
        print("act={}".format(layer.activation), end="")
      print()
    elif layer.__class__.__name__ == "QBatchNormalization":
      # tensorflow is only needed to evaluate the moving mean here, so it is
      # imported lazily to keep this module importable without tensorflow.
      import tensorflow as tf  # pylint: disable=g-import-not-at-top
      print("{:20} QBN, mean={}".format(
          layer.name, str(tf.keras.backend.eval(layer.moving_mean))), end="")
      print()
    elif layer.__class__.__name__ == "BatchNormalization":
      print("{:20} is normal keras bn layer".format(layer.name), end="")
      print()
  print()


def get_quantization_dictionary(q_model):
  """Returns quantization dictionary.

  Args:
    q_model: quantized Keras model.

  Returns:
    Dict mapping layer name to that layer's quantization config, for every
    layer that exposes get_quantization_config.
  """
  q_dict = {}
  for layer in q_model.layers:
    if hasattr(layer, "get_quantization_config"):
      q_dict[layer.name] = layer.get_quantization_config()
  return q_dict


def save_quantization_dict(fn, q_model):
  """Saves quantization dictionary as json object in disk.

  Args:
    fn: output file name.
    q_model: quantized Keras model.
  """
  q_dict = get_quantization_dictionary(q_model)
  json_dict = json.dumps(q_dict)
  # with-statement guarantees the file is closed even if the write fails.
  with open(fn, "w") as f:
    f.write(json_dict + "\n")
Given input matrix x with values (for example) 0, 1, 2, 3, 4, 5, 6, 7, create a number of classes as follows: classes=2, value_range=8, with_residue=0 A true one-hot representation, and the remaining bits are truncated, using one bit representation. 0 - [1,0] 1 - [1,0] 2 - [1,0] 3 - [1,0] 4 - [0,1] 5 - [0,1] 6 - [0,1] 7 - [0,1] classes=2, value_range=8, with_residue=1 In this case, the residue is added to the one-hot class, and the class will use 2 bits (for the remainder) + 1 bit (for the one hot) 0 - [1,0] 1 - [1.25,0] 2 - [1.5,0] 3 - [1.75,0] 4 - [0,1] 5 - [0,1.25] 6 - [0,1.5] 7 - [0,1.75] Arguments: x: the input vector we want to convert. typically its dimension will be (B,H,W,C) for an image, or (B,T,C) or (B,C) for for a 1D signal, where B=batch, H=height, W=width, C=channels or features, T=time for time series. classes: the number of classes to (or log2(classes) bits) to use of the values. value_range: max(x) - min(x) over all possible x values (e.g. for 8 bits, we would use 256 here). with_residue: if true, we split the value range into two sets and add the decimal fraction of the set to the one-hot representation for partial thermometer representation. merge_with_channels: if True, we will not create a separate dimension for the resulting matrix, but we will merge this dimension with the last dimension. use_two_hot_encoding: if true, we will distribute the weight between the current value and the next one to make sure the numbers will always be < 1. Returns: Converted x with classes with the last shape being C*classes. """ # just make sure we are processing floats so that we can compute fractional # values x = x.astype(np.float32) # the number of ranges are equal to the span of the original values # divided by the number of target classes. # # for example, if value_range is 256 and number of classes is 16, we have # 16 values (remaining 4 bits to redistribute). 
ranges = value_range/classes x_floor = np.floor(x / ranges) if use_two_hot_encoding: x_ceil = np.ceil(x / ranges) if with_residue: x_mod_f = (x - x_floor * ranges) / ranges # convert values to categorical. if use_two_hot_encoding, we may # end up with one more class because we need to distribute the # remaining bits to the saturation class. For example, if we have # value_range = 4 (0,1,2,3) and classes = 2, if we use_two_hot_encoding # we will have the classes 0, 1, 2, where for the number 3, we will # allocate 0.5 to bin 1 and 0.5 to bin 2 (namelly 3 = 0.5 * (2**2 + 2**1)). xc_f = to_categorical(x_floor, classes + use_two_hot_encoding) if with_residue: xc_f_m = xc_f == 1 if use_two_hot_encoding: xc_c = to_categorical(x_ceil, classes + use_two_hot_encoding) xc_c_m = xc_c == 1 if np.any(xc_c_m): xc_c[xc_c_m] = x_mod_f.reshape(xc_c[xc_c_m].shape) if np.any(xc_f_m): xc_f[xc_f_m] = (1.0 - x_mod_f.reshape(xc_f[xc_f_m].shape)) xc_f += xc_c else: if np.any(xc_f_m): xc_f[xc_f_m] += x_mod_f.reshape(xc_f[xc_f_m].shape) if merge_with_channels and len(xc_f.shape) != len(x.shape): sz = xc_f.shape sz = sz[:-2] + (sz[-2] * sz[-1],) xc_f = xc_f.reshape(sz) return xc_f ================================================ FILE: qkeras/base_quantizer.py ================================================ # Copyright 2025 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== import tensorflow.compat.v2 as tf import tensorflow.keras.backend as K def _create_variable_name(attr_name, var_name=None): """Creates variable name. Arguments: attr_name: string. attribute name var_name: string. variable name Returns: string. variable name """ if var_name: return var_name + "/" + attr_name # This naming scheme is to solve a problem of a layer having more than # one quantizer can have multiple qnoise_factor variables with the same # name of "qnoise_factor". return attr_name + "_" + str(K.get_uid(attr_name)) class BaseQuantizer(tf.Module): """Base quantizer. Defines behavior all quantizers should follow. """ def __init__(self): self.built = False def build(self, var_name=None, use_variables=False): if use_variables: if hasattr(self, "qnoise_factor"): self.qnoise_factor = tf.Variable( lambda: tf.constant(self.qnoise_factor, dtype=tf.float32), name=_create_variable_name("qnoise_factor", var_name=var_name), dtype=tf.float32, trainable=False, ) self.built = True def _set_trainable_parameter(self): pass def update_qnoise_factor(self, qnoise_factor): """Update qnoise_factor.""" if isinstance(self.qnoise_factor, tf.Variable): # self.qnoise_factor is a tf.Variable. # This is to update self.qnoise_factor during training. self.qnoise_factor.assign(qnoise_factor) else: if isinstance(qnoise_factor, tf.Variable): # self.qnoise_factor is a numpy variable, and qnoise_factor is a # tf.Variable. self.qnoise_factor = qnoise_factor.eval() else: # self.qnoise_factor and qnoise_factor are numpy variables. # This is to set self.qnoise_factor before building # (creating tf.Variable) it. self.qnoise_factor = qnoise_factor # Override not to expose the quantizer variables. @property def variables(self): return () # Override not to expose the quantizer variables. @property def trainable_variables(self): return () # Override not to expose the quantizer variables. 
@property def non_trainable_variables(self): return () ================================================ FILE: qkeras/bn_folding_utils.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Utility functions for folding batchnorm with qconv/qdense layers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import networkx as nx import tensorflow as tf from six.moves import range from tensorflow.keras.models import clone_model from tensorflow.keras.models import Model from tensorflow.keras import Input from .qconvolutional import QConv2D from .qconvolutional import QDepthwiseConv2D from .qtools import generate_layer_data_type_map as gen_map from .qtools import qgraph def convert_folded_layer_to_unfolded(layer): """Replace a source batchnorm folded layer with a non-folded layer. Args: layer: keras/qkeras layer type. 
Source layer to be replaced with Returns: new layer instance """ # get layer config from the composite layer config = layer.get_config() # set layer config for QConv2D layer by first creating a tmp # QConv2D object and generate template for its config if layer.__class__.__name__ == "QConv2DBatchnorm": new_layer = QConv2D(filters=1, kernel_size=(2, 2), use_bias=True) elif layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm": new_layer = QDepthwiseConv2D(kernel_size=(2, 2), use_bias=True) else: # TODO(lishanok): will extend to QDense in the future assert ValueError, "%s is not supported!" % layer.__class__.__name__ new_layer_cfg = new_layer.get_config() # set qconv2d config according to the values in the composite layer for (key, _) in new_layer_cfg.items(): if key in config.keys(): new_layer_cfg[key] = config[key] # in case use_bias is False in the composite layer, # we need to set it True because we have folded bias new_layer_cfg["use_bias"] = True # create a non-folded, e.g., qconv2d layer from config and replace # old layer with it if layer.__class__.__name__ == "QConv2DBatchnorm": new_layer = QConv2D.from_config(new_layer_cfg) elif layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm": new_layer = QDepthwiseConv2D.from_config(new_layer_cfg) else: raise ValueError("Unsupported layer conversion {}".format(layer.name)) return new_layer def unfold_model(model): """Convert a model with batchnorm folded layer to a normal model. "Normal" here refers to a model without composite folded layer such as QConv2DBatchnorm layer. This function replace the folded layers with a normal QConv/QDense layer. It aslo sets the weights in the normal layer with the folded weights in the folded layer. Model architecture could be either sequential or non-sequential. Arguments: model: keras object, model with folded layers. Returns: A model that replaces folded layers (e.g., QConv2DBatchnorm) with normal qkeras layers (e.g., QConv2D). 
This model can be passed on to hardware generator so that hardware doesn't see batch normalization parameters. """ def _convert_folded_layer(layer): if layer.__class__.__name__ in [ "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]: new_layer = convert_folded_layer_to_unfolded(layer) else: new_layer = layer.__class__.from_config(layer.get_config()) new_layer.build(layer.input_shape) return new_layer def _clone_weights(src_layer, new_layer): if (src_layer.__class__.__name__ == "QConv2DBatchnorm") and ( new_layer.__class__.__name__ == "QConv2D"): src_weights = src_layer.get_folded_weights() # transfer weights from folded layer to the target layer folded_kernel_quantized = ( src_weights[0].numpy()) folded_bias_quantized = ( src_weights[1].numpy()) new_layer.set_weights([folded_kernel_quantized, folded_bias_quantized]) elif (src_layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm") and ( new_layer.__class__.__name__ == "QDepthwiseConv2D"): # transfer weights from folded layer to the target layer src_weights = src_layer.get_folded_weights() folded_depthwise_kernel_quantized = src_weights[0].numpy() folded_bias_quantized = src_weights[1].numpy() new_layer.set_weights( [folded_depthwise_kernel_quantized, folded_bias_quantized]) else: new_layer.set_weights(src_layer.get_weights()) inp = Input(shape=model.input_shape[1:]) cloned_model = clone_model( model, input_tensors=inp, clone_function=_convert_folded_layer) # replace weights for (src_layer, new_layer) in zip(model.layers, cloned_model.layers): _clone_weights(src_layer, new_layer) return cloned_model def populate_bias_quantizer_from_accumulator(model, source_quantizers): """Populate the bias quantizer from accumulator type. When user set bias_quantizer=None for layers(e.g., QConv2DBatchnorm), this function generates the accumulator type of the layer MAC op and set it as the bias quantizer. Such step is skipped if user provided a specific bias quantizer type. Args: model: keras/qkeras model object. 
If the model doesn't contain any batchnorm folded layer or if the bias quanizer type in the folded layer is already given, no operation needed. Else we generate the bias quantizer type and set it in model. source_quantizers: list of qkeras quantizers. A list of quantizer types for model inputs. Returns: keras model object """ default_quantizer = "quantized_bits(8, 0, 1)" # if source_quantizers is None, CreateGraph will use default_quantizer (graph, source_quantizer_list) = qgraph.CreateGraph( model, source_quantizers, default_quantizer) qgraph.GraphPropagateActivationsToEdges(graph) # generate the quantizer types of each layer. For folded layers, if bias # quantizer is not given by user, this function will generate the accumulator # type and set it as the bias quantizer type. is_inference = False keras_quantizer = "quantized_bits(8, 0, 1)" keras_accumulator = "quantized_bits(8, 0, 1)" for_reference = False layer_map = gen_map.generate_layer_data_type_map( graph, source_quantizer_list, is_inference, keras_quantizer, keras_accumulator, for_reference) for layer in model.layers: # TODO(lishanok): extend to other layer types if necessary if layer.__class__.__name__ in [ "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]: if not layer.bias_quantizer: # if user didn't specify the bias quantizer, we set it as the # MAC accumulator type of the current layer's MAC operation qtools_bias_quantizer = layer_map["layer_data_type_map"][ layer].bias_quantizer if tf.is_tensor(qtools_bias_quantizer.int_bits): qtools_bias_quantizer.int_bits = ( qtools_bias_quantizer.int_bits.numpy()) layer.bias_quantizer = ( qtools_bias_quantizer.convert_to_qkeras_quantizer()) layer.bias_quantizer_internal = layer.bias_quantizer if layer.__class__.__name__ == "QConv2DBatchnorm": layer.quantizers = [layer.kernel_quantizer_internal, layer.bias_quantizer_internal] elif layer.__class__.__name__ == "QDepthwiseConv2DBatchnorm": layer.quantizers = [layer.depthwise_quantizer_internal, 
layer.bias_quantizer_internal] return model ================================================ FILE: qkeras/callbacks.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import os import tensorflow as tf class QNoiseScheduler(tf.keras.callbacks.Callback): """Schedules the gradual quantization noise training for each step (or epoch). It updates the qnoise_factor in the quantizers to gradually introduce the quantization noise during training. The idea was adopted from "https://arxiv.org/pdf/1903.01061.pdf" """ def __init__(self, start, finish, freq_type="epoch", update_freq=1, initial_step_or_epoch=0, exponent=3.0, use_ste=True, log_dir=None): """Initializes this QNoiseScheduler. Args: start: Int. The step (epoch) to start the gradual training. finish: Int. The step (epoch) to finish the gradual training. When the start and the finish are equal, the qnoise_factor will be 1.0 in the beginning of the training. freq_type: Str. "step" or "epoch". It sets the qnoise_factor update frequency type. update_freq: Int. Updating frequency of the qnoise_factor. initial_step_or_epoch: Int. Step or epoch at which to start training. exponent: Float. It is the exponent in the qnoise_factor calculation. 
It controls the rate of the gradual qnoise_factor change. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. log_dir: Str. log directory to save qnoise_factor every epoch end. """ super().__init__() self.start = start self.finish = finish if start > finish: raise ValueError( ("start {} must be greater than finish {}").format(start, finish)) supported_freq_type = ["step", "epoch"] if freq_type not in supported_freq_type: raise ValueError(("Invalid frequency type {}. only {} are " "supported.").format(freq_type, supported_freq_type)) self.freq_type = freq_type self.update_freq = update_freq self.initial_step_or_epoch = initial_step_or_epoch self.exponent = exponent self.qnoise_factor = None self.use_ste = use_ste self.quantizers = None self.summary_writer = None if log_dir: self.summary_writer = tf.summary.create_file_writer(log_dir) self.num_iters = np.array(0, dtype="int64") def calculate_qnoise_factor(self, freq): """Returns calculated qnoise_factor based on the current step (epoch) and the schedule parameters. Args: freq: The current step (or epoch) to calculate the qnoise_factor. Returns: qnoise_factor : calculated qnoise_factor. """ if freq < self.start: qnoise_factor = 0.0 elif freq <= self.finish and self.start != self.finish: val = float(self.finish - freq) / float(self.finish - self.start) qnoise_factor = 1.0 - np.power(val, self.exponent) else: qnoise_factor = 1.0 return qnoise_factor def set_qnoise_factor(self, quantizer, qnoise_factor): """Set self.qnoise_factor and update the qnoise_factor of the quantizer.""" # Updating the qnoise_factor of the quantizer. quantizer.update_qnoise_factor(qnoise_factor) # Updating the qnoise_factor of the callback. self.qnoise_factor = qnoise_factor def set_quantizers(self): """Set quantizers to update the qnoise_factor. This must be called before building the quantizers. 
""" for quantizer in self.quantizers: if hasattr(quantizer, "use_ste"): quantizer.use_ste = self.use_ste if hasattr(quantizer, "use_variables"): quantizer.use_variables = True if hasattr(quantizer, "built"): # If the quantizer has been built but not using tf.Variable then it # builds again to create tf.Variables. if quantizer.built and not isinstance(quantizer.qnoise_factor, tf.Variable): quantizer.build(use_variables=True) # Set the qnoise_factor to 0.0 to pretrain without quantization. self.set_qnoise_factor(quantizer, qnoise_factor=0.0) def get_quantizers(self, model): """Returns a list of quantizers with qnoise_factor in the model. Args: model: model to get a list of quantizers with qnoise_factor. Returns: A list of quantizers with the qnoise_factor variable. """ all_quantizers = [] for layer in model.layers: # A list of attributes holding the quantizer(s). for attr in ["quantizers", "quantizer"]: if hasattr(layer, attr): quantizers = getattr(layer, attr) quantizers = quantizers if attr == "quantizers" else [quantizers] for quantizer in quantizers: if hasattr(quantizer, "qnoise_factor"): all_quantizers.append(quantizer) return all_quantizers def update_qnoise_factor(self, freq): """Update the qnoise_factor of the model. Args: freq: The current step (epoch) to calculate the qnoise_factor. """ # Update the qnoise_factor at the frequency of self.update_freq. if freq % self.update_freq != 0: self.num_iters += 1 return new_qnoise_factor = self.calculate_qnoise_factor(freq) for quantizer in self.quantizers: # Updates the qnoise factors of the quantizers in the model. self.set_qnoise_factor(quantizer, new_qnoise_factor) self.num_iters += 1 def on_train_begin(self, logs=None): if not self.quantizers: # Build a list of quantizers which is used for updating qnoise_factor. 
self.quantizers = self.get_quantizers(self.model) self.set_quantizers() def on_epoch_begin(self, epoch, logs=None): if self.freq_type == "epoch": self.update_qnoise_factor(self.initial_step_or_epoch + self.num_iters) def on_epoch_end(self, epoch, logs=None): if self.summary_writer: with self.summary_writer.as_default(): tf.summary.scalar("qnoise_factor", data=self.qnoise_factor, step=epoch) def on_train_batch_begin(self, batch, logs=None): if self.freq_type == "step": self.update_qnoise_factor(self.initial_step_or_epoch + self.num_iters) ================================================ FILE: qkeras/codebook.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ Clustering based quantizers """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from sklearn.cluster import KMeans from tqdm import tqdm def create_in_out_table(km, quantizer): """Create [in, out] table needed to map compressed activations to codebook values. 
Given v: in_table[out_table[v]] => codebook value of v Arguments: km: KMeans model quantizer: quantizer function to apply to out_table Returns in_table: conversion of compressed table indexes to n-bit numbers out_table: conversion of n-bit output activations to compressed table indexes """ in_table = km.cluster_centers_.flatten() qrange = quantizer.range().reshape(-1, 1).astype(np.float32) out_table = km.predict(qrange).ravel() return in_table, out_table def activation_compression(model, compile_config, activation_indexes, bits, X_train, y_train, X_test, y_test, sample_size=1.0): """This function applies clustering based non-uniform quantization inspired by https://arxiv.org/pdf/1911.02079.pdf model: Keras model compile_config: Dictionary of arguments to be passed to model.compile() for all submodels activation_indexes: Index list of layers to be quantized. This will used to split the model and create submodels bits: Number of bits to compress activations to. This will results in 2**bits codebook values X_train, y_train: training data used to fit clustering algorithm X_test, y_test: validation data sample_size: fraction of training data activations to be used when computing codebook values Returns: cb_tables: [in, out] tables. See create_in_out_table docs models: list of keras submodels km_models: list of KMeans fitted models """ assert len(activation_indexes) > 0 assert 0.0 < sample_size <= 1.0 # n_init=10 maintains the same behavior as legacy versions of sklearn. This # was changed to "auto" in sklearn 1.4. 
km_models = [KMeans(2**bits, n_init=10)] * len(activation_indexes) cb_tables = [[]] * len(activation_indexes) models = [] x = x_in = model.layers[0].output for i in range(1, len(model.layers)): layer = model.layers[i] x = layer(x) if i in activation_indexes or i == len(model.layers) - 1: print("\nCreating submodel...") models.append(Model([x_in], [x])) x = x_in = Input(layer.output[0].shape, batch_size=layer.output.shape[0], dtype=layer.output.dtype) models[-1].compile(**compile_config) print(models[-1].summary()) print('\nsample_size: ', sample_size) x = X_train for i, model in enumerate(models[:-1]): print(f'fitting km[{i}]...') x = model.predict(x) km = km_models[i] temp = x.flatten().reshape(-1, 1) if sample_size < 1.0: idxs = np.random.choice(x.shape[0], size=int(sample_size * x.shape[0])) temp = temp[idxs] km.fit(temp) quantizer = getattr(model.layers[-1], 'quantizer', getattr(model.layers[-1], 'activation')) km.cluster_centers_ = quantizer(km.cluster_centers_).numpy() km.cluster_centers_.sort(axis=0) cb_tables[i] = create_in_out_table(km, quantizer) x = X_test for i, model in enumerate(models[:-1]): x = model.predict(x) km = km_models[i] preds = km.predict(x.flatten().reshape(-1, 1)) x = km.cluster_centers_[preds].reshape(x.shape) n_unique = np.unique(x.flatten()).shape[0] print(f"Number of unique activations: {n_unique}") assert n_unique <= 2**bits print('\nEvaluating...') models[-1].evaluate(x, y_test, verbose=2) return cb_tables, models, km_models def weight_compression(weights, bits, axis=0, quantizer=None): """Creates an in, out table that maps weight values to their codebook values. Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf Arguments: weights: Numpy array bits: Number of bits to compress weights to. 
This will results in 2**bits codebook values axis: axis to apply quantization by quantizer: quantizer function that will be applied to codebook values Returns: index_table: array of indices that maps to codebook values for all weights codebook_table: array of codebook values """ assert bits <= 8 n = 2**bits index_table = [] codebook_table = np.zeros((weights.shape[axis], n)) km_models = [None] * weights.shape[axis] for i, w in tqdm(enumerate(np.split(weights, weights.shape[axis], axis))): original_shape = w.shape w = w.ravel() km = KMeans(n, n_init=10) km.fit(w.reshape(-1, 1)) if quantizer: km.cluster_centers_ = quantizer(km.cluster_centers_).numpy() km.cluster_centers_.sort(axis=0) km_models[i] = km codebook_table[i, :] = km.cluster_centers_.flatten() preds = km.predict(w.reshape(-1, 1)) index_table.append(preds.reshape(original_shape)) index_table = np.concatenate(index_table, axis) return index_table, codebook_table def two_tier_embedding_compression(embeddings, bits, quantizer=None): """ Creates tables that maps embedding values to their codebook values. Based on the idea presented by https://arxiv.org/pdf/1911.02079.pdf Arguments: weights: Numpy array bits: Number of bits to compress weights to. 
This will results in 2**bits codebook values quantizer: quantizer function that will be applied to codebook values Returns: index_table: array of indices that maps to codebook values cluster_index_table: array that maps each row to the codebook table index codebook_table: array of codebook values quantized_embeddings: Numpy array MxN of quantized weights """ assert bits <= 8 n = 2**bits quantized_embeddings = embeddings.copy() index_table = np.zeros(embeddings.shape, dtype=np.uint8) cluster_index_table = np.zeros(index_table.shape[0], dtype=np.uint8) codebook_table = np.zeros((n, n)) km1 = KMeans(n, n_init=10) km1.fit(embeddings) tier1 = km1.predict(embeddings) km_models = [0] * n block_sizes = [0] * n for block_label in tqdm(range(n)): mask = block_label == tier1 indices = np.arange(embeddings.shape[0])[mask] block = embeddings[mask] km2 = KMeans(n, n_init=10) km2.fit(block.flatten().reshape(-1, 1)) if quantizer: km2.cluster_centers_ = quantizer(km2.cluster_centers_).numpy() km2.cluster_centers_.sort(axis=0) km_models[block_label] = km2 codebook_table[block_label, :] = km2.cluster_centers_.flatten() cluster_index_table[indices] = block_label block_sizes[block_label] = block.shape[0] for i in indices: preds = km2.predict(embeddings[i, :].reshape(-1, 1)) index_table[indices, :] = preds quantized_embeddings[i, :] = km2.cluster_centers_[preds].flatten() print('block_sizes:', block_sizes) return index_table, cluster_index_table, codebook_table, quantized_embeddings ================================================ FILE: qkeras/estimate.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Definition of quantization package.""" # Some parts of the code were taken from # # https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow # # and follows several papers. # # https://arxiv.org/pdf/1609.07061.pdf # from __future__ import absolute_import from __future__ import division from __future__ import print_function from collections import defaultdict import numpy as np import tensorflow.compat.v1 as tf from absl import logging from tensorflow.keras.layers import Activation from tensorflow.keras.layers import InputLayer from tensorflow.keras.models import Model from .qlayers import QActivation from .qlayers import QAdaptiveActivation from .qlayers import QDense from .qconvolutional import QConv1D from .qconvolutional import QConv2D from .qconvolutional import QDepthwiseConv2D from .qconvolutional import QSeparableConv2D from .qpooling import QAveragePooling2D from .quantizers import quantized_bits from .quantizers import quantized_relu from .quantizers import quantized_tanh from .quantizers import quantized_ulaw from .bn_folding_utils import unfold_model from .utils import get_model_sparsity def analyze_accumulator(in_model, x, verbose=False): """Analyzes the distribution of weights to specify size of accumulators. Computes the maximum number of bits for the accumulator assuming the inputs have a distribution given by the dictionary x. 
for each output channel i: max_positive_value[i] = sum(w[i]) + bias[i] for the positive weights max_negative_value[i] = sum(w[i]) + bias[i] for the negative weights max_value = max( max_positive_value[i] * positive(x) + max_negative_value[i] * negative(x), - (max_negative_value[i] * positive(x) + max_positive_value[i] * negative(x)) ) accumulator_size = ceil( log2( max_value ) ) x right now is a dictionary of the form: { layer_name: (min_value, max_value) } in the future, we want to provide a sample and compute this automatically Arguments: in_model: keras model object, model to be evaluated x: dictionary of the form: { layer_name: (min_value, max_value) } input distribution verbose: boolean, if true, print statistics messages Returns: dictionary containing { layer_name: accumulator_size } """ # this function converts a folded model to a "normal" model. It replace folded # layers (e.g., QConv2dBatchnorm) layer with qconv2d layer whenever possible. model = unfold_model(in_model) acc_sizes = {} for layer in model.layers: if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)): weights = layer.get_weights() k = weights[0] if layer.use_bias: b = weights[1] else: b = np.zeros((k.shape[-1],), dtype=np.float32) all_bits = [] nbits = [] for i in range(k.shape[1]): # compute sum of positive weights npp = np.sum(k[..., i] * (k[..., i] > 0)) + (b[i] > 0) * b[i] # compute sum of negative weights nnn = np.sum(k[..., i] * (k[..., i] < 0)) + (b[i] < 0) * b[i] # largest value is # npp * largest positive - nnn * largest_negative or # nnn * largest_positive - npp * largest_negative x_min = x[layer.name][0] x_max = x[layer.name][1] n1 = npp * (x_max > 0) * x_max + nnn * (x_min < 0) * x_min n0 = - (nnn * (x_max > 0) * x_max + npp * (x_min < 0) * x_min) if n1 > n0: nbits.append(n1) else: nbits.append(n0) all_bits.append((n1, n0)) max_bits = int(np.ceil(np.log2(max(nbits)))) acc_sizes[layer.name] = max_bits if 
verbose: print() print(layer.name, "- input range:", x[layer.name]) print(" max value:", np.amax(k)) print(" min value:", np.amin(k)) print(" most positive sum:", np.amax(np.array(all_bits)[:, 0])) print(" most negative sum:", -np.amax(np.array(all_bits)[:, 1])) print(" number of bits:", max_bits) if verbose: print() return acc_sizes def analyze_accumulator_from_sample( in_model, x_sample, mode="conservative", verbose=False): """Extracts range of inputs of quantized layers from samples.""" # mode is one of "conservative", "sampled" if mode not in ["conservative", "sampled"]: ValueError("'mode' has to be 'conservative' or 'sampled'") # this function converts a folded model to a "normal" model. It replace folded # layers (e.g., QConv2DBatchnorm) layer with qconv2d layer whenever possible. model = unfold_model(in_model) # get layer names of quantized layers (QDense and QConv2D) layer_names = [ layer.name for layer in model.layers if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)) ] # sampled mode: just apply x_sample and check the outputs if mode == "sampled": outputs = [ layer.output for layer in model.layers if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)) ] eval_outputs = Model(inputs=model.inputs, outputs=outputs) # predict values for all inputs to quantized layers values = eval_outputs.predict(x_sample) acc_sizes = {} for name, value in zip(layer_names, values): max_value = np.amax(np.abs(value)) if max_value != 0: acc_sizes[name] = int(np.ceil(np.log2(max_value))) else: acc_sizes[name] = 0 return acc_sizes # get inputs of quantized layers (QDense and QConv2D # we use Activation("linear") to trick keras and tensorflow # to avoid direct connections of inputs and any other # artifacts. 
outputs = [ Activation("linear")(layer.input) for layer in model.layers if (isinstance(layer, QDepthwiseConv2D) or isinstance(layer, QConv2D) or isinstance(layer, QConv1D) or isinstance(layer, QDense)) ] eval_inputs = Model(inputs=model.inputs, outputs=outputs) # predict values for all inputs to quantized layers values = eval_inputs.predict(x_sample) x_dict = {} for name, value in zip(layer_names, values): x_dict[name] = (np.amin(value), np.amax(value)) return analyze_accumulator(model, x_dict, verbose) def get_quant_mode(quant): """Returns the quantizer mode, number of bits and if it is a signed number.""" # qb(n)[0] +/-,exp[1] t(-1,0,+1)[2] b(-1,+1)[3] b(0,1)[4] # entry is tuple: # (instance name, mode #(above), number of bits (-1 means check class), # sign bit) modes = [ # depending on the number of bits, quantized_bits may be 2, 2 ("quantized_bits", 0, -1, 1), ("bernoulli", 4, 1, 0), ("stochastic_ternary", 2, 2, 1), ("ternary", 2, 2, 1), ("stochastic_binary", 3, 1, 1), ("binary", 3, 1, 1), # depending on the number of bits, quantized_relu may be 4, 1 ("quantized_relu", 0, -1, 0), # depending on the number of bits, quantized_tanh may be 2, 2 ("quantized_ulaw", 0, -1, 1), ("quantized_tanh", 0, -1, 1), ("quantized_po2", 1, -1, 1), ("quantized_relu_po2", 1, -1, 0), ("float", 5, 32, 1) ] for (inst, mode, bits, sign) in modes: if not quant or getattr(quant, "__name__", None) == "linear": # if quantizer not specified or linear, we use float type if inst == "float": return (mode, bits, sign) elif quant.__class__.__name__ == inst: if bits == -1: bits = int(quant.bits) if ( isinstance(quant, quantized_bits) or isinstance(quant, quantized_tanh) or isinstance(quant, quantized_ulaw)): if bits == 2 and int(quant.integer) == 1: mode = 2 elif isinstance(quant, quantized_relu): if bits == 1 and int(quant.integer) == 1: mode = 4 return (mode, bits, sign) raise ValueError("Quantizer {} Not Found".format(quant)) def get_operation_type(layer, output_cache): """Checks quantizers 
around layer and weights to get operation type. Determines operator strenght according to the following table. x qb(n) +/-,exp t(-1,0,+1) b(-1,+1) b(0,1) float qb(n) * << >>,- ?,- ?,- ? * +/-,exp << >>,- + ?,- ^ ?,- * w t(-1,0,+1) ?,- ?,- ?,^ ?,^ ^ * b(-1,+1) ?,- ^ ?,^ ^ ^ * b(0,1) ? ?,- ^ ^ ^ * float * * * * * * Arguments: layer: layer in Keras to determine the operation strength. output_cache: cache of input tensor bit sizes. Returns: One of "mult", "fmult", "adder", "barrel", "mux", "xor". Note: "mult" represents quantized bit multiplier, "fmult" represents floating point multiplier. """ wx_table = [ ["mult", "barrel", "mux", "mux", "mux", "fmult"], ["barrel", "adder", "mux", "xor", "mux", "fmult"], ["mux", "mux", "mux", "mux", "xor", "fmult"], ["mux", "xor", "mux", "xor", "xor", "fmult"], ["mux", "mux", "xor", "xor", "xor", "fmult"], ["fmult", "fmult", "fmult", "fmult", "fmult", "fmult"], ] # check if this is a quantized layers (QDense, QConv, QDepthwise) if hasattr(layer, "get_quantizers"): w_quant = layer.get_quantizers()[0] w_mode, w_bits, w_sign = get_quant_mode(w_quant) if w_mode == "float": logging.warning("%s kernel is unquantized!", layer.name) # for the input, get tensor input and search the cache that associates # the quantizer with a tensor if output_cache.get(layer.input.experimental_ref(), None) is not None: x_mode, x_bits, x_sign = get_quant_mode( output_cache.get(layer.input.experimental_ref())) if x_mode == "float": logging.warning("%s input is unquantized!", layer.name) else: print("cannot determine presently model for {}".format(layer.name)) return "null", (w_mode, -1), (w_bits, -1), (w_sign, -1) mode = wx_table[w_mode][x_mode] return mode, (w_mode, x_mode), (w_bits, x_bits), (w_sign, x_sign) raise ValueError("Cannot find suitable quantization candidates for {}".format( layer.name)) def create_activation_cache(model): """Creates an activation cache for the tensors of a model.""" input_quantizer = quantized_relu(8, 0) output_cache = {} # If 
using a Sequential model, the input layer is hidden. Therefore, add the # input quantization to the cache if the first layer is not an input layer if not isinstance(model.layers[0], InputLayer): output_cache[model.layers[0].input.experimental_ref()] = input_quantizer # cache graph tensors' activations for l in model.layers: output_cache[l.output.experimental_ref()] = l if isinstance(l, QActivation) or isinstance(l, QAdaptiveActivation) : output_cache[l.output.experimental_ref()] = l.quantizer elif isinstance(l, InputLayer): # assume the input is 8-bit positive value output_cache[l.output.experimental_ref()] = input_quantizer elif l.__class__.__name__ in [ "QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D" ]: output_cache[l.output.experimental_ref()] = l.activation else: if isinstance(l.input, list): # right now, we just get the first one - we assume this is the leading # one. all_q = [ output_cache.get(l.input[i].experimental_ref()) for i in range(len(l.input)) ] q = all_q[0] else: q = output_cache.get(l.input.experimental_ref(), None) output_cache[l.output.experimental_ref()] = q if q is None: raise ValueError("Unknown operation in {}".format(l.name)) return output_cache def extract_model_operations(in_model): """Determines types of operations for convolutions.""" model = unfold_model(in_model) cache_q = create_activation_cache(model) cache_o = {} operations = {} for layer in model.layers: if layer.__class__.__name__ == "InputLayer": continue if isinstance(layer.input, list): input_shape = [ cache_o.get(layer.input[i].experimental_ref(), layer.input[i].get_shape()) for i in range(len(layer.input)) ] else: input_shape = cache_o.get(layer.input.experimental_ref(), layer.input.get_shape()) # Check if the inputs are a list of Dimensions if isinstance(input_shape, list): # Iterate though all of the input shapes and extract the dimension values for i, dim in enumerate(input_shape): if isinstance(dim[0], tf.Dimension): shape = [None] for j in range(1, len(dim)): 
shape.append(dim[j] if isinstance(dim[j], int) else dim[j].value) input_shape[i] = tuple(shape) output_shape = layer.compute_output_shape(input_shape) cache_o[layer.output.experimental_ref()] = output_shape if layer.__class__.__name__ not in ["QDense", "QConv2D", "QConv1D", "QDepthwiseConv2D", "QSeparableConv1D", "QSeparableConv2D"]: continue if layer.__class__.__name__ in ["QConv2D"]: _, _, _, channels_i = input_shape _, height_o, width_o, channels_o = output_shape weight = layer.get_weights()[0] kernel_h, kernel_w, _, _ = weight.shape number_of_operations = ( height_o * width_o * channels_o * kernel_h * kernel_w * channels_i) number_of_weights = (kernel_h * kernel_w * channels_o * channels_i) number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warning("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QConv1D"]: _, _, channels_i = input_shape _, time_o, channels_o = output_shape weight = layer.get_weights()[0] kernel_length, _, _ = weight.shape number_of_operations = ( time_o * channels_o * kernel_length * channels_i) number_of_weights = (kernel_length * channels_o * channels_i) number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warning("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QDepthwiseConv2D"]: _, _, _, channels_i = input_shape _, height_o, width_o, channels_o = output_shape weight_1 = layer.get_weights()[0] 
kernel_h, kernel_w, _, _ = weight_1.shape number_of_operations = ( kernel_h * kernel_w * height_o * width_o * channels_i) number_of_weights = (kernel_h * kernel_w * channels_o * channels_i) number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warning("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QSeparableConv1D"]: _, _, channels_i = input_shape _, time_o, channels_o = output_shape weight_1 = layer.get_weights()[0] kernel_length, _, _ = weight_1.shape number_of_operations = ( kernel_length * time_o * channels_i + time_o * channels_o) number_of_weights = [ kernel_length * channels_i, channels_o * channels_i] number_of_bias = 0 if len(layer.get_weights()) > 2: number_of_bias = layer.get_weights()[2].shape[0] depthwise_quant, pointwise_quant, bias_quant = layer.get_quantizers() depthwise_type = get_quant_mode(depthwise_quant) pointwise_type = get_quant_mode(pointwise_quant) weight_type = [depthwise_type, pointwise_type] bias_type = get_quant_mode(bias_quant) if depthwise_type[0] == "float": logging.warning("%s depthwise kernel is unquantized!", layer.name) if pointwise_type[0] == "float": logging.warning("%s pointwise kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QSeparableConv2D"]: _, _, _, channels_i = input_shape _, height_o, width_o, channels_o = output_shape weight_1 = layer.get_weights()[0] kernel_h, kernel_w, _, _ = weight_1.shape number_of_operations = ( kernel_h * kernel_w * height_o * width_o * channels_i + height_o * width_o * channels_o) number_of_weights = [ kernel_h * kernel_w * channels_i, channels_o * channels_i] 
number_of_bias = 0 if len(layer.get_weights()) > 2: number_of_bias = layer.get_weights()[2].shape[0] depthwise_quant, pointwise_quant, bias_quant = layer.get_quantizers() depthwise_type = get_quant_mode(depthwise_quant) pointwise_type = get_quant_mode(pointwise_quant) weight_type = [depthwise_type, pointwise_type] bias_type = get_quant_mode(bias_quant) if depthwise_type[0] == "float": logging.warning("%s depthwise kernel is unquantized!", layer.name) if pointwise_type[0] == "float": logging.warning("%s pointwise kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warning("%s bias is unquantized!", layer.name) elif layer.__class__.__name__ in ["QDense"]: # Find the input and output shapes out of all possible dimensions. # Usually the first shape dimension will be the batch size, and the second # shape dimension will be the number of channels. However, if the # Dense layer is in Squeeze-and-Excite, the first shape dimension # will be the batch size, the second and third shape dimension will be the # spatial sizes (should both be 1), and the fourth shape dimensions will # be the number of channels ishape = np.array([i for i in input_shape if i is not None]) assert sum(ishape > 1) == 1, "Tensor shape has multiple >1 size dims" size_i = np.max(ishape) oshape = np.array([i for i in output_shape if i is not None]) assert sum(oshape > 1) == 1, "Tensor shape has multiple >1 size dims" size_o = np.max(oshape) number_of_operations = int(size_i * size_o) number_of_weights = size_i * size_o number_of_bias = 0 if len(layer.get_weights()) > 1: number_of_bias = layer.get_weights()[1].shape[0] weight_quant, bias_quant = layer.get_quantizers() weight_type = get_quant_mode(weight_quant) bias_type = get_quant_mode(bias_quant) if weight_type[0] == "float": logging.warnings("%s kernel is unquantized!", layer.name) if bias_type[0] == "float": logging.warnings("%s bias is unquantized!", layer.name) # "number_of_operations" is tensor_shape.Dimension type 
operations[layer.name] = { "type": get_operation_type(layer, cache_q), "number_of_operations": number_of_operations if isinstance(number_of_operations, int) else number_of_operations.value, "number_of_weights": number_of_weights, # if isinstance(number_of_weights, int) else number_of_weights.value, "number_of_bias": number_of_bias, # if isinstance(number_of_bias, int) else number_of_bias.value, "type_of_weights": weight_type, "type_of_bias": bias_type, } return operations def print_qstats(model): """Prints quantization statistics for the model.""" model_ops = extract_model_operations(model) ops_table = defaultdict(lambda: 0) print("") print("Number of operations in model:") for name in sorted(model_ops): mode, _, sizes, signs = model_ops[name]["type"] number = model_ops[name]["number_of_operations"] sign = "s" if sum(signs) > 0 else "u" op_name = sign + mode + "_" + str(sizes[0]) + "_" + str(sizes[1]) ops_table[op_name] += number print(" {:30}: {:5} ({})".format(str(name), str(number), str(op_name))) print("") print("Number of operation types in model:") for key in sorted(ops_table.keys()): if ops_table[key] > 0: print(" {:30}: {}".format(key, ops_table[key])) print("") print("Weight profiling:") total_bits = 0 for name in sorted(model_ops): weight_type = model_ops[name]["type_of_weights"] n_weights = model_ops[name]["number_of_weights"] if isinstance(weight_type, list): for i, (w_type, w_number) in enumerate(zip(weight_type, n_weights)): _, w_sizes, _ = w_type total_bits += w_number * w_sizes print(" {:30} : {:5} ({}-bit unit)".format( str(name) + "_weights_" + str(i), str(w_number), str(w_sizes))) else: _, w_sizes, _ = weight_type total_bits += n_weights * w_sizes print(" {:30} : {:5} ({}-bit unit)".format( str(name) + "_weights", str(n_weights), str(w_sizes))) _, b_sizes, _ = model_ops[name]["type_of_bias"] b_number = model_ops[name]["number_of_bias"] total_bits += b_number * b_sizes print(" {:30} : {:5} ({}-bit unit)".format( str(name) + "_bias", str(b_number), 
str(b_sizes))) print(" " + ("-"*40)) print(" {:30} : {:5}".format("Total Bits", total_bits)) print("") print("Weight sparsity:") total_sparsity, per_layer = get_model_sparsity(model, per_layer=True) for layer in per_layer: print(" {:30} : {:.4f}".format(str(layer[0]), layer[1])) print(" " + ("-"*40)) print(" {:30} : {:.4f}".format("Total Sparsity", total_sparsity)) ================================================ FILE: qkeras/experimental/quantizers/__init__.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Exports experimental quantizers.""" import tensorflow as tf from qkeras.experimental.quantizers.quantizers_po2 import quantized_bits_learnable_po2 from qkeras.experimental.quantizers.quantizers_po2 import quantized_bits_msqe_po2 __version__ = "0.9.0" ================================================ FILE: qkeras/experimental/quantizers/quantizers_po2.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Power-of-2 quantizers based on https://arxiv.org/pdf/2210.03671.pdf. Example usages: < MSQE-based quantizer > Default (using the second moments MSQE optimization and the outlier mask): quantized_bits_msqe_po2(bits=4) Per-channel quantization: quantized_bits_msqe_po2(bits=4, scale_axis=3, per_channel_scale=True) < Gradient-based (learnable) quantizer > Default (using the MSQE round (Round-to-Lower-MSQE)): quantized_bits_learnable_po2(bits=4) Per-channel quantization: quantized_bits_learnable_po2(bits=4, scale_axis=3, per_channel_scale=True) Relu activation (the MSQE round is not supported for non-variable tensors): quantized_bits_learnable_po2(bits=4, keep_negative=False, use_second_moments_msqe_opt=False, use_po2_scale_msqe_round=False) """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc import re import numpy as np from six.moves import range import tensorflow as tf import tensorflow.keras.backend as K from tensorflow.keras.layers import Layer def _update_ema_variable(variable, new_val, ema_decay, is_initialized, should_update): """Updates exponentially moving average (EMA) of a tf.Variable. This function directly updates the variable. Args: variable: A tf.Variable to be updated. new_val: A tensor with a new value to update 'variable'. Its shape is same as 'variable'. ema_decay: A scalar python float or tensor. EMA decay factor. 
is_initialized: A scalar tensor indicating whether 'variable' has been initialized or not. should_update: A scalar python bool or tensor indicating whether to update 'variable' or not. """ if not tf.is_tensor(should_update): should_update = tf.convert_to_tensor(should_update) val_to_update = ema_decay * variable + (1.0 - ema_decay) * new_val val_to_update = tf.cond(is_initialized, lambda: val_to_update, lambda: new_val) val_to_update = tf.cond(should_update, lambda: val_to_update, lambda: variable) variable.assign(val_to_update) def _get_scaling_axis(scale_axis, len_axis): """Gets the axis to perform scaling with. Args: scale_axis: an integer scalar tensor or None to get which axis to calculate scale from. If None, the scaling axis is set based on the image data format. len_axis: an integer scalar tensor of the dimension of the tensor to be quantized. Returns: A list of axes to be quantized together. """ if scale_axis is not None: axis = list(range(scale_axis)) axis += list(range(scale_axis + 1, len_axis)) else: if K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) return axis def _get_msqe_scale(x, q, scale_axis=None, per_channel_scale=True, msqe_weight=None): """Gets scaling factor for scaling the tensor per channel. It uses a linear least squares method to find the scaling factor. (https://en.wikipedia.org/wiki/Linear_least_squares) Args: x: A tensor object. Its elements are in float. q: A tensor object. Its elements are in quantized format of x. scale_axis: which axis to calculate scale from per_channel_scale: A bool. Whether to perform per-channel scaling or not. msqe_weight: A tensor object or None. Its elements are in float, which are used to perform weighted least squares optimization. If None, it performs non-weighted least squares optimization. Returns: A scaling factor tensor or scalar for scaling tensor per channel or per layer. 
""" # in different tensorflow version (e.g., 2.4) # x.shape is a tuple which doesn't have as_list() method try: x_shape = x.shape.as_list() except AttributeError: x_shape = list(x.shape) len_axis = len(x_shape) if msqe_weight is not None: sqrt_msqe_weight = tf.math.sqrt(msqe_weight) x = tf.math.multiply(x, sqrt_msqe_weight) q = tf.math.multiply(q, sqrt_msqe_weight) if not per_channel_scale: qx = K.mean(q * x, keepdims=True) qq = K.mean(q * q, keepdims=True) else: if len_axis > 1: axis = _get_scaling_axis(scale_axis, len_axis) qx = K.mean(tf.math.multiply(q, x), axis=axis, keepdims=True) qq = K.mean(tf.math.multiply(q, q), axis=axis, keepdims=True) else: # No summing (averaging) along the channel axis to get per-channel # scales. qx = tf.math.multiply(q, x) qq = tf.math.multiply(q, q) scale = qx / (qq + K.epsilon()) # Rounds the exponent to the nearest integer for power-of-2 scale. return K.pow(2.0, tf.math.rint(K.log(scale + K.epsilon()) / np.log(2.0))) class BaseQuantizerPO2(Layer): # pylint: disable=invalid-name """This is the base class from which all power-of-2 quantizers inherit, which is based on the reference paper (https://arxiv.org/pdf/2210.03671.pdf). Attributes: bits: Integer, number of bits to perform quantization. keep_negative: Boolean, if true, it keeps negative values and sets the quantization levels symmetrically around 0. If false, negative numbers is clipped to 0. scale_axis: Integer, which axis to calculate scale from. per_channel_scale: Boolean, whether to perform per-channel (true) or per-layer (false) quantization. init_scale: Float or None, initial scale factor to initialize the scale with (if None, it will be initialized based on the first inputs.). use_second_moments_msqe_opt: Bool, whether to use the second moments based MSQE optimization or not. The second moments is used as a weighting factor to calculate the quantization error. second_moments_ema_decay: Float, EMA decay factor for the second moments update. 
use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight. use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask. use_stable_scale_exponent: Bool, whether to use exponentially moving averaged ("stable") scale exponent or not. Note: there is a tf.Variable (self.switch_to_stable_scale) that controls when to apply the stable scale exponent (i.e., if use_stable_scale_exponent is true and self.switch_to_stable_scale is false, the stable scale exponent is updated but not used.). stable_scale_ema_decay: Float, EMA decay factor for the stable scale update. is_gradient_based: Bool, whether to optimize the scale_exponent from the gradients or not (i.e, if true, self.scale_exponent is set to be "trainable".) """ def __init__(self, bits=4, keep_negative=True, scale_axis=None, per_channel_scale=False, init_scale=None, use_second_moments_msqe_opt=False, second_moments_ema_decay=0.999, use_sqrt_of_msqe_weight=True, use_outlier_mask_msqe_weight=True, use_stable_scale_exponent=False, stable_scale_ema_decay=0.99, is_gradient_based=True, **kwargs): self.bits = bits self.keep_negative = keep_negative self.scale_axis = scale_axis self.per_channel_scale = per_channel_scale self.init_scale = init_scale self.use_second_moments_msqe_opt = use_second_moments_msqe_opt self.second_moments_ema_decay = second_moments_ema_decay self.use_sqrt_of_msqe_weight = use_sqrt_of_msqe_weight self.use_outlier_mask_msqe_weight = use_outlier_mask_msqe_weight self.use_stable_scale_exponent = use_stable_scale_exponent self.stable_scale_ema_decay = stable_scale_ema_decay self.is_gradient_based = is_gradient_based self.alpha = "auto_po2" # scale exponent to be learned. self.scale_exponent = None # Stores the power-of-2 scale factor used for quantization. self.scale = None # Axes to perform reduce sum (mean) operation. self.reduce_axes = None # Running averaged gradient variances of the input self.msqe_weight = None # A knob to switch to "stable_scale_exponent". 
self.switch_to_stable_scale = None # variable holding the running averaged scale exponent self.stable_scale_exponent = None # Indicator variable whether to update stable_scale_exponent or not. This # can be used as an indicator whether it is in training or not. self.should_update_stable_scale_exponent = None # The assignments from "kwargs" are to restore from the config. # The maximum quantization level of negative numbers. self.qn = kwargs.pop("qn") if "qn" in kwargs else None # The maximum quantization level of positive numbers. self.qp = kwargs.pop("qp") if "qp" in kwargs else None # Axes scaled together. self.scaled_axes = kwargs.pop( "scaled_axes") if "scaled_axes" in kwargs else None super().__init__(**kwargs) def build(self, input_shape): """Creates and initializes variables.""" # Number of quantization levels. levels = tf.math.pow(2.0, tf.cast(self.bits, dtype=tf.float32)) - 1 # Sets the number of quantization levels for the negative and positive # ranges. if self.keep_negative: # Sets them symmetric about 0 to reduce the quantization induced bias. self.qn = float((levels - 1.0) / 2.0) self.qp = float((levels - 1.0) / 2.0) else: self.qn = 0.0 self.qp = float(levels) if self.init_scale is None: init_scale_exponent = 0.0 init_scale = 1.0 else: init_scale = self.init_scale + K.epsilon() init_scale_exponent = tf.math.log(init_scale) / tf.math.log(2.0) if self.scale_axis is None: self.scale_axis = self._get_scale_axis(input_shape) self.scaled_axes = self._get_scaled_axes(self.scale_axis, input_shape) if self.per_channel_scale: scale_exponent_shape = tf.TensorShape([ input_shape[i] if i == self.scale_axis else 1 for i in range(len(input_shape)) ]) else: scale_exponent_shape = [1 for i in range(len(input_shape))] # Creates the scale exponent variable to be learned. 
self.scale_exponent = tf.Variable( lambda: tf.constant( init_scale_exponent, shape=scale_exponent_shape, dtype=tf.float32), trainable=self.is_gradient_based, synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.compat.v1.VariableAggregation.MEAN, name="scale_exponent") # "self.scale" is not a trainable variable which gets assigned not learned. self.scale = tf.Variable( lambda: tf.constant( init_scale, shape=scale_exponent_shape, dtype=tf.float32), trainable=False, synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.compat.v1.VariableAggregation.MEAN, name="scale") self.reduce_axes = [ i for i in range(len(self.scale_exponent.shape)) if self.scale_exponent.shape[i] == 1 ] if self.use_second_moments_msqe_opt: msqe_weight_shape = tf.TensorShape( [1 if s is None else s for s in input_shape]) self.msqe_weight = tf.Variable( lambda: tf.ones(shape=msqe_weight_shape), trainable=False, dtype=tf.float32, name="msqe_weight") if self.use_stable_scale_exponent: self.stable_scale_exponent = tf.Variable( lambda: tf.zeros_like(self.scale_exponent), dtype=tf.float32, trainable=False, synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.compat.v1.VariableAggregation.MEAN, name="stable_scale_exponent") self.switch_to_stable_scale = tf.Variable( False, trainable=False, name="switch_to_stable_scale") self.should_update_stable_scale_exponent = tf.Variable( False, trainable=False, name="should_update_stable_scale_exponent") # Inidicator variable for initializing variables (e.g, the scale exponent # etc.). self.is_initialized = tf.Variable( False, trainable=False, name="is_initialized") def call(self, inputs, msqe_weight=None): """Returns a fake quantized tensor of 'inputs'. Args: inputs: A tensor to be fake quantized. msqe_weight: A tensor which is used in the scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. 
Its shape is same as 'inputs' and its dtype is `float32` If None, it will be set by "self._get_msqe_weight" (this should be left as None unless you explicitly assign its value in a different way.). Returns: A tensor of fake quantized input. Its shape is same as 'inputs' and its dtype is `float32`. """ if not self.keep_negative: # Quantize only positive values (e.g. relu activation). inputs = tf.keras.activations.relu(inputs) if self.use_second_moments_msqe_opt: return self._update_second_moments_msqe_weight( self._quantize(inputs, msqe_weight=msqe_weight), inputs) return self._quantize(inputs, msqe_weight=msqe_weight) def _quantize(self, inputs, msqe_weight=None): """Returns (fake) quantized inputs and optimizes the scaling factor. Args: inputs: A tensor to be fake quantized and used in optimizing the scaling factor. msqe_weight: A tensor or None, which is used in the MSQE optimizations. Returns: A tensor of fake quantized inputs. """ # Initialize self.scale_exponent (it is initialized only once). self._initialize_scale_exponent(inputs) scale = self._get_scale(inputs, msqe_weight=msqe_weight) if self.use_stable_scale_exponent: # Only outputs the stable scale when 'self.switch_to_stable_scale' is set # to true, which is false by default. scale = self._get_stable_scale(scale) # Stores the scaling factors used for quantization. self.scale.assign(scale) # Perform rounding. inputs_rounded = self._round_quant(inputs / scale) # Perform clipping. inputs_clipped = self._clip_quant(inputs_rounded) inputs_quant = scale * inputs_clipped # Update initialization indicator. self.is_initialized.assign(True) return inputs_quant @tf.custom_gradient def _update_second_moments_msqe_weight(self, input_quantized, inputs): """Updates the second moments of the gradients respect to the inputs. Args: input_quantized: A tensor which is the output from 'self._quantize' method (fake quantized input). inputs: A tensor which is the input to 'self._quantize' method. 
Returns: 'input_quantized', the upstream gradient of 'input_quantized', and the gradients (zeros) of 'inputs' """ def grad(upstream_grad): """Calculates and updates the second moments of the gradients.""" # Get a mask for clipped inputs (i.e., 1.0 for rounded inputs and # 0.0 for clipped inputs). self.scale is the previously used scaling # factors. clip_error_mask = self._get_clipped_inputs_mask(inputs, self.scale) # Calculate the second moments of the gradients respect to 'inputs' that # is clip_error_mask * upstream_grad. second_moments = clip_error_mask * upstream_grad * upstream_grad # Update the second moments _update_ema_variable( self.msqe_weight, second_moments, self.second_moments_ema_decay, self.is_initialized, should_update=True) return upstream_grad, tf.zeros_like(inputs) return input_quantized, grad @abc.abstractmethod def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None): """Returns power-of-2 scaling factors for quantization. Args: inputs: A tensor to be used to optimize the scale value. reduce_axes: A list of axes to be summed (averaged) over. msqe_weight: A tensor which is used in scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. Its shape is same as 'inputs' and its dtype is `float32`. Returns: A tensor of power-of-2 scaling factors. Its shape is same as 'self.scale_exponent' and its dtype is `float32`. """ raise NotImplementedError @abc.abstractmethod def _get_init_scale_exponent(self, inputs): """Returns a scale exponent tensor to initialize "self.scale_exponent". Args: inputs: A tensor to be used to calculate initial scale exponent values. Returns: A tensor of scale exponent. Its shape is same as 'self.scale_exponent' and its dtype is `float32`. """ raise NotImplementedError @abc.abstractmethod def _get_outlier_mask(self, inputs): """Returns a tensor to suppress outliers in the input for MSQE optimizations. Args: inputs: A tensor to be used to generate the outlier mask. 
Returns: A tensor to mask out the outliers of the inputs. Its shape is same as 'inputs' and its dtype is `float32`. """ raise NotImplementedError def _get_msqe_weight(self, inputs=None): """Returns weighting factors for MSQE optimizations. Args: inputs: A tensor to be used to generate the outlier mask. Returns: A tensor to be used as weighting factors for MSQE optimizations or None. Note: it is assumed that when 'None' is returned, no weighting factors will be applied for MSQE optimizations. Raises: ValueError: if 'inputs' is None when self.use_outlier_mask_msqe_weight is True. """ if self.use_outlier_mask_msqe_weight and inputs is None: raise ValueError( f"inputs must not be None if self.use_outlier_mask_msqe_weight is" f" True.") if self.msqe_weight is None: # Only returns the outlier mask return self._get_outlier_mask( inputs) if self.use_outlier_mask_msqe_weight else None msqe_weight = self.msqe_weight if self.use_sqrt_of_msqe_weight: # To use square rooted msqe_weight msqe_weight = tf.math.sqrt(msqe_weight) if self.use_outlier_mask_msqe_weight: # Returns the outlier mask modulated msqe_weight msqe_weight = msqe_weight * self._get_outlier_mask(inputs) return msqe_weight def _get_stable_scale(self, scale): """Updates and returns power-of-2 'stable' scaling factors. It updates the exponential moving average (EMA) of the scale exponent when self.should_update_stable_scale_exponent is true and self.switch_to_stable_scale is false, and returns scaling factor based on the stable (EMAed) scale exponent when self.switch_to_stable_scale is set true else returns passed-in 'scale'. Args: scale: A tensor of power-of-2 scaling factors. Returns: A tensor of power-of-2 scaling factors. """ # Freezes updating exponential moving average of self.stable_scale_exponent # when self.should_update_stable_scale_exponent is false or # self.switch_to_stable_scale is set True. 
should_update = tf.logical_and(self.should_update_stable_scale_exponent, not self.switch_to_stable_scale) # Update the stable (EMAed) scale exponent. # Note: when 'self.is_initialized' is false, 'self.stable_scale_exponent' is # assigned with the scale exponent of the 'scale' input otherwise it is # updated with exponential moving average. stable_scale = self._update_stable_scale_exponent(scale, should_update, self.is_initialized) # Use the stable scale only when self.switch_to_stable_scale is set True. scale = tf.cond(self.switch_to_stable_scale, lambda: stable_scale, lambda: scale) return scale def _update_stable_scale_exponent(self, scale, should_update, is_initialized): """Updates and returns stable (EMAed) power-of-2 scaling factors. It performs exponential moving average on the scale exponent, not on the scale itself. Args: scale: a tensor to be used to update exponential moving average of scale exponents. should_update: A bool. Whether to update exponential moving average of scale exponents. is_initialized: A bool. Whether to initialize the stable scale exponent. Returns: A tensor of (stable) power-of-2 scaling factors """ scale_exponent = self._get_po2_scale_exponent(scale) _update_ema_variable( self.stable_scale_exponent, scale_exponent, ema_decay=self.stable_scale_ema_decay, is_initialized=is_initialized, should_update=should_update) return tf.math.pow(2.0, tf.math.rint(self.stable_scale_exponent)) def _initialize_scale_exponent(self, inputs): """Initializes the scale exponent only once. It only initializes 'self.scale_exponent' once when there is no preset initial scaling factor (i.e., self.init_scale is None). Args: inputs: A tensor, where the initial scale exponent is based on. 
""" update_cond = tf.math.logical_and(not self.is_initialized, self.init_scale is None) scale_exponent_to_init = tf.cond( update_cond, lambda: tf.stop_gradient(self._get_init_scale_exponent(inputs)), lambda: self.scale_exponent) self.scale_exponent.assign(scale_exponent_to_init) def _get_clipped_inputs_mask(self, inputs, scale): """Returns a tensor to mask out the clipped inputs. The mask has 1.0 for the rounded inputs and 0.0 for the clipped inputs. Args: inputs: A tensor to get the clipping mask from. scale: A tensor of the scaling factor. Returns: A tensor to mask out the clipped inputs. """ inputs_rounded = tf.math.rint(inputs / scale) clip_error_mask = tf.math.logical_and( tf.less_equal(inputs_rounded, self.qp), tf.greater_equal(inputs_rounded, -self.qn)) return tf.cast(clip_error_mask, tf.float32) def _get_scale_axis(self, input_shape): """Returns the scaling axis based on the input shape. Args: input_shape: a tuple of integers which is the size of the input channels. Returns: A scalar value. """ if K.image_data_format() == "channels_last": scale_axis = (len(input_shape) - 1) if len(input_shape) else 0 else: scale_axis = 1 if input_shape[0] is None else 0 return scale_axis def _get_scaled_axes(self, scale_axis, input_shape): """Returns the axes scaled together. Args: scale_axis: an integer of the scaling axis. input_shape: a tuple of integers which is the size of the input channels. Returns: A list of integers. """ if self.per_channel_scale: scaled_axes = list(range(scale_axis)) else: scaled_axes = list(range(len(input_shape))) return scaled_axes def _clip_quant(self, inputs): """Returns clipped inputs (scale-normalized) by the quantization levels. Args: inputs: A tensor (scale-normalized input value). Returns: A tensor clipped by the quantization levels. """ return tf.minimum(tf.maximum(inputs, -self.qn), self.qp) def _round_quant(self, inputs): """Returns rounded inputs using a straight-through estimator (STE). Args: inputs: A tensor to be rounded. 
    Returns:
      A tensor through a straight-through estimator.
    """
    # Forward pass: rint(inputs); backward pass: identity (STE).
    return inputs + tf.stop_gradient(-inputs + tf.math.rint(inputs))

  def _simple_quantize(self, inputs, scale, should_return_q=False):
    """Returns quantized inputs without a straight-through estimator (STE).

    Args:
      inputs: A tensor to be quantized.
      scale: A tensor of the scaling factor.
      should_return_q: if true, quantized inputs in integer will be also
        returned.

    Returns:
      A tensor of fake quantized inputs (, a tensor of quantized inputs)
    """
    inputs_rounded = tf.math.rint(inputs / scale)
    inputs_clipped = self._clip_quant(inputs_rounded)
    if should_return_q:
      # Also return the integer codes alongside the de-quantized values.
      return scale * inputs_clipped, inputs_clipped
    else:
      return scale * inputs_clipped

  def _get_po2_scale(self, scale):
    """Returns power-of-2 constrained scaling factors.

    Args:
      scale: A tensor to be power-of-2 constrained.

    Returns:
      A tensor (power-of-2 constrained scaling factor).
    """
    return tf.math.pow(2.0, self._get_po2_scale_exponent(scale))

  def _get_po2_scale_exponent(self, scale):
    """Returns power-of-2 constrained scale exponent.

    Args:
      scale: A tensor to get power-of-2 scale exponent from.

    Returns:
      A tensor constrained to be in integer values.
    """
    # K.epsilon() guards log(0) for zero scales.
    scale_exponent = tf.math.log(scale + K.epsilon()) / tf.math.log(2.0)
    return tf.round(scale_exponent)

  def _calculate_msqe(self, x, xq, reduce_axes=None, msqe_weight=None):
    """Returns the mean squared quantization error (MSQE).

    Args:
      x: a tensor of the original inputs.
      xq: a tensor of the fake quantized inputs.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        MSQE calculation.

    Returns:
      A tensor of the MSQE
    """
    if reduce_axes is None:
      reduce_axes = self.reduce_axes
    msqe = tf.math.pow(x - xq, 2.0)
    if msqe_weight is not None:
      # Element-wise weighting of the squared errors before reduction.
      msqe *= msqe_weight
    return tf.reduce_sum(msqe, axis=reduce_axes, keepdims=True)

  def _calculate_msqe_inputs(self,
                             inputs,
                             scale,
                             reduce_axes=None,
                             msqe_weight=None):
    """Returns the mean squared quantization error (MSQE) of the inputs.

    Args:
      inputs: a tensor to calculate the MSQE from.
      scale: a tensor to scale (quantize) the input with.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        MSQE calculation.

    Returns:
      A tensor of the MSQE
    """
    inputs_quant = self._simple_quantize(inputs, scale)
    return self._calculate_msqe(
        inputs, inputs_quant, reduce_axes=reduce_axes, msqe_weight=msqe_weight)

  def _least_squares_msqe_scale(self,
                                inputs,
                                scale,
                                reduce_axes=None,
                                msqe_weight=None,
                                num_lls_iters=3,
                                should_return_msqe=False):
    """Returns power-of-2 scaling factors from linear least squares regression.

    Args:
      inputs: a tensor to optimize the scaling factor from.
      scale: a tensor to be used as initial quantize the input with.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        linear least squares regression.
      num_lls_iters: An integer. Number of linear least squares regression
        iterations.
      should_return_msqe: A bool. Whether to return the MSQE of the inputs.

    Returns:
      A tensor of power-of-2 scaling factors (, a tensor of the MSQE)
    """
    if reduce_axes is None:
      reduce_axes = self.reduce_axes
    best_scale = tf.identity(scale)
    xq, q = self._simple_quantize(inputs, best_scale, should_return_q=True)
    best_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
    for _ in range(num_lls_iters):
      # performs linear least squares regression: refit the scale to the
      # current integer codes 'q', then re-quantize with the new scale.
      new_scale = _get_msqe_scale(
          x=inputs,
          q=q,
          scale_axis=self.scale_axis,
          per_channel_scale=self.per_channel_scale,
          msqe_weight=msqe_weight)
      xq, q = self._simple_quantize(inputs, new_scale, should_return_q=True)
      new_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
      # Update the best scale and the best msqe (keep the previous best when
      # the new iterate does not improve).
      best_scale = tf.where(new_msqe < best_msqe, new_scale, best_scale)
      best_msqe = tf.where(new_msqe < best_msqe, new_msqe, best_msqe)
    if should_return_msqe:
      return best_scale, best_msqe
    else:
      return best_scale

  def _line_search_msqe_scale(self,
                              inputs,
                              scale,
                              reduce_axes=None,
                              msqe_weight=None,
                              line_search_range=6,
                              should_return_msqe=False):
    """Returns power-of-2 scaling factors from line search.

    Args:
      inputs: a tensor to optimize the scaling factor from.
      scale: a tensor to be used as initial quantize the input with.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        line search.
      line_search_range: An integer. Search range of the line search.
      should_return_msqe: A bool. Whether to return the MSQE of the inputs.

    Returns:
      A tensor of power-of-2 scaling factors (, a tensor of the MSQE)
    """
    if reduce_axes is None:
      reduce_axes = self.reduce_axes
    best_scale = tf.identity(scale)
    xq = self._simple_quantize(inputs, best_scale)
    best_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
    best_scale_exponent = self._get_po2_scale_exponent(best_scale)
    # PO2 exponent search offsets: symmetric, nonzero offsets around the
    # current exponent (0 is the starting point, already evaluated).
    end_range = line_search_range // 2 + 1
    po2_exponent_offsets = [i for i in range(-end_range+1,end_range) if i != 0]
    for exp_offset in po2_exponent_offsets:
      # Optimize scale by probing the neighboring power-of-2 exponent.
      new_scale = tf.math.pow(2.0, best_scale_exponent + exp_offset)
      xq = self._simple_quantize(inputs, new_scale)
      new_msqe = self._calculate_msqe(inputs, xq, reduce_axes, msqe_weight)
      # Update the best scale and msqe
      best_scale = tf.where(new_msqe < best_msqe, new_scale, best_scale)
      best_msqe = tf.where(new_msqe < best_msqe, new_msqe, best_msqe)
    if should_return_msqe:
      return best_scale, best_msqe
    else:
      return best_scale

  def _optimize_msqe_scale(self,
                           inputs,
                           scale,
                           reduce_axes=None,
                           msqe_weight=None,
                           num_lls_iters=None,
                           should_line_search=True,
                           line_search_range=None):
    """Returns optimized power-of-2 scaling factors.

    It performs an iterative linear least squares regression and an optional
    line search to find optimal power-of-2 scaling factors for the given
    inputs from the initial scaling factors ('scale').

    Args:
      inputs: a tensor to find optimal power-of-2 scaling factors for.
      scale: a tensor to be used as initial scaling factors.
      reduce_axes: A list of axes to be summed (averaged) over or None.
        If None, self.reduce_axes will be used.
      msqe_weight: A tensor or None. If None, no weighting is applied in the
        optimizations.
      num_lls_iters: An integer. Number of linear least squares regression
        iterations.
      should_line_search: A bool. Whether to perform a line search.
      line_search_range: An integer. Search range of the line search.
Returns: A tensor of power-of-2 scaling factors, A tensor of the MSQE """ if reduce_axes is None: reduce_axes = self.reduce_axes if num_lls_iters is None: num_lls_iters = self.num_lls_iters if line_search_range is None: line_search_range = self.line_search_range scale, msqe = self._least_squares_msqe_scale( inputs, scale, reduce_axes=self.reduce_axes, msqe_weight=msqe_weight, num_lls_iters=num_lls_iters, should_return_msqe=True) if should_line_search: scale, msqe = self._line_search_msqe_scale( inputs, scale, reduce_axes=self.reduce_axes, msqe_weight=msqe_weight, line_search_range=line_search_range, should_return_msqe=True) # Having an additional '_get_po2_scale' is just to make sure returning # scaling factors are in power-of-2. return self._get_po2_scale(scale), msqe def max(self): """Returns the maximum value that the quantizer can represent.""" if hasattr(self, "is_initialized") and self.is_initialized.numpy(): return self._get_scale() * self.qp else: return 1.0 def min(self): """Returns the minimum value that the quantizer can represent.""" if self.keep_negative: if hasattr(self, "is_initialized") and self.is_initialized.numpy(): return self._get_scale() * (-self.qn) else: return -1.0 else: return 0.0 class quantized_bits_learnable_po2(BaseQuantizerPO2): # pylint: disable=invalid-name """Quantizes the number to a number of bits by learnable scale factors. For more details, see https://arxiv.org/abs/2210.03671. The implementation was inspired by "TRAINED QUANTIZATION THRESHOLDS FOR ACCURATE AND EFFICIENT FIXED-POINT INFERENCE OF DEEP NEURAL NETWORKS" (https://arxiv.org/pdf/1903.08066.pdf). Attributes: bits: Integer, number of bits to perform quantization. keep_negative: Boolean, if true, it keeps negative values and sets the quantization levels symmetrically around 0. If false, negative numbers is clipped to 0. scale_axis: Integer, which axis to calculate scale from. 
per_channel_scale: Boolean, whether to perform per-channel (true) or per-layer (false) quantization. init_scale: Float or None, initial scale factor to initialize the scale with (if None, it will be initialized based on the first inputs.). use_second_moments_msqe_opt: Bool, whether to use the second moments based MSQE optimization or not. second_moments_ema_decay: Float, EMA decay factor for the second moments update. use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight. use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask. use_stable_scale_exponent: Bool, whether to use exponentially moving averaged ("stable") scale exponent or not. Note: there is a tf.Variable (self.switch_to_stable_scale) that controls when to apply the stable scale exponent (i.e., if use_stable_scale_exponent is true and self.switch_to_stable_scale is false, the stable scale exponent is updated but not used.). stable_scale_ema_decay: Float, EMA decay factor for the stable scale update. min_init_scale: float or None. minimum initial scale value. If None, the initial scale value is not bounded by a minimum value. It is useful to prevent zero initial scale value for inputs with all zeros (e.g., bias). use_po2_scale_ceil: Bool, whether to use ceil function for constraining power-of-2 scale exponents. If false, round function is used instead. use_po2_scale_msqe_round: Bool, whether to use MSQE rounding function for constraining power-of-2 scale exponents. Note: MSQE rounding has precedence over ceil and round function. 
""" def __init__(self, bits=4, keep_negative=True, scale_axis=None, per_channel_scale=False, init_scale=None, use_second_moments_msqe_opt=True, second_moments_ema_decay=0.999, use_sqrt_of_msqe_weight=True, use_outlier_mask_msqe_weight=True, use_stable_scale_exponent=False, stable_scale_ema_decay=0.99, min_init_scale=0.00001, use_po2_scale_ceil=True, use_po2_scale_msqe_round=True, **kwargs): self.min_init_scale = min_init_scale self.use_po2_scale_ceil = use_po2_scale_ceil self.use_po2_scale_msqe_round = use_po2_scale_msqe_round # An indicator variable to control usage of MSQE rounding function, which is # set to true by default (i.e, if use_po2_scale_msqe_round is true, MSQE # rounding is used by default based on self.switch_to_msqe_round.). It can # be used to delay using MSQE rounding. self.switch_to_msqe_round = None super().__init__( bits=bits, keep_negative=keep_negative, scale_axis=scale_axis, per_channel_scale=per_channel_scale, init_scale=init_scale, use_second_moments_msqe_opt=use_second_moments_msqe_opt, second_moments_ema_decay=second_moments_ema_decay, use_sqrt_of_msqe_weight=use_sqrt_of_msqe_weight, use_outlier_mask_msqe_weight=use_outlier_mask_msqe_weight, use_stable_scale_exponent=use_stable_scale_exponent, stable_scale_ema_decay=stable_scale_ema_decay, is_gradient_based=True, **kwargs) def __str__(self): # Convert Tensors to printable strings by converting to a numpy array and # then using regex to remove brackets when there is only one integer bit. 
ptn, repl = r"\[(\d)\]", r"\g<1>" bits = re.sub( ptn, repl, str(self.bits.numpy() if isinstance(self.bits, tf.Variable) else self .bits)) flags = [] flags.append("bits=" + str(int(bits))) flags.append("keep_negative=" + str(self.keep_negative)) flags.append("scale_axis=" + str(self.scale_axis)) flags.append("per_channel_scale=" + str(self.per_channel_scale)) flags.append("init_scale=" + str(self.init_scale)) flags.append("use_second_moments_msqe_opt=" + str(self.use_second_moments_msqe_opt)) flags.append("second_moments_ema_decay=" + str(self.second_moments_ema_decay)) flags.append("use_outlier_mask_msqe_weight=" + str(self.use_outlier_mask_msqe_weight)) flags.append("use_sqrt_of_msqe_weight=" + str(self.use_sqrt_of_msqe_weight)) flags.append("use_stable_scale_exponent=" + str(self.use_stable_scale_exponent)) flags.append("stable_scale_ema_decay=" + str(self.stable_scale_ema_decay)) flags.append("min_init_scale=" + str(self.min_init_scale)) flags.append("use_po2_scale_ceil=" + str(self.use_po2_scale_ceil)) flags.append("use_po2_scale_msqe_round=" + str(self.use_po2_scale_msqe_round)) return "quantized_bits_learnable_po2(" + ",".join(flags) + ")" def build(self, input_shape): """Creates and initializes variables.""" super().build(input_shape) if self.use_po2_scale_msqe_round: self.switch_to_msqe_round = tf.Variable( True, trainable=False, name="switch_to_msqe_round") def _get_init_scale_exponent(self, inputs): """Returns inputs distribution based initial scale exponent values. Args: inputs: A tensor to be used to calculate initial scale exponent values. Returns: A tensor of initial scale exponent values. """ std = tf.math.reduce_std(inputs, axis=self.reduce_axes, keepdims=True) # Uses 3 sigma percentile to get scale scale = 3.0 * std / tf.cast(self.qp, dtype=tf.float32) # Prevents zero scale values for inputs with all zeros (e.g., bias). 
if self.min_init_scale is not None: scale = tf.math.maximum(scale, self.min_init_scale) # Returns scale exponent return tf.math.log(scale) / tf.math.log(2.0) def _get_outlier_mask(self, inputs): """Returns a tensor to mask outliers in the input for MSQE optimizations. The outlier threshold is based on the (unconstrained) output dynamic range of the quantizer. Args: inputs: A tensor to be used to generate the outlier mask. Returns: A tensor to mask out the outliers of the inputs. Its shape is same as 'inputs' and its dtype is `float32`. """ # Calculates the output (unconstrained) dynamic range of the quantizer (i.e. # , self.scale_exponent is not power-of-2 constrained.). outlier_threshold = tf.math.pow(2.0, self.scale_exponent) * (self.qp + 0.5) return tf.where( abs(inputs) <= outlier_threshold, tf.ones_like(inputs, dtype=tf.float32), tf.zeros_like(inputs, dtype=tf.float32)) def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None): """Returns power-of-2 scaling factors for quantization. Args: inputs: A tensor to be used for MSQE rounding. Note: ceil and round functions do not use the inputs. reduce_axes: A list of axes to be summed (averaged) over. msqe_weight: A tensor which is used in scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. Its shape is same as 'inputs' and its dtype is `float32`. Returns: A tensor of power-of-2 scaling factors. """ if self.use_po2_scale_ceil: scale_exponent = tf.math.ceil(self.scale_exponent) else: scale_exponent = tf.math.rint(self.scale_exponent) # MSQE rounding requires the inputs to optimize the scale exponent. if self.use_po2_scale_msqe_round and inputs is not None: scale_exponent_msqe = self.msqe_round( inputs=inputs, scale_exponent=self.scale_exponent, reduce_axes=reduce_axes, msqe_weight=msqe_weight) # Control when to use MSQE rounding. Note: self.switch_to_msqe_round is # set to true by default. 
scale_exponent = tf.cond(self.switch_to_msqe_round, lambda: scale_exponent_msqe, lambda: scale_exponent) # Apply STE scale_exponent = self.scale_exponent + tf.stop_gradient(scale_exponent - self.scale_exponent) return tf.math.pow(2.0, scale_exponent) def msqe_round(self, inputs, scale_exponent, reduce_axes=None, msqe_weight=None): """Returns MSQE-wise optimum power-of-2 scale exponents. Args: inputs: A tensor, MSQE rounding is based on. scale_exponent: A tensor, learnable scale exponents which are not constrained in power-of-2. reduce_axes: A list of axes to be summed (averaged) over or None. If None, self.reduce_axes is used. msqe_weight: A tensor which is used to weight MSQE rounding or None. If None, a tensor (or None) from self._get_msqe_weight is used. Returns: A tensor of power-of-2 scale exponents. """ if reduce_axes is None: reduce_axes = self.reduce_axes if msqe_weight is None: # Returned msqe_weight can be None. msqe_weight = self._get_msqe_weight(inputs) # floor scale_exponent_floor = tf.math.floor(scale_exponent) msqe_floor = self._calculate_msqe_inputs( inputs=inputs, scale=tf.math.pow(2.0, scale_exponent_floor), reduce_axes=reduce_axes, msqe_weight=msqe_weight) # ceil scale_exponent_ceil = tf.math.ceil(scale_exponent) msqe_ceil = self._calculate_msqe_inputs( inputs=inputs, scale=tf.math.pow(2.0, scale_exponent_ceil), reduce_axes=reduce_axes, msqe_weight=msqe_weight) return tf.where(msqe_floor < msqe_ceil, scale_exponent_floor, scale_exponent_ceil) def get_config(self): config = { "bits": self.bits, "keep_negative": self.keep_negative, "scale_axis": self.scale_axis, "per_channel_scale": self.per_channel_scale, "init_scale": self.init_scale, "use_second_moments_msqe_opt": self.use_second_moments_msqe_opt, "second_moments_ema_decay": self.second_moments_ema_decay, "use_outlier_mask_msqe_weight": self.use_outlier_mask_msqe_weight, "use_sqrt_of_msqe_weight": self.use_sqrt_of_msqe_weight, "use_stable_scale_exponent": self.use_stable_scale_exponent, 
"stable_scale_ema_decay": self.stable_scale_ema_decay, "min_init_scale": self.min_init_scale, "use_po2_scale_ceil": self.use_po2_scale_ceil, "use_po2_scale_msqe_round": self.use_po2_scale_msqe_round, "qn": self.qn, "qp": self.qp, "scaled_axes": self.scaled_axes, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) class quantized_bits_msqe_po2(BaseQuantizerPO2): # pylint: disable=invalid-name """Quantizes the number to a number of bits by MSQE based scaling factors. For more details, see https://arxiv.org/abs/2210.03671. Attributes: bits: Integer, number of bits to perform quantization. keep_negative: Boolean, if true, it keeps negative values and sets the quantization levels symmetrically around 0. If false, negative numbers is clipped to 0. scale_axis: Integer, which axis to calculate scale from. per_channel_scale: Boolean, whether to perform per-channel (true) or per-layer (false) quantization. init_scale: Float or None, initial scale factor to initialize the scale with (if None, it will be initialized based on the first inputs.). use_second_moments_msqe_opt: Bool, whether to use the second moments based MSQE optimization or not. second_moments_ema_decay: Float, EMA decay factor for the second moments update. use_sqrt_of_msqe_weight: Bool, whether to use square root of MSQE weight. use_outlier_mask_msqe_weight: Bool, whether to apply outlier mask. use_stable_scale_exponent: Bool, whether to use exponentially moving averaged ("stable") scale exponent or not. Note: there is a tf.Variable (self.switch_to_stable_scale) that controls when to apply the stable scale exponent (i.e., if use_stable_scale_exponent is true and self.switch_to_stable_scale is false, the stable scale exponent is updated but not used.). stable_scale_ema_decay: Float, EMA decay factor for the stable scale update. outlier_mask_sigma: Float, sigma to apply for the outlier masking threshold. num_lls_iters: An integer. 
Number of linear least squares regression iterations. should_line_search: A bool. Whether to perform a line search. line_search_range: An integer. Search range of the line search. """ def __init__(self, bits=4, keep_negative=True, scale_axis=None, per_channel_scale=False, init_scale=None, use_second_moments_msqe_opt=True, second_moments_ema_decay=0.999, use_sqrt_of_msqe_weight=True, use_outlier_mask_msqe_weight=True, use_stable_scale_exponent=False, stable_scale_ema_decay=0.99, outlier_mask_sigma=2.0, num_lls_iters=3, should_line_search=True, line_search_range=6, **kwargs): self.outlier_mask_sigma = outlier_mask_sigma self.num_lls_iters = num_lls_iters self.should_line_search = should_line_search self.line_search_range = line_search_range super().__init__( bits=bits, keep_negative=keep_negative, scale_axis=scale_axis, per_channel_scale=per_channel_scale, init_scale=init_scale, use_second_moments_msqe_opt=use_second_moments_msqe_opt, second_moments_ema_decay=second_moments_ema_decay, use_sqrt_of_msqe_weight=use_sqrt_of_msqe_weight, use_outlier_mask_msqe_weight=use_outlier_mask_msqe_weight, use_stable_scale_exponent=use_stable_scale_exponent, stable_scale_ema_decay=stable_scale_ema_decay, is_gradient_based=False, **kwargs) def __str__(self): # Convert Tensors to printable strings by converting to a numpy array and # then using regex to remove brackets when there is only one integer bit. 
ptn, repl = r"\[(\d)\]", r"\g<1>" bits = re.sub( ptn, repl, str(self.bits.numpy() if isinstance(self.bits, tf.Variable) else self .bits)) flags = [] flags.append("bits=" + str(int(bits))) flags.append("keep_negative=" + str(self.keep_negative)) flags.append("scale_axis=" + str(self.scale_axis)) flags.append("per_channel_scale=" + str(self.per_channel_scale)) flags.append("init_scale=" + str(self.init_scale)) flags.append("use_second_moments_msqe_opt=" + str(self.use_second_moments_msqe_opt)) flags.append("second_moments_ema_decay=" + str(self.second_moments_ema_decay)) flags.append("use_sqrt_of_msqe_weight=" + str(self.use_sqrt_of_msqe_weight)) flags.append("use_outlier_mask_msqe_weight=" + str(self.use_outlier_mask_msqe_weight)) flags.append("use_stable_scale_exponent=" + str(self.use_stable_scale_exponent)) flags.append("stable_scale_ema_decay=" + str(self.stable_scale_ema_decay)) flags.append("outlier_mask_sigma=" + str(self.outlier_mask_sigma)) flags.append("num_lls_iters=" + str(self.num_lls_iters)) flags.append("should_line_search=" + str(self.should_line_search)) flags.append("line_search_range=" + str(self.line_search_range)) return "quantized_bits_msqe_po2(" + ",".join(flags) + ")" def _get_init_scale_exponent(self, inputs): """Returns min and max of the inputs based initial scale exponent values. Args: inputs: A tensor to be used to calculate initial scale exponent values. Returns: A tensor of initial scale exponent values. """ scale = K.max( abs(inputs), axis=self.scaled_axes, keepdims=True) / tf.cast( self.qp, dtype=tf.float32) return self._get_po2_scale_exponent(scale) def _get_outlier_mask(self, inputs): """Returns a tensor to mask outliers in the input for MSQE optimizations. The outlier threshold is based on the inputs distribution. Args: inputs: A tensor to be used to generate the outlier mask. Returns: A tensor to mask out the outliers of the inputs. Its shape is same as 'inputs' and its dtype is `float32`. 
""" std = tf.math.reduce_std(inputs, axis=self.reduce_axes, keepdims=True) outlier_threshold = self.outlier_mask_sigma * std return tf.where( abs(inputs) <= outlier_threshold, tf.ones_like(inputs), tf.zeros_like(inputs)) def _get_scale(self, inputs=None, reduce_axes=None, msqe_weight=None): """Returns power-of-2 scaling factors for quantization. Args: inputs: A tensor to be used to optimize the scale value. reduce_axes: A list of axes to be summed (averaged) over. msqe_weight: A tensor which is used in scale optimization to weight the MSQE (Mean Squared Quantization Error) of individual input elements. Its shape is same as 'inputs' and its dtype is `float32`. Returns: A tensor of power-of-2 scaling factors. Its shape is same as 'self.scale_exponent' and its dtype is `float32`. """ if inputs is None: return self._get_po2_scale(self.scale) if reduce_axes is None: reduce_axes = self.reduce_axes if msqe_weight is None: msqe_weight = self._get_msqe_weight(inputs) scale, _ = self._optimize_msqe_scale( inputs, tf.math.pow(2.0, tf.round(self.scale_exponent)), reduce_axes=reduce_axes, msqe_weight=msqe_weight, num_lls_iters=self.num_lls_iters, should_line_search=self.should_line_search, line_search_range=self.line_search_range, ) self.scale_exponent.assign(self._get_po2_scale_exponent(scale)) return scale def get_config(self): config = { "bits": self.bits, "keep_negative": self.keep_negative, "scale_axis": self.scale_axis, "per_channel_scale": self.per_channel_scale, "init_scale": self.init_scale, "use_second_moments_msqe_opt": self.use_second_moments_msqe_opt, "second_moments_ema_decay": self.second_moments_ema_decay, "use_sqrt_of_msqe_weight": self.use_sqrt_of_msqe_weight, "use_outlier_mask_msqe_weight": self.use_outlier_mask_msqe_weight, "use_stable_scale_exponent": self.use_stable_scale_exponent, "stable_scale_ema_decay": self.stable_scale_ema_decay, "outlier_mask_sigma": self.outlier_mask_sigma, "num_lls_iters": self.num_lls_iters, "should_line_search": 
self.should_line_search, "line_search_range": self.line_search_range, "qn": self.qn, "qp": self.qp, "scaled_axes": self.scaled_axes, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) ================================================ FILE: qkeras/qconv2d_batchnorm.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Fold batchnormalization with previous QConv2D layers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from six.moves import range import tensorflow as tf from tensorflow.keras import layers from tensorflow.keras.models import Model from .qconvolutional import QConv2D from .quantizers import * from tensorflow.python.framework import smart_cond as tf_utils from tensorflow.python.ops import math_ops tf.compat.v2.enable_v2_behavior() # TODO(lishanok): Create an abstract folding parent class class QConv2DBatchnorm(QConv2D): """Fold batchnormalization with a previous qconv2d layer.""" def __init__( self, # qconv2d params filters, kernel_size, strides=(1, 1), padding="valid", data_format="channels_last", dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer="he_normal", bias_initializer="zeros", kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, 
kernel_constraint=None, bias_constraint=None, kernel_quantizer=None, bias_quantizer=None, # batchnorm params axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, renorm=False, renorm_clipping=None, renorm_momentum=0.99, fused=None, trainable=True, virtual_batch_size=None, adjustment=None, # other params ema_freeze_delay=None, folding_mode="ema_stats_folding", **kwargs): """Initialize a composite layer that folds conv2d and batch normalization. The first group of parameters correponds to the initialization parameters of a qconv2d layer. check qkeras.qconvolutional.qconv2d for details. The 2nd group of parameters corresponds to the initialization parameters of a BatchNormalization layer. Check keras.layers.normalization.BatchNorma lizationBase for details. The 3rd group of parameters corresponds to the initialization parameters specific to this class. ema_freeze_delay: int. number of steps before batch normalization mv_mean and mv_variance will be frozen and used in the folded layer. folding_mode: string "ema_stats_folding": mimic tflite which uses the ema statistics to fold the kernel to suppress quantization induced jitter then performs the correction to have a similar effect of using the current batch statistics. "batch_stats_folding": use batch mean and variance to fold kernel first; after enough training steps switch to moving_mean and moving_variance for kernel folding. 
""" # intialization the qconv2d part of the composite layer super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, **kwargs ) # initialization of batchnorm part of the composite layer self.batchnorm = layers.BatchNormalization( axis=axis, momentum=momentum, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, moving_mean_initializer=moving_mean_initializer, moving_variance_initializer=moving_variance_initializer, beta_regularizer=beta_regularizer, gamma_regularizer=gamma_regularizer, beta_constraint=beta_constraint, gamma_constraint=gamma_constraint, renorm=renorm, renorm_clipping=renorm_clipping, renorm_momentum=renorm_momentum, fused=fused, trainable=trainable, virtual_batch_size=virtual_batch_size, adjustment=adjustment) self.ema_freeze_delay = ema_freeze_delay assert folding_mode in ["ema_stats_folding", "batch_stats_folding"] self.folding_mode = folding_mode def build(self, input_shape): super(QConv2DBatchnorm, self).build(input_shape) # self._iteration (i.e., training_steps) is initialized with -1. When # loading ckpt, it can load the number of training steps that have been # previously trainied. If start training from scratch. 
# TODO(lishanok): develop a way to count iterations outside layer self._iteration = tf.Variable(-1, trainable=False, name="iteration", dtype=tf.int64) def call(self, inputs, training=None): # numpy value, mark the layer is in training training = self.batchnorm._get_training_value(training) # pylint: disable=protected-access # checking if to update batchnorm params if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0): # if ema_freeze_delay is None or a negative value, do not freeze bn stats bn_training = tf.cast(training, dtype=bool) else: bn_training = tf.math.logical_and(training, tf.math.less_equal( self._iteration, self.ema_freeze_delay)) kernel = self.kernel # run conv to produce output for the following batchnorm conv_outputs = tf.keras.backend.conv2d( inputs, kernel, strides=self.strides, padding=self.padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if self.use_bias: bias = self.bias conv_outputs = tf.keras.backend.bias_add( conv_outputs, bias, data_format=self.data_format) else: bias = 0 _ = self.batchnorm(conv_outputs, training=bn_training) self._iteration.assign_add(tf_utils.smart_cond( training, lambda: tf.constant(1, tf.int64), lambda: tf.constant(0, tf.int64))) # calcuate mean and variance from current batch bn_shape = conv_outputs.shape ndims = len(bn_shape) reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis] keep_dims = len(self.batchnorm.axis) > 1 mean, variance = self.batchnorm._moments( # pylint: disable=protected-access math_ops.cast(conv_outputs, self.batchnorm._param_dtype), # pylint: disable=protected-access reduction_axes, keep_dims=keep_dims) # get batchnorm weights gamma = self.batchnorm.gamma beta = self.batchnorm.beta moving_mean = self.batchnorm.moving_mean moving_variance = self.batchnorm.moving_variance if self.folding_mode == "batch_stats_folding": # using batch mean and variance in the initial training stage # after sufficient training, switch to moving mean and variance 
new_mean = tf_utils.smart_cond( bn_training, lambda: mean, lambda: moving_mean) new_variance = tf_utils.smart_cond( bn_training, lambda: variance, lambda: moving_variance) # get the inversion factor so that we replace division by multiplication inv = math_ops.rsqrt(new_variance + self.batchnorm.epsilon) if gamma is not None: inv *= gamma # fold bias with bn stats folded_bias = inv * (bias - new_mean) + beta elif self.folding_mode == "ema_stats_folding": # We always scale the weights with a correction factor to the long term # statistics prior to quantization. This ensures that there is no jitter # in the quantized weights due to batch to batch variation. During the # initial phase of training, we undo the scaling of the weights so that # outputs are identical to regular batch normalization. We also modify # the bias terms correspondingly. After sufficient training, switch from # using batch statistics to long term moving averages for batch # normalization. # use batch stats for calcuating bias before bn freeze, and use moving # stats after bn freeze mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon) batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon) if gamma is not None: mv_inv *= gamma batch_inv *= gamma folded_bias = tf_utils.smart_cond( bn_training, lambda: batch_inv * (bias - mean) + beta, lambda: mv_inv * (bias - moving_mean) + beta) # moving stats is always used to fold kernel in tflite; before bn freeze # an additional correction factor will be applied to the conv2d output inv = mv_inv else: assert ValueError # wrap conv kernel with bn parameters folded_kernel = inv * kernel # quantize the folded kernel if self.kernel_quantizer is not None: q_folded_kernel = self.kernel_quantizer_internal(folded_kernel) else: q_folded_kernel = folded_kernel # If loaded from a ckpt, bias_quantizer is the ckpt value # Else if bias_quantizer not specified, bias # quantizer is None and we need to calculate bias quantizer # type according to accumulator 
type. User can call # bn_folding_utils.populate_bias_quantizer_from_accumulator( # model, input_quantizer_list]) to populate such bias quantizer. if self.bias_quantizer_internal is not None: q_folded_bias = self.bias_quantizer_internal(folded_bias) else: q_folded_bias = folded_bias applied_kernel = q_folded_kernel applied_bias = q_folded_bias # calculate conv2d output using the quantized folded kernel folded_outputs = tf.keras.backend.conv2d( inputs, applied_kernel, strides=self.strides, padding=self.padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if training is True and self.folding_mode == "ema_stats_folding": batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon) y_corr = tf_utils.smart_cond( bn_training, lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) * math_ops.rsqrt(variance + self.batchnorm.epsilon)), lambda: tf.constant(1.0, shape=moving_variance.shape)) folded_outputs = math_ops.mul(folded_outputs, y_corr) folded_outputs = tf.keras.backend.bias_add( folded_outputs, applied_bias, data_format=self.data_format) if self.activation is not None: return self.activation(folded_outputs) return folded_outputs def get_config(self): base_config = super().get_config() bn_config = self.batchnorm.get_config() config = {"ema_freeze_delay": self.ema_freeze_delay, "folding_mode": self.folding_mode} name = base_config["name"] out_config = dict( list(base_config.items()) + list(bn_config.items()) + list(config.items())) # names from different config override each other; use the base layer name # as the this layer's config name out_config["name"] = name return out_config def get_quantization_config(self): return { "kernel_quantizer": str(self.kernel_quantizer_internal), "bias_quantizer": str(self.bias_quantizer_internal), "activation": str(self.activation), "filters": str(self.filters) } def get_quantizers(self): return self.quantizers def get_folded_weights(self): """Function to get the batchnorm folded weights. 
This function converts the weights by folding batchnorm parameters into the weight of QConv2D. The high-level equation: W_fold = gamma * W / sqrt(variance + epsilon) bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta """ kernel = self.kernel if self.use_bias: bias = self.bias else: bias = 0 # get batchnorm weights and moving stats gamma = self.batchnorm.gamma beta = self.batchnorm.beta moving_mean = self.batchnorm.moving_mean moving_variance = self.batchnorm.moving_variance # get the inversion factor so that we replace division by multiplication inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon) if gamma is not None: inv *= gamma # wrap conv kernel and bias with bn parameters folded_kernel = inv * kernel folded_bias = inv * (bias - moving_mean) + beta return [folded_kernel, folded_bias] ================================================ FILE: qkeras/qconvolutional.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

import numpy as np
import tensorflow as tf
from tensorflow.keras import constraints
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import DepthwiseConv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import InputSpec
from tensorflow.keras.layers import SeparableConv1D
from tensorflow.keras.layers import SeparableConv2D
from .qlayers import get_auto_range_constraint_initializer
from .qlayers import QActivation
from .quantizers import get_quantized_initializer
from .quantizers import get_quantizer
from tensorflow.python.eager import context
from tensorflow.python.ops import array_ops
# from tensorflow.python.ops import array_ops
from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer


def deconv_output_length(
    input_length,
    filter_size,
    padding,
    output_padding=None,
    stride=0,
    dilation=1,
):
  """Determines output length of a transposed convolution given input length.

  Mirrors the output-shape arithmetic used by Keras' Conv2DTranspose.

  Args:
    input_length: Integer.
    filter_size: Integer.
    padding: one of `"same"`, `"valid"`, `"full"`.
    output_padding: Integer, amount of padding along the output dimension. Can
      be set to `None` in which case the output length is inferred.
    stride: Integer.
    dilation: Integer.

  Returns:
    The output length (integer), or None if input_length is None.
  """
  assert padding in {"same", "valid", "full"}
  if input_length is None:
    return None

  # Get the dilated kernel size
  filter_size = filter_size + (filter_size - 1) * (dilation - 1)

  pad = 0
  length = 0
  # Infer length if output padding is None, else compute the exact length
  if output_padding is None:
    if padding == "valid":
      length = input_length * stride + max(filter_size - stride, 0)
    elif padding == "full":
      length = input_length * stride - (stride + filter_size - 2)
    elif padding == "same":
      length = input_length * stride
  else:
    if padding == "same":
      pad = filter_size // 2
    elif padding == "valid":
      pad = 0
    elif padding == "full":
      pad = filter_size - 1

    length = (
        (input_length - 1) * stride + filter_size - 2 * pad + output_padding
    )
  return length


class QConv1D(Conv1D, PrunableLayer):
  """1D quantized convolution layer (e.g. temporal convolution)."""

  # most of these parameters follow the implementation of Conv1D in Keras,
  # with the exception of kernel_range, bias_range, kernel_quantizer
  # and bias_quantizer, and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # kernel_range/bias_range: for quantizer functions whose values
  # can go over [-1,+1], these values are used to set the clipping
  # value of kernels and biases, respectively, instead of using the
  # constraints specified by the user.
  #
  # we refer the reader to the documentation of Conv1D in Keras for the
  # other parameters.
  #
  def __init__(self,
               filters,
               kernel_size,
               strides=1,
               padding="valid",
               dilation_rate=1,
               activation=None,
               use_bias=True,
               kernel_initializer="he_normal",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               bias_quantizer=None,
               kernel_range=None,
               bias_range=None,
               **kwargs):

    # kernel_range/bias_range are kept only for backward compatibility.
    if kernel_range is not None:
      warnings.warn("kernel_range is deprecated in QConv1D layer.")
    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QConv1D layer.")

    self.kernel_range = kernel_range
    self.bias_range = bias_range

    self.kernel_quantizer = kernel_quantizer
    self.bias_quantizer = bias_quantizer

    # resolve string/dict quantizer specs into quantizer objects
    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.kernel_quantizer_internal, self.bias_quantizer_internal
    ]

    # derive constraint/initializer from the quantizer range when available
    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs
    )

  def call(self, inputs):
    """Applies conv1d with quantized kernel (and bias when enabled)."""
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    outputs = tf.keras.backend.conv1d(
        inputs,
        quantized_kernel,
        strides=self.strides[0],
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate[0])

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "kernel_range": self.kernel_range,
        "bias_range": self.bias_range,
    }
    base_config = super(QConv1D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation),
        "filters" : str(self.filters)
    }

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.kernel]


class QConv2D(Conv2D, PrunableLayer):
  """2D convolution layer (e.g. spatial convolution over images)."""

  # most of these parameters follow the implementation of Conv2D in Keras,
  # with the exception of kernel_range, bias_range, kernel_quantizer
  # and bias_quantizer, and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  # kernel_range/bias_range: for quantizer functions whose values
  # can go over [-1,+1], these values are used to set the clipping
  # value of kernels and biases, respectively, instead of using the
  # constraints specified by the user.
  # mask: Optional mask for kernel weights.
  #
  # we refer the reader to the documentation of Conv2D in Keras for the
  # other parameters.
  #
  def __init__(
      self,
      filters,
      kernel_size,
      strides=(1, 1),
      padding="valid",
      data_format="channels_last",
      dilation_rate=(1, 1),
      activation=None,
      use_bias=True,
      kernel_initializer="he_normal",
      bias_initializer="zeros",
      kernel_regularizer=None,
      bias_regularizer=None,
      activity_regularizer=None,
      kernel_constraint=None,
      bias_constraint=None,
      kernel_range=None,
      bias_range=None,
      kernel_quantizer=None,
      bias_quantizer=None,
      mask=None,
      **kwargs,
  ):
    # kernel_range/bias_range are kept only for backward compatibility.
    if kernel_range is not None:
      warnings.warn("kernel_range is deprecated in QConv2D layer.")
    if bias_range is not None:
      warnings.warn("bias_range is deprecated in QConv2D layer.")

    self.kernel_range = kernel_range
    self.bias_range = bias_range

    self.kernel_quantizer = kernel_quantizer
    self.bias_quantizer = bias_quantizer

    # resolve string/dict quantizer specs into quantizer objects
    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.kernel_quantizer_internal, self.bias_quantizer_internal
    ]

    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    if mask is not None:
      shape = mask.shape
      if len(shape) < 2:
        raise ValueError(
            "Expected shape to have rank at least 2 but provided shape has"
            f" rank {len(shape)}"
        )
      h, w = shape[0], shape[1]
      self._mask = np.reshape(
          mask, (h, w, 1, 1)
      )  # Extend the dimension to be 4D.
    else:
      self._mask = None

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        **kwargs
    )

  def convolution_op(self, inputs, kernel):
    """Performs the plain (unquantized) conv2d on already-prepared kernel."""
    return tf.keras.backend.conv2d(
        inputs,
        kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate,
    )

  @tf.function(jit_compile=True)
  def _jit_compiled_convolution_op(self, inputs, kernel):
    # XLA-compiled variant, needed for grouped convolutions (see call()).
    return self.convolution_op(inputs, kernel)

  def call(self, inputs):
    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    if self._mask is not None:
      # Apply mask to kernel weights if one is provided.
      quantized_kernel = quantized_kernel * self._mask

    # Grouped convolutions are not fully supported on the CPU for compiled
    # functions.
    #
    # This is a workaround taken from TF's core library. Remove when proper
    # support is added.
    # See definition of function "_jit_compiled_convolution_op" at
    # cs/third_party/py/tf_keras/layers/convolutional/base_conv.py for more
    # details.
    if self.groups > 1:
      outputs = self._jit_compiled_convolution_op(
          inputs, tf.convert_to_tensor(quantized_kernel)
      )
    else:
      outputs = self.convolution_op(inputs, quantized_kernel)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs, quantized_bias, data_format=self.data_format
      )

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def get_config(self):
    config = {
        "kernel_quantizer": constraints.serialize(
            self.kernel_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "kernel_range": self.kernel_range,
        "bias_range": self.bias_range,
        # the mask is serialized as a nested list so the config is JSON-safe
        "mask": self._mask.tolist() if self._mask is not None else None,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config):
    # restore the mask (stored as a nested list) back into an ndarray
    mask = config.get("mask")
    if mask is not None:
      mask = np.array(mask)
      config["mask"] = mask
    return cls(**config)

  def get_quantization_config(self):
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation),
        "filters" : str(self.filters)
    }

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.kernel]


class QConv2DTranspose(Conv2DTranspose, PrunableLayer):
  """2D transposed convolution layer."""

  # most of these parameters follow the implementation of Conv2DTranspose
  # in Keras, with the exception of kernel_quantizer and bias_quantizer
  # and kernel_initializer.
  #
  # kernel_quantizer: quantizer function/class for kernel
  # bias_quantizer: quantizer function/class for bias
  #
  # we refer the reader to the documentation of Conv2DTranspose in Keras for
  # the other parameters.
# def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, kernel_quantizer=None, bias_quantizer=None, **kwargs): self.kernel_quantizer = kernel_quantizer self.bias_quantizer = bias_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.kernel_quantizer_internal, self.bias_quantizer_internal ] kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=None, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, **kwargs ) def call(self, inputs): inputs_shape = array_ops.shape(inputs) batch_size = inputs_shape[0] if self.data_format == 'channels_first': h_axis, w_axis = 2, 3 else: h_axis, w_axis = 1, 2 height, width = inputs_shape[h_axis], inputs_shape[w_axis] 
kernel_h, kernel_w = self.kernel_size stride_h, stride_w = self.strides if self.output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = self.output_padding # Infer the dynamic output shape: out_height = deconv_output_length(height, kernel_h, padding=self.padding, output_padding=out_pad_h, stride=stride_h, dilation=self.dilation_rate[0]) out_width = deconv_output_length(width, kernel_w, padding=self.padding, output_padding=out_pad_w, stride=stride_w, dilation=self.dilation_rate[1]) if self.data_format == 'channels_first': output_shape = (batch_size, self.filters, out_height, out_width) else: output_shape = (batch_size, out_height, out_width, self.filters) if self.kernel_quantizer: quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel output_shape_tensor = array_ops.stack(output_shape) outputs = tf.keras.backend.conv2d_transpose( inputs, quantized_kernel, output_shape_tensor, strides=self.strides, padding=self.padding, data_format=self.data_format, dilation_rate=self.dilation_rate) if not context.executing_eagerly(): # Infer the static output shape: out_shape = self.compute_output_shape(inputs.shape) outputs.set_shape(out_shape) if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def get_config(self): config = { "kernel_quantizer": constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), } base_config = super(QConv2DTranspose, self).get_config() return dict(list(base_config.items()) + list(config.items())) def get_quantizers(self): return self.quantizers def 
get_prunable_weights(self): return [self.kernel] class QSeparableConv1D(SeparableConv1D, PrunableLayer): """Depthwise separable 1D convolution.""" # most of these parameters follow the implementation of SeparableConv1D # in Keras, with the exception of depthwise_quantizer, pointwise_quantizer # and bias_quantizer. # # depthwise_quantizer: quantizer function/class for depthwise spatial kernel # pointwise_quantizer: quantizer function/class for pointwise kernel # bias_quantizer: quantizer function/class for bias # # we refer the reader to the documentation of SeparableConv1D in Keras for # the other parameters. # def __init__(self, filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, depthwise_quantizer=None, pointwise_quantizer=None, bias_quantizer=None, **kwargs): self.depthwise_quantizer = depthwise_quantizer self.pointwise_quantizer = pointwise_quantizer self.bias_quantizer = bias_quantizer self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer) self.pointwise_quantizer_internal = get_quantizer(self.pointwise_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"): self.depthwise_quantizer_internal._set_trainable_parameter() if hasattr(self.pointwise_quantizer_internal, "_set_trainable_parameter"): self.pointwise_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.depthwise_quantizer_internal, self.pointwise_quantizer_internal, self.bias_quantizer_internal ] depthwise_constraint, depthwise_initializer = ( 
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))
    pointwise_constraint, pointwise_initializer = (
        get_auto_range_constraint_initializer(self.pointwise_quantizer_internal,
                                              pointwise_constraint,
                                              pointwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        depth_multiplier=depth_multiplier,
        activation=activation,
        use_bias=use_bias,
        depthwise_initializer=initializers.get(depthwise_initializer),
        pointwise_initializer=initializers.get(pointwise_initializer),
        bias_initializer=initializers.get(bias_initializer),
        depthwise_regularizer=regularizers.get(depthwise_regularizer),
        pointwise_regularizer=regularizers.get(pointwise_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        depthwise_constraint=constraints.get(depthwise_constraint),
        pointwise_constraint=constraints.get(pointwise_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs
    )

  def call(self, inputs):
    """Runs the 1D separable conv as a broadcast-to-4D separable conv2d."""
    if self.padding == 'causal':
      inputs = array_ops.pad(inputs, self._compute_causal_padding())
    spatial_start_dim = 1 if self.data_format == 'channels_last' else 2

    # Explicitly broadcast inputs and kernels to 4D.
    inputs = array_ops.expand_dims(inputs, spatial_start_dim)
    depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0)
    pointwise_kernel = array_ops.expand_dims(self.pointwise_kernel, 0)
    dilation_rate = (1,) + self.dilation_rate

    if self.padding == 'causal':
      op_padding = 'valid'
    else:
      op_padding = self.padding

    if self.depthwise_quantizer:
      quantized_depthwise_kernel = self.depthwise_quantizer_internal(
          depthwise_kernel)
    else:
      quantized_depthwise_kernel = depthwise_kernel
    if self.pointwise_quantizer:
      quantized_pointwise_kernel = self.pointwise_quantizer_internal(
          pointwise_kernel)
    else:
      quantized_pointwise_kernel = pointwise_kernel

    outputs = tf.keras.backend.separable_conv2d(
        inputs,
        quantized_depthwise_kernel,
        quantized_pointwise_kernel,
        # the 1-element strides tuple is doubled to cover both spatial dims
        strides=self.strides * 2,
        padding=op_padding,
        dilation_rate=dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    # drop the broadcast spatial dimension added above
    outputs = array_ops.squeeze(outputs, [spatial_start_dim])

    if self.activation is not None:
      return self.activation(outputs)

    return outputs

  def get_config(self):
    config = {
        "depthwise_quantizer": constraints.serialize(
            self.depthwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "pointwise_quantizer": constraints.serialize(
            self.pointwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
    }
    base_config = super(QSeparableConv1D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.depthwise_kernel, self.pointwise_kernel]


class QSeparableConv2D(SeparableConv2D, PrunableLayer):
  """Depthwise separable 2D convolution."""

  # most of
  # these parameters follow the implementation of SeparableConv2D
  # in Keras, with the exception of depthwise_quantizer, pointwise_quantizer
  # and bias_quantizer.
  #
  # depthwise_quantizer: quantizer function/class for depthwise spatial kernel
  # pointwise_quantizer: quantizer function/class for pointwise kernel
  # bias_quantizer: quantizer function/class for bias
  #
  # we refer the reader to the documentation of SeparableConv2D in Keras for
  # the other parameters.
  #
  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format=None,
               dilation_rate=(1, 1),
               depth_multiplier=1,
               activation=None,
               use_bias=True,
               depthwise_initializer='glorot_uniform',
               pointwise_initializer='glorot_uniform',
               bias_initializer='zeros',
               depthwise_regularizer=None,
               pointwise_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               depthwise_constraint=None,
               pointwise_constraint=None,
               bias_constraint=None,
               depthwise_quantizer=None,
               pointwise_quantizer=None,
               bias_quantizer=None,
               **kwargs):

    self.depthwise_quantizer = depthwise_quantizer
    self.pointwise_quantizer = pointwise_quantizer
    self.bias_quantizer = bias_quantizer

    # resolve string/dict quantizer specs into quantizer objects
    self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer)
    self.pointwise_quantizer_internal = get_quantizer(self.pointwise_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"):
      self.depthwise_quantizer_internal._set_trainable_parameter()
    if hasattr(self.pointwise_quantizer_internal, "_set_trainable_parameter"):
      self.pointwise_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.depthwise_quantizer_internal, self.pointwise_quantizer_internal,
        self.bias_quantizer_internal
    ]

    depthwise_constraint, depthwise_initializer = (
        get_auto_range_constraint_initializer(self.depthwise_quantizer_internal,
                                              depthwise_constraint,
                                              depthwise_initializer))
    pointwise_constraint, pointwise_initializer = (
        get_auto_range_constraint_initializer(self.pointwise_quantizer_internal,
                                              pointwise_constraint,
                                              pointwise_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        depth_multiplier=depth_multiplier,
        activation=activation,
        use_bias=use_bias,
        depthwise_initializer=initializers.get(depthwise_initializer),
        pointwise_initializer=initializers.get(pointwise_initializer),
        bias_initializer=initializers.get(bias_initializer),
        depthwise_regularizer=regularizers.get(depthwise_regularizer),
        pointwise_regularizer=regularizers.get(pointwise_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        depthwise_constraint=constraints.get(depthwise_constraint),
        pointwise_constraint=constraints.get(pointwise_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs
    )

  def call(self, inputs):
    # Apply the actual ops.
    if self.depthwise_quantizer:
      quantized_depthwise_kernel = self.depthwise_quantizer_internal(
          self.depthwise_kernel)
    else:
      quantized_depthwise_kernel = self.depthwise_kernel
    if self.pointwise_quantizer:
      quantized_pointwise_kernel = self.pointwise_quantizer_internal(
          self.pointwise_kernel)
    else:
      quantized_pointwise_kernel = self.pointwise_kernel

    outputs = tf.keras.backend.separable_conv2d(
        inputs,
        quantized_depthwise_kernel,
        quantized_pointwise_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias

      outputs = tf.keras.backend.bias_add(
          outputs,
          quantized_bias,
          data_format=self.data_format)

    if self.activation is not None:
      return self.activation(outputs)

    return outputs

  def get_config(self):
    config = {
        "depthwise_quantizer": constraints.serialize(
            self.depthwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "pointwise_quantizer": constraints.serialize(
            self.pointwise_quantizer_internal
            # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal
            # Google internal code, commented out by copybara
        ),
    }
    base_config = super(QSeparableConv2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantizers(self):
    return self.quantizers

  def get_prunable_weights(self):
    return [self.depthwise_kernel, self.pointwise_kernel]


class QDepthwiseConv2D(DepthwiseConv2D, PrunableLayer):
  """Creates quantized depthwise conv2d. Copied from mobilenet."""

  # most of these parameters follow the implementation of DepthwiseConv2D
  # in Keras,
  # with the exception of depthwise_range, bias_range,
  # depthwise_quantizer
  # and bias_quantizer, and kernel_initializer.
# # depthwise_quantizer: quantizer function/class for kernel # bias_quantizer: quantizer function/class for bias # depthwise_range/bias_ranger: for quantizer functions whose values # can go over [-1,+1], these values are used to set the clipping # value of kernels and biases, respectively, instead of using the # constraints specified by the user. # # we refer the reader to the documentation of DepthwiseConv2D in Keras for the # other parameters. # def __init__(self, kernel_size, strides=(1, 1), padding="VALID", depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer="he_normal", bias_initializer="zeros", depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, dilation_rate=(1, 1), depthwise_quantizer=None, bias_quantizer=None, depthwise_range=None, bias_range=None, **kwargs): if depthwise_range is not None: warnings.warn("depthwise_range is deprecated in QDepthwiseConv2D layer.") if bias_range is not None: warnings.warn("bias_range is deprecated in QDepthwiseConv2D layer.") self.depthwise_range = depthwise_range self.bias_range = bias_range self.depthwise_quantizer = depthwise_quantizer self.bias_quantizer = bias_quantizer self.depthwise_quantizer_internal = get_quantizer(self.depthwise_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.depthwise_quantizer_internal, "_set_trainable_parameter"): self.depthwise_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.depthwise_quantizer_internal, self.bias_quantizer_internal ] depthwise_constraint, depthwise_initializer = ( get_auto_range_constraint_initializer(self.depthwise_quantizer_internal, depthwise_constraint, depthwise_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if 
activation is not None: activation = get_quantizer(activation) super().__init__( kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, activation=activation, use_bias=use_bias, depthwise_regularizer=depthwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, depth_multiplier=depth_multiplier, depthwise_initializer=depthwise_initializer, bias_initializer=bias_initializer, depthwise_constraint=depthwise_constraint, bias_constraint=bias_constraint, dilation_rate=dilation_rate, **kwargs ) def build(self, input_shape): if len(input_shape) < 4: raise ValueError( "Inputs to `QDepthwiseConv2D` should have rank 4. " "Received input shape:", str(input_shape)) if self.data_format == "channels_first": channel_axis = 1 else: channel_axis = 3 if input_shape[channel_axis] is None: raise ValueError("The channel dimension of the inputs to " "`QDepthwiseConv2D` " "should be defined. Found `None`.") input_dim = int(input_shape[channel_axis]) depthwise_kernel_shape = (self.kernel_size[0], self.kernel_size[1], input_dim, self.depth_multiplier) self.depthwise_kernel = self.add_weight( shape=depthwise_kernel_shape, initializer=self.depthwise_initializer, name="depthwise_kernel", regularizer=self.depthwise_regularizer, constraint=self.depthwise_constraint) if self.use_bias: self.bias = self.add_weight( shape=(input_dim * self.depth_multiplier,), initializer=self.bias_initializer, name="bias", regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None # Set input spec. 
self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) self.built = True def call(self, inputs, training=None): if self.depthwise_quantizer: quantized_depthwise_kernel = ( self.depthwise_quantizer_internal(self.depthwise_kernel)) else: quantized_depthwise_kernel = self.depthwise_kernel outputs = tf.keras.backend.depthwise_conv2d( inputs, quantized_depthwise_kernel, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate, data_format=self.data_format) if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format) if self.activation is not None: return self.activation(outputs) return outputs def get_config(self): config = super(QDepthwiseConv2D, self).get_config() config.pop("filters", None) config.pop("kernel_initializer", None) config.pop("kernel_regularizer", None) config.pop("kernel_constraint", None) config["depth_multiplier"] = self.depth_multiplier config["depthwise_initializer"] = initializers.serialize( self.depthwise_initializer# Google internal code, commented out by copybara ) config["depthwise_regularizer"] = regularizers.serialize( self.depthwise_regularizer# Google internal code, commented out by copybara ) config["depthwise_constraint"] = constraints.serialize( self.depthwise_constraint# Google internal code, commented out by copybara ) config["depthwise_quantizer"] = constraints.serialize( self.depthwise_quantizer_internal# Google internal code, commented out by copybara ) config["bias_quantizer"] = constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ) config["depthwise_range"] = self.depthwise_range config["bias_range"] = self.bias_range return config def get_quantization_config(self): return { "depthwise_quantizer_internal": str(self.depthwise_quantizer_internal), "bias_quantizer": 
str(self.bias_quantizer_internal), "activation": str(self.activation), "filters" : str(self.filters) } def get_quantizers(self): return self.quantizers def get_prunable_weights(self): return [self.depthwise_kernel] def QMobileNetSeparableConv2D( filters, # pylint: disable=invalid-name kernel_size, strides=(1, 1), padding="VALID", dilation_rate=(1, 1), depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer="he_normal", pointwise_initializer="he_normal", bias_initializer="zeros", depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, depthwise_quantizer=None, pointwise_quantizer=None, bias_quantizer=None, depthwise_activation=None, depthwise_range=None, pointwise_range=None, bias_range=None, depthwise_dropout_rate=0.0, pw_first=False, name=""): """Adds a quantized separableconv2d.""" # we use here a modified version that appeared in mobilenet that adds # quantization to the network, and possibly an intermediate activation # layer that acts as a quantizer and possible dropout layer between # the depthwise and pointwise convolutions. # # since this implementation expands into depthwise -> pointwise # convolutions, the users will not see a separable convolution operation # in model.summary(), but rather a depthwise convolution followed by a # pointwise convolution. # # depthwise_quantizer: depthwise quantization function # pointwise_quantizer: pointwise quantization function # bias_quantizer: bias quantization function for the pointwise convolution # depthwise_range/pointwise_range/bias_range: ranges to be used if # quantization values can become greater than -1 and +1. 
# depthwise_dropout_rate: dropout between depthwise and pointwise is added # if rate > 0.0 # pw_first: this may disappear in the future, but as deep quantized networks # sometimes behave in different ways, if we are using binary or ternary # quantization, it may be better to apply pointwise before depthwise. # # For the remaining parameters, please refer to Keras implementation of # SeparableConv2D. # def _call(inputs): # pylint: disable=invalid-name """Internally builds qseparableconv2d.""" x = inputs if pw_first: x = QConv2D( filters, (1, 1), strides=(1, 1), padding="same", use_bias=use_bias, kernel_constraint=pointwise_constraint, kernel_initializer=pointwise_initializer, kernel_regularizer=pointwise_regularizer, kernel_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, bias_regularizer=bias_regularizer, bias_initializer=bias_initializer, bias_constraint=bias_constraint, activity_regularizer=activity_regularizer, kernel_range=pointwise_range, bias_range=bias_range, name=name + "_pw")( x) if depthwise_activation: if isinstance(depthwise_activation, QActivation): x = depthwise_activation(x) else: x = QActivation(depthwise_activation, name=name + "_dw_act")(x) if depthwise_dropout_rate > 0.0: x = Dropout(rate=depthwise_dropout_rate, name=name + "_dw_dropout")(x) x = QDepthwiseConv2D( kernel_size, strides=strides, dilation_rate=dilation_rate, padding=padding, depth_multiplier=depth_multiplier, use_bias=False, depthwise_regularizer=depthwise_regularizer, depthwise_initializer=depthwise_initializer, depthwise_constraint=depthwise_constraint, depthwise_quantizer=depthwise_quantizer, depthwise_range=depthwise_range, name=name + "_dw")( x) if not pw_first: if depthwise_activation: if isinstance(depthwise_activation, QActivation): x = depthwise_activation(x) else: x = QActivation(depthwise_activation, name=name + "_dw_act")(x) if depthwise_dropout_rate > 0.0: x = Dropout(rate=depthwise_dropout_rate, name=name + "_dw_dropout")(x) x = QConv2D( filters, (1, 1), 
strides=(1, 1), padding="same", use_bias=use_bias, kernel_constraint=pointwise_constraint, kernel_initializer=pointwise_initializer, kernel_regularizer=pointwise_regularizer, kernel_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, bias_regularizer=bias_regularizer, bias_initializer=bias_initializer, bias_constraint=bias_constraint, activity_regularizer=activity_regularizer, kernel_range=pointwise_range, bias_range=bias_range, name=name + "_pw")( x) if activation: if isinstance(activation, QActivation): x = activation(x) else: x = Activation(activation, name=name + "_pw_act")(x) return x return _call ================================================ FILE: qkeras/qdepthwise_conv2d_transpose.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import tensorflow as tf from tensorflow.keras.layers import Conv2DTranspose from tensorflow.keras.layers import InputSpec from .qconvolutional import deconv_output_length from .quantizers import get_quantizer from tensorflow.python.eager import context from tensorflow.python.keras import constraints from tensorflow.python.ops import array_ops from tensorflow.python.ops import array_ops # TODO(akshayap): Commonized functionality with QSeparableConv2DTranspose. 
class QDepthwiseConv2DTranspose(Conv2DTranspose): """Quantized Depthwise Conv2DTranspose layer.""" # Most of these parameters follow the implementation of Conv2DTranspose # in Keras, with the exception of following parameters. # # depthwise_activation: activation quantizer for depthwise convolution # depthwise_kernel_quantizer: quantizer function/class for depthwise kernel # bias_quantizer: quantizer function/class for bias # # we refer the reader to the documentation of Conv2DTranspose in Keras for # the other parameters. def __init__( self, filters, kernel_size, group_size=1, strides=(1, 1), padding="valid", output_padding=None, depth_multiplier=1, depthwise_activation=None, use_bias=True, depthwise_kernel_quantizer=None, bias_quantizer=None, **kwargs, ): self.filters = filters self.kernel_size = kernel_size self.strides = strides self.padding = padding self.output_padding = output_padding self.depth_multiplier = depth_multiplier self.depthwise_activation = depthwise_activation self.use_bias = use_bias self.group_size = group_size self.depthwise_kernel_quantizer = depthwise_kernel_quantizer self.bias_quantizer = bias_quantizer self.depthwise_kernel_quantizer_internal = get_quantizer( self.depthwise_kernel_quantizer ) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible for q in [ self.depthwise_kernel_quantizer_internal, ]: if hasattr(q, "_set_trainable_parameter"): q._set_trainable_parameter() if depthwise_activation is not None: self.depthwise_activation = get_quantizer(depthwise_activation) super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, use_bias=use_bias, **kwargs, ) def _get_input_axis(self): if self.data_format == "channels_first": b_axis, c_axis, h_axis, w_axis = 0, 1, 2, 3 else: b_axis, c_axis, h_axis, w_axis = 0, 3, 1, 2 return b_axis, c_axis, h_axis, w_axis def _get_input_dims(self, input_shape): b_axis, c_axis, h_axis, w_axis = 
self._get_input_axis() return ( input_shape[b_axis], input_shape[c_axis], input_shape[h_axis], input_shape[w_axis], ) def _get_output_size( self, inputs, output_padding, padding, strides, dilation_rate, kernel_h, kernel_w, ): input_shape = array_ops.shape(inputs) batch_size, _, height, width = self._get_input_dims(input_shape) stride_h, stride_w = strides dilation_h, dilation_w = dilation_rate[0], dilation_rate[1] if output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = output_padding # Infer the dynamic output shape: out_height = deconv_output_length( height, kernel_h, padding=padding, output_padding=out_pad_h, stride=stride_h, dilation=dilation_h, ) out_width = deconv_output_length( width, kernel_w, padding=padding, output_padding=out_pad_w, stride=stride_w, dilation=dilation_w, ) return (batch_size, out_height, out_width) def build(self, input_shape): self._input_shape = input_shape _, input_channel, _, _ = self._get_input_dims(input_shape) channel_axis = self._get_input_axis()[1] self.input_spec = InputSpec( min_ndim=self.rank + 2, axes={channel_axis: input_channel} ) # When setting kernel shape=(kw, kh, 1, input_channel), it does depthwise # convolution. 
depthwise_kernel_shape = self.kernel_size + ( input_channel, self.group_size, ) self.depthwise_kernel = self.add_weight( name=f"depthwise_kernel", shape=depthwise_kernel_shape, initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint, trainable=True, dtype=self.dtype, ) if self.use_bias: self.bias = self.add_weight( name="bias", shape=(self.filters,), initializer=self.bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint, trainable=True, dtype=self.dtype, ) else: self.bias = None self.built = True def compute_final_output_shape(self, input_shape, kernel_size, strides): input_shape = tf.TensorShape(input_shape).as_list() # By using list(), output_shape is a copy of input_shape, instead of a # reference to input_shape. output_shape = list(input_shape) _, c_axis, h_axis, w_axis = self._get_input_axis() kernel_h, kernel_w = kernel_size stride_h, stride_w = strides if self.output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = self.output_padding # Convolution is performed separately on each spatial domain. 
output_shape[c_axis] = input_shape[c_axis] output_shape[h_axis] = deconv_output_length( output_shape[h_axis], kernel_h, padding=self.padding, output_padding=out_pad_h, stride=stride_h, dilation=self.dilation_rate[0], ) output_shape[w_axis] = deconv_output_length( output_shape[w_axis], kernel_w, padding=self.padding, output_padding=out_pad_w, stride=stride_w, dilation=self.dilation_rate[1], ) return tf.TensorShape(output_shape) def conv_transpose_op( self, inputs, filters, strides, padding, output_padding, dilation_rate, kernel_quantizer, kernel_weights, use_bias, bias_quantizer, bias, activation, ): """Transpose convolution operation.""" kernel_h, kernel_w = self.kernel_size batch_size, out_height, out_width = self._get_output_size( inputs, output_padding, padding, strides, dilation_rate, kernel_h, kernel_w, ) if kernel_quantizer: quantized_kernel = kernel_quantizer(kernel_weights) else: quantized_kernel = kernel_weights output_filters = self.group_size if self.data_format == "channels_first": output_shape = (batch_size, output_filters, out_height, out_width) else: output_shape = (batch_size, out_height, out_width, output_filters) output_shape_tensor = array_ops.stack(output_shape) num_input_channels = self._input_shape[-1] if num_input_channels % self.group_size: raise ValueError( "Input channels should be exactly divisible by group_size." ) num_output_groups = num_input_channels // self.group_size # Split the input channels into groups. x = tf.split(inputs, num_output_groups, axis=-1) # For depthwise convolution, since CPU doesn't support grouped # convolution, we run convolution on each slice of inputs and concat # the results. outputs = [ tf.keras.backend.conv2d_transpose( x=x[i], kernel=quantized_kernel[ :, :, self.group_size * i : self.group_size * (i + 1), :, ], output_shape=output_shape_tensor, strides=strides, padding=padding, data_format=self.data_format, dilation_rate=dilation_rate, ) for i in range(num_output_groups) ] # Concat the channels. 
outputs = tf.concat(outputs, axis=-1) if not context.executing_eagerly(): # Infer the static output shape: out_shape = self.compute_final_output_shape( input_shape=inputs.shape, kernel_size=(kernel_h, kernel_w), strides=strides, ) outputs.set_shape(out_shape) if use_bias: quantized_bias = bias_quantizer(bias) if bias_quantizer else bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format ) if activation is not None: return activation(outputs) return outputs def call(self, inputs): input_shape = array_ops.shape(inputs) _, input_channel, _, _ = self._get_input_dims(input_shape) return self.conv_transpose_op( inputs=inputs, # Depthwise convolution doesn't operate across channels. Thereofore its # output channels is the same as input channels. filters=input_channel, strides=self.strides, padding=self.padding, output_padding=self.output_padding, dilation_rate=self.dilation_rate, kernel_quantizer=self.depthwise_kernel_quantizer_internal, kernel_weights=self.depthwise_kernel, use_bias=False, # Usually set bias=False for depthwise conv. 
bias_quantizer=None, bias=None, activation=self.depthwise_activation, ) def get_config(self): config = super().get_config() config.update({ "filters": self.filters, "kernel_size": self.kernel_size, "strides": self.strides, "padding": self.padding, "output_padding": self.output_padding, "dilation_rate": self.dilation_rate, "data_format": self.data_format, "depth_multiplier": self.depth_multiplier, "activation": self.activation, "use_bias": self.use_bias, "depthwise_kernel_quantizer": constraints.serialize( self.depthwise_kernel_quantizer_internal ), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal, ), "group_size": self.group_size, }) return config def get_quantizers(self): return [ self.depthwise_kernel_quantizer_internal, self.bias_quantizer_internal, self.depthwise_activation, ] def get_prunable_weights(self): w = [self.depthwise_kernel] if self.use_bias: w.append(self.bias) return w ================================================ FILE: qkeras/qdepthwiseconv2d_batchnorm.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Fold batchnormalization with previous QDepthwiseConv2D layers.""" import tensorflow as tf from tensorflow.keras import layers from tensorflow.keras.models import Model from .qconvolutional import QDepthwiseConv2D from .quantizers import * from tensorflow.python.framework import smart_cond as tf_utils from tensorflow.python.ops import math_ops from tensorflow.python.ops import array_ops tf.compat.v2.enable_v2_behavior() class QDepthwiseConv2DBatchnorm(QDepthwiseConv2D): """Fold batchnormalization with a previous QDepthwiseConv2d layer.""" def __init__( self, # QDepthwiseConv2d params kernel_size, strides=(1, 1), padding="VALID", depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer="he_normal", bias_initializer="zeros", depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, dilation_rate=(1, 1), depthwise_quantizer=None, bias_quantizer=None, depthwise_range=None, bias_range=None, # batchnorm params axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, renorm=False, renorm_clipping=None, renorm_momentum=0.99, fused=None, trainable=True, virtual_batch_size=None, adjustment=None, # other params ema_freeze_delay=None, folding_mode="ema_stats_folding", **kwargs): """A composite layer that folds depthwiseconv2d and batch normalization. The first group of parameters correponds to the initialization parameters of a QDepthwiseConv2d layer. check qkeras.qconvolutional.QDepthwiseConv2D for details. The 2nd group of parameters corresponds to the initialization parameters of a BatchNormalization layer. 
Check keras.layers.normalization.BatchNorma lizationBase for details. The 3rd group of parameters corresponds to the initialization parameters specific to this class. ema_freeze_delay: int or None. number of steps before batch normalization mv_mean and mv_variance will be frozen and used in the folded layer. folding_mode: string "ema_stats_folding": mimic tflite which uses the ema statistics to fold the kernel to suppress quantization induced jitter then performs the correction to have a similar effect of using the current batch statistics. "batch_stats_folding": use batch mean and variance to fold kernel first; after enough training steps switch to moving_mean and moving_variance for kernel folding. """ # intialization the QDepthwiseConv2d part of the composite layer super().__init__( kernel_size=kernel_size, strides=strides, padding=padding, depth_multiplier=depth_multiplier, data_format=data_format, activation=activation, use_bias=use_bias, depthwise_initializer=depthwise_initializer, bias_initializer=bias_initializer, depthwise_regularizer=depthwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, depthwise_constraint=depthwise_constraint, bias_constraint=bias_constraint, dilation_rate=dilation_rate, depthwise_quantizer=depthwise_quantizer, bias_quantizer=bias_quantizer, depthwise_range=depthwise_range, bias_range=bias_range, **kwargs ) # initialization of batchnorm part of the composite layer self.batchnorm = layers.BatchNormalization( axis=axis, momentum=momentum, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, moving_mean_initializer=moving_mean_initializer, moving_variance_initializer=moving_variance_initializer, beta_regularizer=beta_regularizer, gamma_regularizer=gamma_regularizer, beta_constraint=beta_constraint, gamma_constraint=gamma_constraint, renorm=renorm, renorm_clipping=renorm_clipping, renorm_momentum=renorm_momentum, fused=fused, 
trainable=trainable, virtual_batch_size=virtual_batch_size, adjustment=adjustment) self.ema_freeze_delay = ema_freeze_delay assert folding_mode in ["ema_stats_folding", "batch_stats_folding"] self.folding_mode = folding_mode def build(self, input_shape): super(QDepthwiseConv2DBatchnorm, self).build(input_shape) # If start training from scratch, self._iteration (i.e., training_steps) # is initialized with -1. When loading ckpt, it can load the number of # training steps that have been previously trainied. # TODO(lishanok): develop a way to count iterations outside layer self._iteration = tf.Variable(-1, trainable=False, name="iteration", dtype=tf.int64) def call(self, inputs, training=None): # numpy value, mark the layer is in training training = self.batchnorm._get_training_value(training) # pylint: disable=protected-access # checking if to update batchnorm params if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0): # if ema_freeze_delay is None or a negative value, do not freeze bn stats bn_training = tf.cast(training, dtype=bool) else: bn_training = tf.math.logical_and(training, tf.math.less_equal( self._iteration, self.ema_freeze_delay)) depthwise_kernel = self.depthwise_kernel # run depthwise_conv2d to produce output for the following batchnorm conv_outputs = tf.keras.backend.depthwise_conv2d( inputs, depthwise_kernel, strides=self.strides, padding=self.padding, dilation_rate=self.dilation_rate, data_format=self.data_format) if self.use_bias: bias = self.bias conv_outputs = tf.keras.backend.bias_add( conv_outputs, bias, data_format=self.data_format) else: bias = 0 _ = self.batchnorm(conv_outputs, training=bn_training) self._iteration.assign_add(tf_utils.smart_cond( training, lambda: tf.constant(1, tf.int64), lambda: tf.constant(0, tf.int64))) # calcuate mean and variance from current batch bn_shape = conv_outputs.shape ndims = len(bn_shape) reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis] keep_dims = 
len(self.batchnorm.axis) > 1 mean, variance = self.batchnorm._moments( # pylint: disable=protected-access math_ops.cast(conv_outputs, self.batchnorm._param_dtype), # pylint: disable=protected-access reduction_axes, keep_dims=keep_dims) gamma = self.batchnorm.gamma beta = self.batchnorm.beta moving_mean = self.batchnorm.moving_mean moving_variance = self.batchnorm.moving_variance if self.folding_mode not in ["batch_stats_folding", "ema_stats_folding"]: assert ValueError("mode {} not supported!".format(self.folding_mode)) mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon) batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon) if gamma is not None: mv_inv *= gamma batch_inv *= gamma folded_bias = tf_utils.smart_cond( bn_training, lambda: batch_inv * (bias - mean) + beta, lambda: mv_inv * (bias - moving_mean) + beta) if self.folding_mode == "batch_stats_folding": # using batch mean and variance in the initial training stage # after sufficient training, switch to moving mean and variance inv = tf_utils.smart_cond(bn_training, lambda: batch_inv, lambda: mv_inv) elif self.folding_mode == "ema_stats_folding": # We always scale the weights with a correction factor to the long term # statistics prior to quantization. This ensures that there is no jitter # in the quantized weights due to batch to batch variation. During the # initial phase of training, we undo the scaling of the weights so that # outputs are identical to regular batch normalization. We also modify # the bias terms correspondingly. After sufficient training, switch from # using batch statistics to long term moving averages for batch # normalization. 
# use batch stats for calcuating bias before bn freeze, and use moving # stats after bn freeze # moving stats is always used to fold kernel in tflite; before bn freeze # an additional correction factor will be applied to the depthwiseconv2d # output inv = mv_inv # for DepthwiseConv2D inv needs to be broadcasted to the last 2 dimensions # of the kernels depthwise_weights_shape = [ depthwise_kernel.get_shape().as_list()[2], depthwise_kernel.get_shape().as_list()[3] ] inv = array_ops.reshape(inv, depthwise_weights_shape) # wrap conv kernel with bn parameters folded_depthwise_kernel = inv * depthwise_kernel # quantize the folded kernel if self.depthwise_quantizer is not None: q_folded_depthwise_kernel = self.depthwise_quantizer_internal( folded_depthwise_kernel) else: q_folded_depthwise_kernel = folded_depthwise_kernel # If loaded from a ckpt, bias_quantizer is the ckpt value # Else if bias_quantizer not specified, bias # quantizer is None and we need to calculate bias quantizer # type according to accumulator type. User can call # bn_folding_utils.populate_bias_quantizer_for_folded_layers( # model, input_quantizer_list]) to populate such bias quantizer. 
    # Quantize the folded bias when a bias quantizer is configured.
    if self.bias_quantizer is not None:
      q_folded_bias = self.bias_quantizer_internal(folded_bias)
    else:
      q_folded_bias = folded_bias

    applied_kernel = q_folded_depthwise_kernel
    applied_bias = q_folded_bias

    # calculate depthwise_conv2d output using the quantized folded kernel
    folded_outputs = tf.keras.backend.depthwise_conv2d(
        inputs,
        applied_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if training is True and self.folding_mode == "ema_stats_folding":
      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)
      # Correction factor: while bn is still training, undo the moving-stats
      # folding so the output matches regular batch normalization; once
      # frozen, the factor is 1.
      y_corr = tf_utils.smart_cond(
          bn_training,
          lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) *
                   math_ops.rsqrt(variance + self.batchnorm.epsilon)),
          lambda: tf.constant(1.0, shape=moving_variance.shape))
      folded_outputs = math_ops.mul(folded_outputs, y_corr)

    folded_outputs = tf.keras.backend.bias_add(
        folded_outputs,
        applied_bias,
        data_format=self.data_format)

    if self.activation is not None:
      return self.activation(folded_outputs)

    return folded_outputs

  def get_config(self):
    """Returns the layer config merged with the wrapped batchnorm's config."""
    base_config = super().get_config()
    bn_config = self.batchnorm.get_config()
    config = {"ema_freeze_delay": self.ema_freeze_delay,
              "folding_mode": self.folding_mode}
    name = base_config["name"]
    out_config = dict(
        list(base_config.items()) + list(bn_config.items()) +
        list(config.items()))

    # names from different config override each other; use the base layer name
    # as this layer's config name
    out_config["name"] = name
    return out_config

  def get_quantization_config(self):
    """Returns a string map describing the quantizers used by this layer."""
    return {
        "depthwise_quantizer": str(self.depthwise_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation),
        "filters": str(self.filters)
    }

  def get_quantizers(self):
    """Returns the internal quantizer objects of this layer."""
    return self.quantizers

  def get_folded_weights(self):
    """Function to get the batchnorm folded weights.

    This function converts the weights by folding batchnorm parameters into
    the weight of QDepthwiseConv2d.
    The high-level equation:

    W_fold = gamma * W / sqrt(variance + epsilon)
    bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta
    """
    depthwise_kernel = self.depthwise_kernel
    # When the layer has no bias, fold as if the bias were zero.
    if self.use_bias:
      bias = self.bias
    else:
      bias = 0

    # get Batchnorm stats
    gamma = self.batchnorm.gamma
    beta = self.batchnorm.beta
    moving_mean = self.batchnorm.moving_mean
    moving_variance = self.batchnorm.moving_variance

    # get the inversion factor so that we replace division by multiplication
    inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)
    if gamma is not None:
      inv *= gamma

    # fold bias with bn stats
    folded_bias = inv * (bias - moving_mean) + beta

    # for DepthwiseConv2D inv needs to be broadcasted to the last 2 dimensions
    # of the kernels
    depthwise_weights_shape = [
        depthwise_kernel.get_shape().as_list()[2],
        depthwise_kernel.get_shape().as_list()[3]
    ]
    inv = array_ops.reshape(inv, depthwise_weights_shape)

    # wrap conv kernel with bn parameters
    folded_depthwise_kernel = inv * depthwise_kernel

    return [folded_depthwise_kernel, folded_bias]


================================================
FILE: qkeras/qlayers.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# # ============================================================================== """Definition of quantization package.""" # Some parts of the code were adapted from # # https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow # # "Copyright (c) 2017, Bert Moons" where it applies # # and were implemented following several papers. # # https://arxiv.org/pdf/1609.07061.pdf # https://arxiv.org/abs/1602.02830 # https://arxiv.org/abs/1603.05279 # https://arxiv.org/abs/1605.04711 # https://ieeexplore.ieee.org/abstract/document/6986082 # https://ieeexplore.ieee.org/iel4/78/5934/00229903.pdf # from __future__ import absolute_import from __future__ import division from __future__ import print_function import sys import warnings import numpy as np import six import tensorflow.compat.v2 as tf from tensorflow.keras import activations from tensorflow.keras import constraints from tensorflow.keras import initializers from tensorflow.keras import regularizers import tensorflow.keras.backend as K from tensorflow.keras.constraints import Constraint from tensorflow.keras.initializers import Initializer from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Layer from tensorflow.python.framework import smart_cond as tf_utils from .quantizers import * from .quantizers import _get_integer_bits from .quantizers import get_quantizer from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer def get_auto_range_constraint_initializer(quantizer, constraint, initializer): """Get value range automatically for quantizer. Arguments: quantizer: A quantizer class in quantizers.py. constraint: A tf.keras constraint. initializer: A tf.keras initializer. Returns: a tuple (constraint, initializer), where constraint is clipped by Clip class in this file, based on the value range of quantizer. initializer is initializer contraint by value range of quantizer. 
""" if quantizer is not None: constraint = get_constraint(constraint, quantizer) initializer = get_initializer(initializer) if initializer and initializer.__class__.__name__ not in ["Ones", "Zeros", 'QInitializer']: # we want to get the max value of the quantizer that depends # on the distribution and scale if not (hasattr(quantizer, "alpha") and isinstance(quantizer.alpha, six.string_types)): initializer = QInitializer( initializer, use_scale=True, quantizer=quantizer) return constraint, initializer class QInitializer(Initializer): """Wraps around Keras initializer to provide a fanin scaling factor.""" def __init__(self, initializer, use_scale, quantizer): self.initializer = initializer self.use_scale = use_scale self.quantizer = quantizer try: self.is_po2 = "po2" in quantizer.__class__.__name__ except: self.is_po2 = False def __call__(self, shape, dtype=None): x = self.initializer(shape, dtype) max_x = np.max(abs(x)) std_x = np.std(x) delta = self.quantizer.max() * 2**-self.quantizer.bits # delta is the minimum resolution of the number system. # we want to make sure we have enough values. if delta > std_x and hasattr(self.initializer, "scale"): q = self.quantizer(x) max_q = np.max(abs(q)) scale = 1.0 if max_q == 0.0: xx = np.mean(x * x) scale = self.quantizer.max() / np.sqrt(xx) else: qx = np.sum(q * x) qq = np.sum(q * q) scale = qq / qx self.initializer.scale *= max(scale, 1) x = self.initializer(shape, dtype) return np.clip(x, -self.quantizer.max(), self.quantizer.max()) def get_config(self): return { "initializer": self.initializer, "use_scale": self.use_scale, "quantizer": self.quantizer, } @classmethod def from_config(cls, config): config = { 'initializer' : get_initializer(config['initializer']), 'use_scale' : config['use_scale'], 'quantizer' : get_quantizer(config['quantizer'])} return cls(**config) # # Because it may be hard to get serialization from activation functions, # we may be replacing their instantiation by QActivation in the future. 
# class QActivation(Layer, PrunableLayer): """Implements quantized activation layers.""" # TODO(lishanok): Implement activation type conversion outside of the class. # When caller calls the initializer, it should convert string to a quantizer # object if string is given as activation. def __init__(self, activation, **kwargs): super().__init__(**kwargs) self.activation = activation if not isinstance(activation, six.string_types): self.quantizer = activation if hasattr(self.quantizer, "__name__"): self.__name__ = self.quantizer.__name__ elif hasattr(self.quantizer, "name"): self.__name__ = self.quantizer.name elif hasattr(self.quantizer, "__class__"): self.__name__ = self.quantizer.__class__.__name__ return self.__name__ = activation try: self.quantizer = get_quantizer(activation) except KeyError: raise ValueError("invalid activation '{}'".format(activation)) def call(self, inputs): return self.quantizer(inputs) def get_config(self): config = {"activation": self.activation} base_config = super(QActivation, self).get_config() return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config): try: if isinstance(config["activation"], dict): # If config["activation"] is serialized, it would be a dict. # Otherwise, it will be either string or quantizer object, which # doesn't require deserialization. config["activation"] = activations.deserialize(config["activation"]) return cls(**config) except Exception as e: raise TypeError( f"Error when deserializing class '{cls.__name__}' using " f"config={config}.\n\nException encountered: {e}" ) def get_quantization_config(self): return str(self.activation) def compute_output_shape(self, input_shape): return input_shape def get_prunable_weights(self): return [] class QAdaptiveActivation(Layer, PrunableLayer): """[EXPERIMENTAL] Implements an adaptive quantized activation layer using EMA. 
This layer calculates an exponential moving average of min and max of the activation values to automatically determine the scale (integer bits) of the quantizer used in this layer. """ def __init__(self, activation, total_bits, current_step=None, symmetric=True, quantization_delay=0, ema_freeze_delay=None, ema_decay=0.9999, per_channel=False, po2_rounding=False, relu_neg_slope=0.0, relu_upper_bound=None, **kwargs): """Initializes this QAdaptiveActivation layer. Args: activation: Str. The activation quantizer type to use for this activation layer, such as 'quantized_relu'. Should be a string with no params. total_bits: Int. The total bits that can be used by the quantizer current_step: tf.Variable specifying the current step in training. You can find this by passing model.optimizer.iterations (see tf.keras.optimizers.Optimizer.iterations). If set to None, the layer will attempt to estimate the current step itself, but please note that this number may not always match the optimizer step. symmetric: Bool. If to enforce symmetry about the origin in the quantized bit representation of the value. When using linear activation, this should be True for best results. quantization_delay: Int. How many training steps to wait until quantizing the activation values. ema_freeze_delay: Int. Steps to wait until stopping the update of the exponential moving average values. Set to None for an infinite delay. ema_decay: Float. The decay value used for exponential moving average (see tf.keras.backend.moving_average_update) per_channel: Bool. If to quantize the activation values on a per-channel basis. po2_rounding: Bool. If true, the EMA max value is rounded to the nearest power-of-2. If false, the EMA max value is rounded up (with ceil) to a power-of-two. These power-of-two operations are necessary to calculate the number of integer bits used in the quantizer, and the difference between using round and ceil trade off the quantizer's range and precision. relu_neg_slope: Float. 
Slope of the negative values in relu to enable the use of leaky relu. This parameter will only be used with the quantizer type quantized_relu. Set to 0.0 to use normal relu. relu_upper_bound: Float. The upper bound to use if the activation is set to relu. Set to None to not artificially set an upper bound. Pease note that this param is ignored if the activation is not quantized_relu **kwargs: Args passed to the Layer class. """ super().__init__(**kwargs) self.total_bits = total_bits self.symmetric = symmetric self.is_estimating_step_count = False # If the layer should estimate its # own step count by incrementing it # every call. if isinstance(current_step, tf.Variable): self.step = current_step elif current_step is None: self.step = tf.Variable(-1, dtype=tf.int64) self.is_estimating_step_count = True print("[WARNING] QAdaptiveActivation is estimating it's own training " "step count, which may not always be the same as the true optimizer" " training step. To mitigate this, please set the current_step " "parameter when initializing QAdaptiveActivation", file=sys.stderr) else: self.step = tf.Variable(current_step, dtype=tf.int64) print("[WARNING] QAdaptiveActivation is disconnected from the optimizer " "current step, which may lead to incorrect training. If you wish to" " resume training, set this layer's self.step to the optimizer's " "tf.Variable current step", file=sys.stderr) self.quantization_delay = quantization_delay self.ema_freeze_delay = ema_freeze_delay self.will_ema_freeze = True if ema_freeze_delay else False self.ema_decay = ema_decay self.per_channel = per_channel self.po2_rounding = po2_rounding self.ema_min = None self.ema_max = None self.relu_neg_slope = relu_neg_slope self.relu_upper_bound = relu_upper_bound # Verify quantizer type is correct self.supported_quantizers = ["quantized_bits", "quantized_relu"] if activation not in self.supported_quantizers: raise ValueError(("Invalid activation {}. 
Activation quantizer may NOT " "contain any parameters (they will be set automatically" " by this layer), and only the quantizer types {} are " "supported.").format(activation, self.supported_quantizers)) # Get the quantizer associated with the activation try: self.quantizer = get_quantizer(activation) except KeyError: raise ValueError("Invalid activation '{}'".format(activation)) # Check that the quantizer is supported if self.quantizer.__class__.__name__ not in self.supported_quantizers: raise ValueError("Unsupported activation quantizer '{}'".format( self.quantizer.__class__.__name__)) # Set keep_negative if self.quantizer.__class__.__name__ == "quantized_relu": self.quantizer.is_quantized_clip = False # Use relu_upper_bound instead if self.relu_upper_bound: self.quantizer.relu_upper_bound = self.relu_upper_bound self.quantizer.negative_slope = relu_neg_slope self.keep_negative = relu_neg_slope != 0.0 self.quantizer.is_quantized_clip = False # Use normal relu when qnoise=0 elif self.quantizer.__class__.__name__ == "quantized_bits": self.keep_negative = True self.quantizer.keep_negative = True # If not using quantization delay, then print warning if self.quantization_delay < 1: print("[WARNING] If QAdaptiveActivation has the quantization_delay set " "to 0, then the moving averages will be heavily biased towards the " "initial quantizer configuration, which will likely prevent the " "model from converging. 
Consider a larger quantization_delay.", file=sys.stderr) self.activation = self.quantizer # self.activation is used by QTools def build(self, input_shape): if self.will_ema_freeze: self.ema_freeze_delay = tf.constant(self.ema_freeze_delay, dtype=tf.int64) self.ema_decay = tf.constant(self.ema_decay, dtype=tf.float32) self.is_estimating_step_count = tf.constant(self.is_estimating_step_count, dtype=tf.bool) # Calculate the number of channels channel_index = -1 if K.image_data_format() == "channels_last" else 1 if self.per_channel: input_shape_list = list(input_shape) if isinstance( input_shape, tuple) else input_shape.as_list() num_channels = tf.constant(input_shape_list[channel_index], shape=(1), dtype=tf.int64) else: num_channels = tf.constant(1, shape=(1), dtype=tf.int64) # Initialize the moving mins and max if self.ema_min is None or self.ema_max is None: self.ema_min = tf.Variable(tf.zeros(num_channels), name="ema_min", trainable=False) self.ema_max = tf.Variable(tf.zeros(num_channels), name="ema_max", trainable=False) # Determine the parameters for the quantizer self.quantizer.bits = self.total_bits # Set up the initial integer bits and quantizer params self.quantizer.integer = tf.Variable(tf.zeros(num_channels, dtype=tf.int32), name="quantizer_integer_bits", trainable=False) integer_bits = _get_integer_bits(min_value=self.ema_min, max_value=self.ema_max, bits=self.total_bits, symmetric=self.symmetric, keep_negative=self.keep_negative, is_clipping=self.po2_rounding) self.quantizer.integer.assign(integer_bits) self.quantizer.alpha = 1.0 # Setting alpha to 1.0 allows the integer bits # to serve as the scale self.quantizer.symmetric = self.symmetric self.quantization_delay = tf.constant(self.quantization_delay, dtype=tf.int64) def call(self, inputs, training=False): x = inputs training = training and self.trainable self.will_ema_freeze = self.will_ema_freeze and self.trainable # Update the step count if the optimizer step count is unknown 
self.step.assign_add(K.switch( tf.math.logical_and(self.is_estimating_step_count, training), tf.constant(1, tf.int64), tf.constant(0, tf.int64))) # Perform the quantization if training: # Calculate the qnoise, a scalar from 0 to 1 that represents the level of # quantization noise to use. At training start, we want no quantization, # so qnoise_factor = 0.0. After quantization_delay steps, we want normal # quantization, so qnoise_factor = 1.0. qnoise_factor = K.switch( tf.greater_equal(self.step, self.quantization_delay), lambda: tf.constant(1.0), lambda: tf.constant(0.0)) self.quantizer.update_qnoise_factor(qnoise_factor) qx = self.quantizer(x) else: # If not training, we always want to use full quantization self.quantizer.update_qnoise_factor(tf.constant(1.0)) qx = self.quantizer(x) # Calculate the axis along where to find the min and max EMAs len_axis = len(x.shape) if len_axis > 1: if self.per_channel: if K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) else: axis = list(range(len_axis)) else: axis = [0] # Determine if freezing the EMA is_ema_training = tf.constant(training, dtype=tf.bool) if self.will_ema_freeze: is_ema_training = tf.cond( tf.greater(self.step, self.ema_freeze_delay), lambda: tf.constant(False), lambda: tf.constant(True)) def update_branch(): """ Update the moving average when is_ema_training is True.""" # Set the qnoise factor to 0 to update the EMA using the unquantized input prev_qnoise_factor = tf.identity(self.quantizer.qnoise_factor) self.quantizer.update_qnoise_factor(tf.constant(0.0)) # Update the EMA act_x = self.quantizer(x) # act_x is the input after the activation # function, but before the quantizer. 
This is # done by using a qnoise_factor of 0 new_min = tf.squeeze(K.min(act_x, axis=axis, keepdims=True)) K.moving_average_update(self.ema_min, new_min, self.ema_decay) new_max = tf.squeeze(K.max(act_x, axis=axis, keepdims=True)) K.moving_average_update(self.ema_max, new_max, self.ema_decay) # Reset the qnoise factor to the previous value self.quantizer.update_qnoise_factor(prev_qnoise_factor) # Update the moving average when is_ema_training is True tf_utils.smart_cond( is_ema_training, true_fn=update_branch, false_fn=lambda: None) # Set the integer bits for the quantizer integer_bits = _get_integer_bits( min_value=self.ema_min, max_value=self.ema_max, bits=self.total_bits, symmetric=self.symmetric, keep_negative=self.keep_negative, is_clipping=self.po2_rounding) self.quantizer.integer.assign(integer_bits) return qx # Override get_weights since we do not want ema_min or ema_max to be public def get_weights(self): return [] # Override set_weights since we do not want ema_min or ema_max to be public def set_weights(self, weights): return def get_config(self): config = { "activation": self.quantizer.__class__.__name__, "total_bits": self.total_bits, "current_step": self.step.numpy(), "symmetric": self.symmetric, "quantization_delay": np.array(self.quantization_delay), "ema_freeze_delay": np.array(self.ema_freeze_delay), "ema_decay": np.array(self.ema_decay), "per_channel": self.per_channel, "po2_rounding": self.po2_rounding, "relu_neg_slope": self.relu_neg_slope } base_config = super(QAdaptiveActivation, self).get_config() return dict(list(base_config.items()) + list(config.items())) def get_quantization_config(self): self.quantizer.integer_bits = np.array(self.quantizer) return str(self.quantizer) def compute_output_shape(self, input_shape): return input_shape def get_prunable_weights(self): return [] # # Constraint class to clip weights and bias between -1 and 1 so that: # 1. quantization approximation is symmetric (b = 0). # 2. 
max(x) and min(x) are 1 and -1 respectively.
#
class Clip(Constraint):
  """Clips weight constraint."""

  # This function was modified from Keras minmaxconstraints.
  #
  # Constrains the weights to be between min/max values.
  #   min_value: the minimum norm for the incoming weights.
  #   max_value: the maximum norm for the incoming weights.
  #   constraint: previous constraint to be clipped.
  #   quantizer: quantizer to be applied to constraint.

  def __init__(self, min_value=0.0, max_value=1.0,
               constraint=None, quantizer=None):
    """Initializes Clip constraint class."""

    self.min_value = min_value
    self.max_value = max_value
    self.constraint = constraints.get(constraint)
    # Don't wrap yourself
    if isinstance(self.constraint, Clip):
      self.constraint = None
    self.quantizer = get_quantizer(quantizer)

  def __call__(self, w):
    """Clips values between min and max values."""
    if self.constraint:
      w = self.constraint(w)
      if self.quantizer:
        w = self.quantizer(w)
    w = tf.keras.backend.clip(w, self.min_value, self.max_value)
    return w

  def get_config(self):
    """Returns configuration of constraint class."""
    # NOTE(review): `constraint` and `quantizer` are not serialized here;
    # only the clip bounds round-trip through get_config.
    return {"min_value": self.min_value, "max_value": self.max_value}

  @classmethod
  def from_config(cls, config):
    # Avoid wrapping a Clip inside another Clip on deserialization.
    if isinstance(config.get('constraint', None), Clip):
      config['constraint'] = None
    config['constraint'] = constraints.get(config.get('constraint', None))
    config['quantizer'] = get_quantizer(config.get('quantizer', None))
    return cls(**config)


#
# Definition of Quantized NN classes. These classes were copied
# from the equivalent layers in Keras, and we modified to apply quantization.
# Similar implementations can be seen in the references.
#
class QDense(Dense, PrunableLayer):
  """Implements a quantized Dense layer."""

  # Most of these parameters follow the implementation of Dense in
  # Keras, with the exception of kernel_range, bias_range,
  # kernel_quantizer, bias_quantizer, and kernel_initializer.
# # kernel_quantizer: quantizer function/class for kernel # bias_quantizer: quantizer function/class for bias # kernel_range/bias_ranger: for quantizer functions whose values # can go over [-1,+1], these values are used to set the clipping # value of kernels and biases, respectively, instead of using the # constraints specified by the user. # # we refer the reader to the documentation of Dense in Keras for the # other parameters. def __init__(self, units, activation=None, use_bias=True, kernel_initializer="he_normal", bias_initializer="zeros", kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, kernel_quantizer=None, bias_quantizer=None, kernel_range=None, bias_range=None, **kwargs): if kernel_range is not None: warnings.warn("kernel_range is deprecated in QDense layer.") if bias_range is not None: warnings.warn("bias_range is deprecated in QDense layer.") self.kernel_range = kernel_range self.bias_range = bias_range self.kernel_quantizer = kernel_quantizer self.bias_quantizer = bias_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() self.quantizers = [ self.kernel_quantizer_internal, self.bias_quantizer_internal ] kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) super().__init__( units=units, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, 
bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, **kwargs, ) def call(self, inputs): if self.kernel_quantizer: quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel output = tf.keras.backend.dot(inputs, quantized_kernel) if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias output = tf.keras.backend.bias_add(output, quantized_bias, data_format="channels_last") if self.activation is not None: output = self.activation(output) return output def compute_output_shape(self, input_shape): assert input_shape and len(input_shape) >= 2 assert input_shape[-1] output_shape = list(input_shape) output_shape[-1] = self.units return tuple(output_shape) def get_config(self): config = { "units": self.units, "activation": activations.serialize( self.activation# Google internal code, commented out by copybara ), "use_bias": self.use_bias, "kernel_quantizer": constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), "kernel_initializer": initializers.serialize( self.kernel_initializer# Google internal code, commented out by copybara ), "bias_initializer": initializers.serialize( self.bias_initializer# Google internal code, commented out by copybara ), "kernel_regularizer": regularizers.serialize( self.kernel_regularizer# Google internal code, commented out by copybara ), "bias_regularizer": regularizers.serialize( self.bias_regularizer# Google internal code, commented out by copybara ), "activity_regularizer": regularizers.serialize( self.activity_regularizer# Google internal code, commented out by copybara ), 
"kernel_constraint": constraints.serialize( self.kernel_constraint# Google internal code, commented out by copybara ), "bias_constraint": constraints.serialize( self.bias_constraint# Google internal code, commented out by copybara ), "kernel_range": self.kernel_range, "bias_range": self.bias_range, } base_config = super(QDense, self).get_config() return dict(list(base_config.items()) + list(config.items())) def get_quantization_config(self): return { "kernel_quantizer": str(self.kernel_quantizer_internal), "bias_quantizer": str(self.bias_quantizer_internal), "activation": str(self.activation), "units" : str(self.units) } def get_quantizers(self): return self.quantizers def get_prunable_weights(self): return [self.kernel] def get_constraint(identifier, quantizer): """Gets the initializer. Args: identifier: A constraint, which could be dict, string, or callable function. quantizer: A quantizer class or quantization function Returns: A constraint class """ if identifier: if isinstance(identifier, dict) and identifier['class_name'] == 'Clip': return Clip.from_config(identifier['config']) else: return constraints.get(identifier) else: max_value = max(1, quantizer.max()) if hasattr(quantizer, "max") else 1.0 return Clip(-max_value, max_value, identifier, quantizer) def get_initializer(identifier): """Gets the initializer. Args: identifier: An initializer, which could be dict, string, or callable function. Returns: A initializer class Raises: ValueError: An error occurred when quantizer cannot be interpreted. 
""" if identifier is None: return None if isinstance(identifier, dict): if identifier['class_name'] == 'QInitializer': return QInitializer.from_config(identifier['config']) else: return initializers.get(identifier) elif isinstance(identifier, six.string_types): return initializers.get(identifier) elif callable(identifier): return identifier else: raise ValueError("Could not interpret initializer identifier: " + str(identifier)) ================================================ FILE: qkeras/qmac.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.keras import constraints from .quantizers import get_quantizer from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer from .qlayers import get_auto_range_constraint_initializer # QKeras needs to support more layers for matrix multiplication and shift # operations such as in Transformer. Such layers should be all placed here. class QScaleShift(tf.keras.layers.Layer, PrunableLayer): """Quantized scale and shift layer. output = scale * x + bias where scale and bias are each of shape (1,). QScaleShift is similar to the special case in QDepthwiseConv2D where kernel_size=(1,1). 
  However there are several differences: 1) There is no concept of padding
  and striding in QScaleShift since it's not a conv layer; 2)
  QDepthwiseConv2D expected min_ndim=4 for input shape; while QScaleShift
  input could be any shape; 3) In QDepthwiseConv2D each output channel has
  its own weight value; while QScaleShift share the same weight across the
  entire input tensor. 4) Since it's not a Conv operation, hardware
  implementation for QScaleShift and QDWConv2D is fundamentally different.
  Therefore it makes sense to separate them as two different types of
  layers.
  """

  def __init__(self, weight_quantizer=None, bias_quantizer=None,
               use_bias=True, activation=None,
               weight_initializer="he_normal", weight_regularizer=None,
               bias_initializer="zeros", bias_regularizer=None, **kwargs):
    """Initializes the layer.

    Args:
      weight_quantizer: quantizer (object or string) for the scale weight.
      bias_quantizer: quantizer (object or string) for the shift bias.
      use_bias: bool, whether a bias (shift) term is created and applied.
      activation: quantizer/activation applied to the layer output.
      weight_initializer: initializer for the scale weight.
      weight_regularizer: regularizer for the scale weight.
      bias_initializer: initializer for the shift bias.
      bias_regularizer: regularizer for the shift bias.
      **kwargs: additional keras Layer keyword arguments.
    """
    # NOTE(review): super().__init__() is invoked both here and at the end of
    # this method (with **kwargs); the second call is the one that forwards
    # the layer kwargs -- confirm the duplicated call is intentional.
    super().__init__()
    self.use_bias = use_bias
    self.weight_regularizer = weight_regularizer
    self.bias_regularizer = bias_regularizer
    self.weight_quantizer = weight_quantizer
    self.bias_quantizer = bias_quantizer

    self.weight_quantizer_internal = get_quantizer(self.weight_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)

    # The constraint is discarded (None passed); only the auto-range
    # initializer derived from the quantizer is kept.
    _, self.weight_initializer = (
        get_auto_range_constraint_initializer(
            self.weight_quantizer_internal, None, weight_initializer))

    _, self.bias_initializer = (
        get_auto_range_constraint_initializer(
            self.bias_quantizer_internal, None, bias_initializer))

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.weight_quantizer_internal, "_set_trainable_parameter"):
      self.weight_quantizer_internal._set_trainable_parameter()
    if hasattr(self.bias_quantizer_internal, "_set_trainable_parameter"):
      self.bias_quantizer_internal._set_trainable_parameter()

    self.quantizers = [self.weight_quantizer_internal,
                       self.bias_quantizer_internal]

    self.activation = get_quantizer(activation)

    super().__init__(**kwargs)

  def build(self, input_shape):
    """Creates the scalar (1, 1) scale weight and, optionally, the bias."""
    self.weight = self.add_weight(
        name="weight",
        shape=(1, 1),
        dtype="float32",
        initializer=self.weight_initializer,
        regularizer=self.weight_regularizer,
        trainable=True)

    if self.use_bias:
      self.bias = self.add_weight(
          name="bias",
          shape=(1, 1),
          dtype="float32",
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          trainable=True)
    else:
      self.bias = None

    self.built = True

  def call(self, inputs):
    """Computes activation(quantize(w) * inputs + quantize(b))."""
    quantized_weight = (
        self.weight_quantizer_internal(self.weight)
        if self.weight_quantizer_internal is not None else self.weight)

    # Scale: element-wise multiply with the (broadcast) scalar weight.
    outputs = tf.math.multiply(inputs, quantized_weight)

    if self.use_bias:
      quantized_bias = (
          self.bias_quantizer_internal(self.bias)
          if self.bias_quantizer_internal is not None else self.bias)
      # Shift: add the (broadcast) scalar bias.
      outputs = quantized_bias + outputs

    return self.activation(outputs) if self.activation is not None else outputs

  def get_config(self):
    """Returns the layer configuration for serialization."""
    config = {
        "weight_quantizer": constraints.serialize(
            self.weight_quantizer_internal  # Google internal code, commented out by copybara
        ),
        "bias_quantizer": constraints.serialize(
            self.bias_quantizer_internal  # Google internal code, commented out by copybara
        ),
        "weight_initializer": constraints.serialize(
            self.weight_initializer  # Google internal code, commented out by copybara
        ),
        "bias_initializer": constraints.serialize(
            self.bias_initializer  # Google internal code, commented out by copybara
        ),
        "activation": constraints.serialize(
            self.activation  # Google internal code, commented out by copybara
        ),
        "use_bias": self.use_bias,
        "weight_regularizer": constraints.serialize(
            self.weight_regularizer  # Google internal code, commented out by copybara
        ),
        "bias_regularizer": constraints.serialize(
            self.bias_regularizer  # Google internal code, commented out by copybara
        ),
    }
    base_config = super().get_config()
    base_config.update(config)
    return base_config

  def get_quantization_config(self):
    """Returns string representations of the quantizers for qtools."""
    return {
        "weight_quantizer": str(self.weight_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "activation": str(self.activation)
    }

  def get_quantizers(self):
    """Returns the internal [weight, bias] quantizer list."""
    return self.quantizers

  def get_prunable_weights(self):
    """Returns weights prunable by tensorflow_model_optimization."""
    return [self.weight, self.bias]


================================================ FILE: qkeras/qmodel.proto ================================================
// Copyright 2019 Google LLC
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// ==============================================================================
syntax = "proto2";

package qkeras;

import "google/protobuf/any.proto";

// Protobuf to represent a quantized machine learning model.
message QModel {
  // Layers of a quantized model.
  repeated QLayer qlayers = 1;
}

// Protobuf to represent an individual layer that supports quantization.
//
// TODO(akshayap): Add platform agnostic way of saving weights, ideally
// something that can mimic numpy arrays.
message QLayer {
  // Layer name.
  optional string name = 1;
  // Input shape for the layer.
  repeated int32 input_shape = 2 [packed = true];
  // Output shape for the layer.
  repeated int32 output_shape = 3 [packed = true];
  // Quantization configuration for this layer.
  optional Quantization quantization = 4;
  // Hardware parameters associated with this layer.
  optional HardwareParams hw_params = 5;
  // Model specific custom details.
  optional google.protobuf.Any details = 6;
}

// Quantization configurations for a model layer.
message Quantization {
  // Number of bits to perform quantization.
  optional int32 bits = 1;
  // Number of bits to the left of the decimal point.
  optional int32 integer = 2;
  // The minimum allowed power of two exponent
  optional int32 min_po2 = 3;
  // The maximum allowed power of two exponent
  optional int32 max_po2 = 4;
}

// Parameters for hardware synthesis of machine learning models.
message HardwareParams {
  // MAC bitwidth.
  optional int32 mac_bitwidth = 1;
}

================================================ FILE: qkeras/qnormalization.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
"""Definition of normalization quantization package."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six
import warnings

import tensorflow.compat.v2 as tf
from tensorflow.keras import constraints
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.layers import BatchNormalization
from tensorflow.python.framework import ops
from tensorflow.python.framework import smart_cond as tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn

from .qlayers import Clip
from .qlayers import get_auto_range_constraint_initializer
from .qlayers import get_quantizer
from .quantizers import quantized_relu_po2
from .quantizers import quantized_po2
from .safe_eval import safe_eval

from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer


class QBatchNormalization(BatchNormalization, PrunableLayer):
  """Quantized Batch Normalization layer.

  For training, mean and variance are not quantized.
  For inference, the quantized moving mean and moving variance are used.

  output = (x - mean) / sqrt(var + epsilon) * quantized_gamma +
           quantized_beta
  """

  def __init__(
      self,
      axis=-1,
      momentum=0.99,
      epsilon=1e-3,
      center=True,
      scale=True,
      activation=None,
      beta_initializer='zeros',
      gamma_initializer='ones',
      moving_mean_initializer='zeros',
      moving_variance_initializer='ones',
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_quantizer='quantized_po2(5)',
      gamma_quantizer='quantized_relu_po2(6, 2048)',
      mean_quantizer='quantized_po2(5)',
      variance_quantizer='quantized_relu_po2(6, quadratic_approximation=True)',
      inverse_quantizer=None,
      gamma_constraint=None,
      beta_constraint=None,
      # use quantized_po2 and enforce quadratic approximation
      # to get an even exponent for sqrt
      beta_range=None,
      gamma_range=None,
      **kwargs):
    # beta_range / gamma_range are accepted only for backward compatibility;
    # passing a non-None value triggers a deprecation warning below.
    if gamma_range is not None:
      warnings.warn('gamma_range is deprecated in QBatchNormalization layer.')

    if beta_range is not None:
      warnings.warn('beta_range is deprecated in QBatchNormalization layer.')

    self.gamma_range = gamma_range
    self.beta_range = beta_range
    self.activation = activation
    self.beta_quantizer = beta_quantizer
    self.gamma_quantizer = gamma_quantizer
    self.mean_quantizer = mean_quantizer
    self.variance_quantizer = variance_quantizer
    self.inverse_quantizer = inverse_quantizer

    # inverse_quantizer quantizes gamma / sqrt(var + eps) as a whole, so it
    # must not be combined with separate gamma / variance quantizers.
    if self.inverse_quantizer is not None:
      assert self.variance_quantizer is None and self.gamma_quantizer is None, (
          'If using the inverse quantizer, the gamma and variance quantizers '
          'should not be used in order to avoid quantizing a value twice.')

    self.beta_quantizer_internal = get_quantizer(self.beta_quantizer)
    self.gamma_quantizer_internal = get_quantizer(self.gamma_quantizer)
    self.mean_quantizer_internal = get_quantizer(self.mean_quantizer)
    self.variance_quantizer_internal = get_quantizer(self.variance_quantizer)
    self.inverse_quantizer_internal = get_quantizer(self.inverse_quantizer)

    # optimize parameter set to "auto" scaling mode if possible
    if hasattr(self.gamma_quantizer_internal, '_set_trainable_parameter'):
      self.gamma_quantizer_internal._set_trainable_parameter()
    if hasattr(self.variance_quantizer_internal, '_set_trainable_parameter'):
      self.variance_quantizer_internal._set_trainable_parameter()

    self.quantizers = [
        self.gamma_quantizer_internal,
        self.beta_quantizer_internal,
        self.mean_quantizer_internal,
        self.variance_quantizer_internal,
        self.inverse_quantizer_internal
    ]

    if scale and self.gamma_quantizer:
      gamma_constraint, gamma_initializer = (
          get_auto_range_constraint_initializer(
              self.gamma_quantizer_internal,
              gamma_constraint,
              gamma_initializer)
      )

    if center and self.beta_quantizer:
      beta_constraint, beta_initializer = (
          get_auto_range_constraint_initializer(
              self.beta_quantizer_internal,
              beta_constraint,
              beta_initializer)
      )

    # The following keras BatchNormalization features are unsupported by the
    # quantized implementation; warn and strip them from kwargs.
    if kwargs.get('fused', None):
      warnings.warn('batch normalization fused is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['fused']

    if kwargs.get('renorm', None):
      warnings.warn('batch normalization renorm is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['renorm']

    if kwargs.get('virtual_batch_size', None):
      warnings.warn('batch normalization virtual_batch_size is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['virtual_batch_size']

    if kwargs.get('adjustment', None):
      warnings.warn('batch normalization adjustment is disabled '
                    'in qkeras qnormalization.py.')
      del kwargs['adjustment']

    super().__init__(
        axis=axis,
        momentum=momentum,
        epsilon=epsilon,
        center=center,
        scale=scale,
        beta_initializer=beta_initializer,
        gamma_initializer=gamma_initializer,
        moving_mean_initializer=moving_mean_initializer,
        moving_variance_initializer=moving_variance_initializer,
        beta_regularizer=beta_regularizer,
        gamma_regularizer=gamma_regularizer,
        beta_constraint=beta_constraint,
        gamma_constraint=gamma_constraint,
        fused=False,
        renorm=False,
        virtual_batch_size=None,
        adjustment=None,
        **kwargs
    )

  def call(self, inputs, training=None):
    """Computes the quantized batch-normalized output of `inputs`."""
    # Quantize the learned parameters (gamma/beta) and the moving statistics
    # when the corresponding quantizer is configured.
    if self.scale and self.gamma_quantizer:
      quantized_gamma = self.gamma_quantizer_internal(self.gamma)
    else:
      quantized_gamma = self.gamma

    if self.center and self.beta_quantizer:
      quantized_beta = self.beta_quantizer_internal(self.beta)
    else:
      quantized_beta = self.beta

    if self.mean_quantizer:
      quantized_moving_mean = self.mean_quantizer_internal(self.moving_mean)
    else:
      quantized_moving_mean = self.moving_mean

    if self.variance_quantizer:
      quantized_moving_variance = self.variance_quantizer_internal(
          self.moving_variance)
    else:
      quantized_moving_variance = self.moving_variance

    training = self._get_training_value(training)

    # Compute the axes along which to reduce the mean / variance
    input_shape = inputs.shape
    ndims = len(input_shape)
    reduction_axes = [i for i in range(ndims) if i not in self.axis]

    # Broadcasting only necessary for single-axis batch norm where the axis is
    # not the last dimension
    broadcast_shape = [1] * ndims
    broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value

    def _broadcast(v):
      if (v is not None and len(v.shape) != ndims and
          reduction_axes != list(range(ndims - 1))):
        return array_ops.reshape(v, broadcast_shape)
      return v

    scale, offset = _broadcast(quantized_gamma), _broadcast(quantized_beta)

    # Determine a boolean value for `training`: could be True, False, or None.
    training_value = tf_utils.smart_constant_value(training)
    if training_value == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
      # Inference with statically-known training=False: use the (quantized)
      # moving statistics directly.
      quantized_mean, quantized_variance = (quantized_moving_mean,
                                            quantized_moving_variance)
    else:
      # Some of the computations here are not necessary when training==False
      # but not a constant. However, this makes the code simpler.
      keep_dims = len(self.axis) > 1
      mean, variance = self._moments(
          math_ops.cast(inputs, self._param_dtype),
          reduction_axes,
          keep_dims=keep_dims)

      moving_mean = self.moving_mean
      moving_variance = self.moving_variance

      # Select batch statistics when training, moving statistics otherwise.
      mean = tf_utils.smart_cond(training,
                                 lambda: mean,
                                 lambda: ops.convert_to_tensor(moving_mean))
      variance = tf_utils.smart_cond(
          training,
          lambda: variance,
          lambda: ops.convert_to_tensor(moving_variance))

      new_mean, new_variance = mean, variance

      # Normalization uses the quantized statistics; the moving averages are
      # updated with the unquantized values below.
      if self.mean_quantizer:
        quantized_mean = self.mean_quantizer_internal(mean)
      else:
        quantized_mean = mean

      if self.variance_quantizer:
        quantized_variance = self.variance_quantizer_internal(variance)
      else:
        quantized_variance = variance

      if self._support_zero_size_input():
        inputs_size = array_ops.size(inputs)
      else:
        inputs_size = None

      def _do_update(var, value):
        """Compute the updates for mean and variance."""
        return self._assign_moving_average(var, value, self.momentum,
                                           inputs_size)

      def mean_update():
        true_branch = lambda: _do_update(self.moving_mean, new_mean)
        false_branch = lambda: self.moving_mean
        return tf_utils.smart_cond(training, true_branch, false_branch)

      def variance_update():
        """Update the moving variance."""
        true_branch = lambda: _do_update(self.moving_variance, new_variance)
        false_branch = lambda: self.moving_variance
        return tf_utils.smart_cond(training, true_branch, false_branch)

      self.add_update(mean_update)
      self.add_update(variance_update)

    quantized_mean = _broadcast(math_ops.cast(quantized_mean, inputs.dtype))
    quantized_variance = _broadcast(
        math_ops.cast(quantized_variance, inputs.dtype))

    if offset is not None:
      offset = math_ops.cast(offset, inputs.dtype)
    if scale is not None:
      scale = math_ops.cast(scale, inputs.dtype)

    # Calculate and quantize the inverse
    inv = math_ops.rsqrt(quantized_variance + self.epsilon)
    if scale is not None:
      inv *= scale
    if self.inverse_quantizer_internal is not None:
      inv = self.inverse_quantizer_internal(inv)

    # Calculate the forward pass of the BN
    outputs = inputs * math_ops.cast(inv, inputs.dtype) + math_ops.cast(
        offset - quantized_mean * inv
        if offset is not None else -quantized_mean * inv, inputs.dtype)

    # If some components of the shape got lost due to adjustments, fix that.
    outputs.set_shape(input_shape)
    return outputs

  def get_config(self):
    """Returns the layer configuration for serialization."""
    config = {
        'axis': self.axis,
        'momentum': self.momentum,
        'epsilon': self.epsilon,
        'center': self.center,
        'scale': self.scale,
        'beta_quantizer': constraints.serialize(
            self.beta_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'gamma_quantizer': constraints.serialize(
            self.gamma_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'mean_quantizer': constraints.serialize(
            self.mean_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'variance_quantizer': constraints.serialize(
            self.variance_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'beta_initializer': initializers.serialize(
            self.beta_initializer  # Google internal code, commented out by copybara
        ),
        'gamma_initializer': initializers.serialize(
            self.gamma_initializer  # Google internal code, commented out by copybara
        ),
        'moving_mean_initializer': initializers.serialize(
            self.moving_mean_initializer  # Google internal code, commented out by copybara
        ),
        'moving_variance_initializer': initializers.serialize(
            self.moving_variance_initializer  # Google internal code, commented out by copybara
        ),
        'inverse_quantizer': initializers.serialize(
            self.inverse_quantizer_internal  # Google internal code, commented out by copybara
        ),
        'beta_regularizer': regularizers.serialize(
            self.beta_regularizer  # Google internal code, commented out by copybara
        ),
        'gamma_regularizer': regularizers.serialize(
            self.gamma_regularizer  # Google internal code, commented out by copybara
        ),
        'beta_constraint': constraints.serialize(
            self.beta_constraint  # Google internal code, commented out by copybara
        ),
        'gamma_constraint': constraints.serialize(
            self.gamma_constraint  # Google internal code, commented out by copybara
        ),
        'beta_range': self.beta_range,
        'gamma_range': self.gamma_range,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def compute_output_shape(self, input_shape):
    """Batch normalization preserves the input shape."""
    return input_shape

  def get_quantizers(self):
    """Returns the internal quantizer list (gamma, beta, mean, var, inv)."""
    return self.quantizers

  def get_prunable_weights(self):
    """BN parameters are not pruned."""
    return []


================================================ FILE: qkeras/qoctave.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Octave Convolution.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import re from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Add from tensorflow.keras.layers import AveragePooling2D from tensorflow.keras.layers import Conv2D from tensorflow.keras.layers import SeparableConv2D from tensorflow.keras.layers import UpSampling2D from .qlayers import QActivation from .qconvolutional import QConv2D from .qconvolutional import QSeparableConv2D from .qpooling import QAveragePooling2D def GetActivationSuffix(activation): """Returns suffix for layer name to facilitate debugging.""" if not activation: return "linear" if "po2" in activation: return "q2" elif "quantized_relu" in activation: suffix = "qr" elif "quantized_tanh" in activation: suffix = "qt" else: suffix = "qb" numbers = re.findall(r"[0-9]+", activation) numbers = [n + "_" if len(n) > 1 else n for n in numbers] return suffix + "".join(numbers) def QOctaveConv2D( filters, kernel_size, alpha, strides=(1, 1), padding="valid", kernel_initializer="he_normal", bias_initializer="zeros", # NOTE: kernel_regularizer not used with separable convolution kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, use_separable=True, name="", **kwargs): """Implements quantized QOctaveConv2D.""" def _QOctaveConv2DInternal(x): """Computes QOctaveConv2D on a tensor.""" x_h, x_l = x bias_quantizer = kwargs.get("bias_quantizer", None) kernel_quantizer = kwargs.get("kernel_quantizer", None) depthwise_quantizer = kwargs.get("depthwise_quantizer", None) pointwise_quantizer = kwargs.get("pointwise_quantizer", None) acc_quantizer = kwargs.get("acc_quantizer", None) pooling_quantizer = kwargs.get("pooling_quantizer", None) depthwise_activation = kwargs.get("depthwise_activation", None) activation = kwargs.get("activation", 
None) bias_range = kwargs.get("bias_range", 1.0) kernel_range = kwargs.get("kernel_range", 1.0) depthwise_range = kwargs.get("depthwise_range", 1.0) pointwise_range = kwargs.get("pointwise_range", 1.0) if activation: act_suffix = "_" + GetActivationSuffix(activation) acc_suffix = "_" + GetActivationSuffix(acc_quantizer) if alpha == -1.0: if use_separable: x_h = QSeparableConv2D( filters, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) else: x_h = QConv2D( filters, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) if activation: x_h = QActivation( activation, name=name + "_c_h_to_h_act" + act_suffix)( x_h) return [x_h, None] co_h = int(filters * (1 - alpha)) co_l = filters - co_h x_h_to_h = None x_h_to_l = None x_l_to_l = None x_l_to_h = None if co_h > 0: if x_h is not None: if use_separable: x_h_to_h = QSeparableConv2D( co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, 
pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) else: x_h_to_h = QConv2D( co_h, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_h_to_h")(x_h) if acc_quantizer: x_h_to_h = QActivation( acc_quantizer, name=name + "_c_h_to_h_act" + acc_suffix)(x_h_to_h) if co_l > 0: if x_h is not None: x_h_to_l = QAveragePooling2D( pool_size=2, strides=2, quantizer=pooling_quantizer, name=name + "_avg_h_to_l")(x_h) if use_separable: x_h_to_l = QSeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_h_to_l")(x_h_to_l) else: x_h_to_l = QConv2D( co_l, kernel_size, strides=strides, 
padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_h_to_l")(x_h_to_l) if acc_quantizer: x_h_to_l = QActivation( acc_quantizer, name=name + "_c_h_to_l_act" + acc_suffix)(x_h_to_l) if co_h > 0: if x_l is not None: _, height, width, _ = x_l.shape.as_list() if height == 1 and width == 1: local_kernel = 1 local_strides = 1 local_padding = "same" upsampling = False else: local_kernel = kernel_size local_strides = strides local_padding = padding upsampling = True if use_separable and upsampling: x_l_to_h = QSeparableConv2D( co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=pointwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_l_to_h")(x_l) else: x_l_to_h = QConv2D( co_h, local_kernel, strides=local_strides, padding=local_padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_l_to_h")(x_l) if acc_quantizer: 
x_l_to_h = QActivation( acc_quantizer, name=name + "_c_l_to_h_act" + acc_suffix)(x_l_to_h) if upsampling: x_l_to_h = UpSampling2D( size=(2, 2), name=name + "_u_l_to_h")(x_l_to_h) if co_l > 0: if x_l is not None: if use_separable: x_l_to_l = QSeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, depthwise_quantizer=depthwise_quantizer, pointwise_quantizer=depthwise_quantizer, bias_quantizer=bias_quantizer, depthwise_activation=depthwise_activation, pointwise_range=pointwise_range, depthwise_range=depthwise_range, bias_range=bias_range, name=name + "_c_l_to_l")(x_l) else: x_l_to_l = QConv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, kernel_quantizer=kernel_quantizer, bias_quantizer=bias_quantizer, kernel_range=kernel_range, bias_range=bias_range, name=name + "_c_l_to_l")(x_l) if acc_quantizer: x_l_to_l = QActivation( acc_quantizer, name=name + "_c_l_to_l_act" + acc_suffix)( x_l_to_l) if x_h_to_h is not None and x_l_to_h is not None: x_h = Add(name=name + "_a_h")([x_h_to_h, x_l_to_h]) elif x_h_to_h is not None: x_h = x_h_to_h elif x_l_to_h is not None: x_h = x_l_to_h else: x_h = None if x_l_to_l is not None and x_h_to_l is not None: x_l = Add(name=name + "_a_l")([x_l_to_l, x_h_to_l]) elif x_l_to_l is not None: x_l = x_l_to_l elif x_h_to_l is not None: x_l = x_h_to_l else: x_l = None if x_h is not None and activation is not None: x_h = QActivation(activation, name=name + "_h_act" + 
act_suffix)(x_h) if x_l is not None and activation is not None: x_l = QActivation(activation, name=name + "_l_act" + act_suffix)(x_l) return [x_h, x_l] return _QOctaveConv2DInternal def OctaveConv2D( filters, kernel_size, alpha, strides=(1, 1), padding="valid", kernel_initializer="he_normal", bias_initializer="zeros", kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, activation=None, use_separable=True, name="", **kwargs): """Implements OctaveConv2D.""" def _OctaveConv2DInternal(x): """Computes octave on tensor.""" acc_quantizer = kwargs.get("acc_quantizer", None) x_h, x_l = x if alpha == -1.0: if use_separable: x_h = SeparableConv2D( filters, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_h")(x_h) else: x_h = Conv2D( filters, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name+"_c_h_to_h")(x_h) if activation: x_h = Activation(activation, name=name + "_c_h_to_h_act")(x_h) return [x_h, None] co_h = int(filters * (1 - alpha)) co_l = filters - co_h x_h_to_h = None x_h_to_l = None x_l_to_l = None x_l_to_h = None if co_h > 0: if x_h is not None: if use_separable: x_h_to_h = SeparableConv2D( co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, 
pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_h")(x_h) else: x_h_to_h = Conv2D( co_h, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_h")(x_h) if activation: x_h_to_h = Activation( acc_quantizer, name=name + "_c_h_to_h_act")(x_h_to_h) if co_l > 0: if x_h is not None: x_h_to_l = AveragePooling2D(pool_size=2, strides=2, name=name + "_p_h_to_l")(x_h) if use_separable: x_h_to_l = SeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_l")(x_h_to_l) else: x_h_to_l = Conv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_h_to_l")(x_h_to_l) if activation: x_h_to_l = Activation( acc_quantizer, name=name + "_c_h_to_l_act")(x_h_to_l) if co_h > 0: if x_l is not None: _, height, width, _ = x_l.shape.as_list() if height == 1 and width == 1: local_kernel = 1 local_strides = 1 local_padding = "same" upsampling = False else: local_kernel = kernel_size local_strides = strides local_padding = padding upsampling = True if use_separable and upsampling: x_l_to_h = SeparableConv2D( 
co_h, kernel_size, strides=strides, padding=padding, depthwise_regularizer=kernel_regularizer, depthwise_constraint=kernel_constraint, depthwise_initializer=kernel_initializer, pointwise_regularizer=kernel_regularizer, pointwise_constraint=kernel_constraint, pointwise_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_h")(x_l) else: x_l_to_h = Conv2D( co_h, local_kernel, strides=local_strides, padding=local_padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_h")(x_l) if activation: x_l_to_h = Activation( acc_quantizer, name=name + "_c_l_to_h_act")(x_l_to_h) if upsampling: x_l_to_h = UpSampling2D( size=(2, 2), name=name + "_u_l_to_h")(x_l_to_h) if co_l > 0: if x_l is not None: if use_separable: x_l_to_l = SeparableConv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_l")(x_l) else: x_l_to_l = Conv2D( co_l, kernel_size, strides=strides, padding=padding, kernel_regularizer=kernel_regularizer, kernel_constraint=kernel_constraint, kernel_initializer=kernel_initializer, bias_regularizer=bias_regularizer, bias_constraint=bias_constraint, bias_initializer=bias_initializer, name=name + "_c_l_to_l")(x_l) if activation: x_l_to_l = Activation( acc_quantizer, name=name + "_c_l_to_l_act")(x_l_to_l) if x_h_to_h is not None and x_l_to_h is not None: x_h = Add(name=name + "_a_h")([x_h_to_h, x_l_to_h]) elif x_h_to_h is not None: x_h = x_h_to_h elif x_l_to_h is not None: x_h = x_l_to_h else: x_h = None if x_l_to_l is not None and x_h_to_l is not 
None: x_l = Add(name=name + "_a_l")([x_l_to_l, x_h_to_l]) elif x_l_to_l is not None: x_l = x_l_to_l elif x_h_to_l is not None: x_l = x_h_to_l else: x_l = None if x_h is not None: x_h = Activation(activation, name=name + "_h_act")(x_h) if x_l is not None: x_l = Activation(activation, name=name + "_l_act")(x_l) return (x_h, x_l) return _OctaveConv2DInternal ================================================ FILE: qkeras/qpooling.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Quantized pooling layers: QAveragePooling2D and QGlobalAveragePooling2D."""

import numpy as np
from tensorflow.keras import constraints
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D

from .qlayers import QActivation
from .quantizers import get_quantizer


class QAveragePooling2D(AveragePooling2D):
  """Computes the quantized version of AveragePooling2D.

  Extends keras AveragePooling2D with:
    average_quantizer: quantizer function/class applied to the 1/pool_area
      multiplication factor used to derive the average from the pooling sum.
    activation: optional quantizer function/class applied to the layer output.
  All other parameters follow keras AveragePooling2D.
  """

  def __init__(self, pool_size=(2, 2), strides=None, padding="valid",
               data_format=None, average_quantizer=None,
               activation=None, **kwargs):
    # Keep both the raw quantizer spec (string/dict/instance) and the resolved
    # quantizer object; the raw spec doubles as an "is quantization enabled"
    # flag in call().
    self.average_quantizer = average_quantizer
    self.average_quantizer_internal = get_quantizer(self.average_quantizer)
    self.quantizers = [self.average_quantizer_internal]

    # Optional output quantizer applied after pooling.
    if activation is not None:
      self.activation = get_quantizer(activation)
    else:
      self.activation = activation

    super().__init__(
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs
    )

  def call(self, inputs):
    """Performs quantized AveragePooling followed by QActivation.

    Since there is no specific parameter for averaging op, we couldn't apply
    averaging quantizer to the averaging op. We have two options:
    1. we perform our own average as sum first then multiply with the
       inversion of the division factor: sum(x) * quantize(1/pool_area)
    2. first, we call keras version of averaging first:
         y1 = keras_average(x)
       then multiply it with pool_size^2: y2 = y1 * pool_area
       Last, y3 = y2 * quantize(1/ pool_area)
    3. Improved based on #2, but multiply x with pool_area before averaging
       so that we don't lose precision during averaging. The order now
       becomes:
       first, multiply x with pool_area: y1 = x * pool_area
       then we call keras version of averaging: y2 = keras_average(y1)
       Last, y3 = y2 * quantize(1/ pool_area)
    4. Since there is sum_pooling operation, another solution is to use
       depthwise_conv2d with kernel weights = 1 to get the pooling sum.
       In this case we don't lose precision due to averaging. However, this
       solution will introduce extra weights to the layer, which might break
       our code elsewhere.

    Since we need to match software and hardware inference numerics, we are
    now using #3 in the implementation.
    """
    if self.average_quantizer:
      # Calculates the pool area
      if isinstance(self.pool_size, int):
        pool_area = self.pool_size * self.pool_size
      else:
        pool_area = np.prod(self.pool_size)

      # Calculates the pooling average of x*pool_area. Pre-scaling keeps
      # full precision through the built-in averaging op (option #3 above).
      x = super(QAveragePooling2D, self).call(inputs*pool_area)

      # Quantizes the multiplication factor.
      mult_factor = 1.0 / pool_area
      q_mult_factor = self.average_quantizer_internal(mult_factor)
      q_mult_factor = K.cast_to_floatx(q_mult_factor)

      # Computes pooling average.
      x = x * q_mult_factor
    else:
      # Since no quantizer is available, we directly call the keras layer
      x = super(QAveragePooling2D, self).call(inputs)

    if self.activation is not None:
      return self.activation(x)
    return x

  def get_config(self):
    """Returns the layer config, including the quantizer serializations."""
    config = {
        "average_quantizer":
            constraints.serialize(
                self.average_quantizer_internal# Google internal code, commented out by copybara
            ),
        "activation":
            constraints.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
    }
    base_config = super(QAveragePooling2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "average_quantizer": str(self.average_quantizer_internal),
        "activation": str(self.activation)
    }

  def get_quantizers(self):
    return self.quantizers


class QGlobalAveragePooling2D(GlobalAveragePooling2D):
  """Computes the quantized version of GlobalAveragePooling2D.

  Extends keras GlobalAveragePooling2D with:
    average_quantizer: quantizer function/class applied to the 1/pool_area
      multiplication factor used to derive the average from the pooling sum.
    activation: optional quantizer function/class applied to the layer output.
  All other parameters follow keras GlobalAveragePooling2D.
  """

  def __init__(self, data_format=None,
               average_quantizer=None, activation=None, **kwargs):
    # See QAveragePooling2D.__init__ for the raw-spec / resolved-quantizer
    # split; the raw spec is also the enable flag in call().
    self.average_quantizer = average_quantizer
    self.average_quantizer_internal = get_quantizer(self.average_quantizer)
    self.quantizers = [self.average_quantizer_internal]

    if activation is not None:
      self.activation = get_quantizer(activation)
    else:
      self.activation = activation

    super().__init__(data_format=data_format, **kwargs)

  def compute_pooling_area(self, input_shape):
    """Returns H*W of the spatial dimensions for the given input shape."""
    if not isinstance(input_shape, tuple):
      # TensorShape -> plain list so indexing below is uniform.
      input_shape = input_shape.as_list()
    if self.data_format == "channels_last":
      return input_shape[1] * input_shape[2]
    else:
      return input_shape[2] * input_shape[3]

  def call(self, inputs):
    """Performs quantized GlobalAveragePooling followed by QActivation.

    Since there is no specific parameter for averaging op, we couldn't apply
    averaging quantizer to the averaging op. We have two options:
    1. we perform our own average as sum first then multiply with the
       inversion of the division factor: sum(x) * quantize(1/pool_area)
    2. first, we call keras version of averaging first:
         y1 = keras_global_average(x)
       then multiply it with the denominator(pool_area) used by averaging:
         y2 = y1 * pool_area
       Last, y3 = y2 * quantize(1/ pool_area)
    3. we perform pooling sum, and then multiply the sum with the quantized
       inverse multiplication factor to get the average value.

    Our previous implementation uses option #2. Yet we observed minor
    numerical mismatch between software and hardware inference. Therefore
    we use #3 as the current implementation.
    """
    if self.average_quantizer:
      # Calculates pooling sum.
      if self.data_format == "channels_last":
        x = K.sum(inputs, axis=[1, 2], keepdims=self.keepdims)
      else:
        x = K.sum(inputs, axis=[2, 3], keepdims=self.keepdims)

      # Calculates the pooling area
      pool_area = self.compute_pooling_area(input_shape=inputs.shape)

      # Quantizes the inverse multiplication factor
      mult_factor = 1.0 / pool_area
      q_mult_factor = self.average_quantizer_internal(mult_factor)

      # Derives average pooling value from pooling sum.
      x = x * q_mult_factor
    else:
      # If quantizer is not available, calls the keras layer.
      x = super(QGlobalAveragePooling2D, self).call(inputs)

    if self.activation is not None:
      return self.activation(x)
    return x

  def get_config(self):
    """Returns the layer config, including the quantizer serializations."""
    config = {
        "average_quantizer":
            constraints.serialize(
                self.average_quantizer_internal# Google internal code, commented out by copybara
            ),
        "activation":
            constraints.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
    }
    base_config = super(QGlobalAveragePooling2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "average_quantizer": str(self.average_quantizer_internal),
        "activation": str(self.activation)
    }

  def get_quantizers(self):
    return self.quantizers



================================================
FILE: qkeras/qrecurrent.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Quantized Recurrent layers.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

import tensorflow as tf
from tensorflow.keras import activations
from tensorflow.keras import constraints
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import SimpleRNNCell
from tensorflow.keras.layers import LSTMCell
from tensorflow.keras.layers import GRUCell
from tensorflow.keras.layers import RNN
from tensorflow.keras.layers import Bidirectional
from tensorflow.python.util import nest
from tensorflow.python.ops import array_ops
# from tensorflow.python.ops import array_ops
from tensorflow.python.framework import ops
from tensorflow_model_optimization.python.core.sparsity.keras.prunable_layer import PrunableLayer
import tensorflow.keras.backend as K

from .qlayers import get_auto_range_constraint_initializer
from .qlayers import QActivation
from .quantizers import get_quantized_initializer
from .quantizers import get_quantizer


class QSimpleRNNCell(SimpleRNNCell):
  """
  Cell class for the QSimpleRNNCell layer.

  Most of these parameters follow the implementation of SimpleRNNCell in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer, and state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of SimpleRNNCell in Keras for the
  other parameters.
  """

  def __init__(self,
               units,
               activation='quantized_tanh',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               **kwargs):
    # Raw quantizer specs are kept as-is: they serve as enable flags in
    # call(), while the *_internal attributes hold the resolved quantizers.
    self.kernel_quantizer = kernel_quantizer
    self.recurrent_quantizer = recurrent_quantizer
    self.bias_quantizer = bias_quantizer
    self.state_quantizer = state_quantizer

    self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer)
    self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer)
    self.bias_quantizer_internal = get_quantizer(self.bias_quantizer)
    self.state_quantizer_internal = get_quantizer(self.state_quantizer)

    self.quantizers = [
        self.kernel_quantizer_internal, self.recurrent_quantizer_internal,
        self.bias_quantizer_internal, self.state_quantizer_internal
    ]

    # Lets trainable quantizers (e.g. with learned scales) register their
    # trainable parameters.
    if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"):
      self.kernel_quantizer_internal._set_trainable_parameter()
    if hasattr(self.recurrent_quantizer_internal, "_set_trainable_parameter"):
      self.recurrent_quantizer_internal._set_trainable_parameter()

    # Align constraints/initializers with the quantizer ranges so weights
    # stay representable after quantization.
    kernel_constraint, kernel_initializer = (
        get_auto_range_constraint_initializer(self.kernel_quantizer_internal,
                                              kernel_constraint,
                                              kernel_initializer))

    recurrent_constraint, recurrent_initializer = (
        get_auto_range_constraint_initializer(self.recurrent_quantizer_internal,
                                              recurrent_constraint,
                                              recurrent_initializer))

    if use_bias:
      bias_constraint, bias_initializer = (
          get_auto_range_constraint_initializer(self.bias_quantizer_internal,
                                                bias_constraint,
                                                bias_initializer))

    # Activation may itself be a quantizer spec (e.g. 'quantized_tanh').
    if activation is not None:
      activation = get_quantizer(activation)

    super().__init__(
        units=units,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        **kwargs
    )

  def call(self, inputs, states, training=None):
    """Runs one step: output = act(x.Wq + b_q + h_q.Uq), with quantization."""
    prev_output = states[0] if nest.is_nested(states) else states
    dp_mask = self.get_dropout_mask_for_cell(inputs, training)
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        prev_output, training)

    # Quantize the previous state before it enters the recurrent matmul.
    if self.state_quantizer:
      quantized_prev_output = self.state_quantizer_internal(prev_output)
    else:
      quantized_prev_output = prev_output

    if self.kernel_quantizer:
      quantized_kernel = self.kernel_quantizer_internal(self.kernel)
    else:
      quantized_kernel = self.kernel

    if dp_mask is not None:
      h = K.dot(inputs * dp_mask, quantized_kernel)
    else:
      h = K.dot(inputs, quantized_kernel)

    if self.bias is not None:
      if self.bias_quantizer:
        quantized_bias = self.bias_quantizer_internal(self.bias)
      else:
        quantized_bias = self.bias
      h = K.bias_add(h, quantized_bias)

    if rec_dp_mask is not None:
      quantized_prev_output = quantized_prev_output * rec_dp_mask

    if self.recurrent_quantizer:
      quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel)
    else:
      quantized_recurrent = self.recurrent_kernel

    output = h + K.dot(quantized_prev_output, quantized_recurrent)

    if self.activation is not None:
      output = self.activation(output)

    return output, [output]

  def get_config(self):
    """Returns the cell config, including the quantizer serializations."""
    config = {
        'kernel_quantizer':
            constraints.serialize(
                self.kernel_quantizer_internal# Google internal code, commented out by copybara
            ),
        'recurrent_quantizer':
            constraints.serialize(
                self.recurrent_quantizer_internal# Google internal code, commented out by copybara
            ),
        'bias_quantizer':
            constraints.serialize(
                self.bias_quantizer_internal# Google internal code, commented out by copybara
            ),
        'state_quantizer':
            constraints.serialize(
                self.state_quantizer_internal# Google internal code, commented out by copybara
            ),
    }
    base_config = super(QSimpleRNNCell, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))


class QSimpleRNN(RNN, PrunableLayer):
  """
  Class for the QSimpleRNN layer.

  Most of these parameters follow the implementation of SimpleRNN in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer and state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of SimpleRNN in Keras for the
  other parameters.
  """

  def __init__(self,
               units,
               activation='quantized_tanh',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               **kwargs):
    # 'enable_caching_device' is a cell-level kwarg; forward it to the cell
    # rather than the RNN wrapper.
    if 'enable_caching_device' in kwargs:
      cell_kwargs = {'enable_caching_device':
                     kwargs.pop('enable_caching_device')}
    else:
      cell_kwargs = {}
    cell = QSimpleRNNCell(
        units,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        kernel_quantizer=kernel_quantizer,
        recurrent_quantizer=recurrent_quantizer,
        bias_quantizer=bias_quantizer,
        state_quantizer=state_quantizer,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        dtype=kwargs.get('dtype'),
        trainable=kwargs.get('trainable', True),
        **cell_kwargs)
    super().__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs
    )
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # Reset cached dropout masks so each forward pass resamples them.
    self._maybe_reset_cell_dropout_mask(self.cell)
    return super(QSimpleRNN, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  def get_quantizers(self):
    return self.cell.quantizers

  def get_prunable_weights(self):
    # Weights exposed to tfmot pruning; bias is intentionally excluded.
    return [self.cell.kernel, self.cell.recurrent_kernel]

  # The properties below simply proxy the wrapped cell's attributes so the
  # layer presents the same API surface as keras SimpleRNN.
  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def kernel_quantizer_internal(self):
    return self.cell.kernel_quantizer_internal

  @property
  def recurrent_quantizer_internal(self):
    return self.cell.recurrent_quantizer_internal

  @property
  def bias_quantizer_internal(self):
    return self.cell.bias_quantizer_internal

  @property
  def state_quantizer_internal(self):
    return self.cell.state_quantizer_internal

  @property
  def kernel_quantizer(self):
    return self.cell.kernel_quantizer

  @property
  def recurrent_quantizer(self):
    return self.cell.recurrent_quantizer

  @property
  def bias_quantizer(self):
    return self.cell.bias_quantizer

  @property
  def state_quantizer(self):
    return self.cell.state_quantizer

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  def get_config(self):
    """Returns a flattened config (the nested cell entry is removed)."""
    config = {
        'units': self.units,
        'activation':
            activations.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(
                self.kernel_initializer# Google internal code, commented out by copybara
            ),
        'recurrent_initializer':
            initializers.serialize(
                self.recurrent_initializer# Google internal code, commented out by copybara
            ),
        'bias_initializer':
            initializers.serialize(
                self.bias_initializer# Google internal code, commented out by copybara
            ),
        'kernel_regularizer':
            regularizers.serialize(
                self.kernel_regularizer# Google internal code, commented out by copybara
            ),
        'recurrent_regularizer':
            regularizers.serialize(
                self.recurrent_regularizer# Google internal code, commented out by copybara
            ),
        'bias_regularizer':
            regularizers.serialize(
                self.bias_regularizer# Google internal code, commented out by copybara
            ),
        'activity_regularizer':
            regularizers.serialize(
                self.activity_regularizer# Google internal code, commented out by copybara
            ),
        'kernel_constraint':
            constraints.serialize(
                self.kernel_constraint# Google internal code, commented out by copybara
            ),
        'recurrent_constraint':
            constraints.serialize(
                self.recurrent_constraint# Google internal code, commented out by copybara
            ),
        'bias_constraint':
            constraints.serialize(
                self.bias_constraint# Google internal code, commented out by copybara
            ),
        'kernel_quantizer':
            constraints.serialize(
                self.kernel_quantizer_internal# Google internal code, commented out by copybara
            ),
        'recurrent_quantizer':
            constraints.serialize(
                self.recurrent_quantizer_internal# Google internal code, commented out by copybara
            ),
        'bias_quantizer':
            constraints.serialize(
                self.bias_quantizer_internal# Google internal code, commented out by copybara
            ),
        'state_quantizer':
            constraints.serialize(
                self.state_quantizer_internal# Google internal code, commented out by copybara
            ),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
    }
    base_config = super(QSimpleRNN, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "recurrent_quantizer": str(self.recurrent_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "state_quantizer": str(self.state_quantizer_internal),
        "activation": str(self.activation)
    }

  @classmethod
  def from_config(cls, config):
    # SimpleRNN has no 'implementation' argument; drop it if present in a
    # config written by another RNN flavor.
    if 'implementation' in config:
      config.pop('implementation')
    return cls(**config)


class QLSTMCell(LSTMCell):
  """
  Cell class for the QLSTMCell layer.

  Most of these parameters follow the implementation of LSTMCell in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer, state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of LSTMCell in Keras for the
  other parameters.
""" def __init__(self, units, activation='quantized_tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, kernel_quantizer=None, recurrent_quantizer=None, bias_quantizer=None, state_quantizer=None, dropout=0., recurrent_dropout=0., implementation=1, **kwargs): self.kernel_quantizer = kernel_quantizer self.recurrent_quantizer = recurrent_quantizer self.bias_quantizer = bias_quantizer self.state_quantizer = state_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) self.state_quantizer_internal = get_quantizer(self.state_quantizer) self.quantizers = [ self.kernel_quantizer_internal, self.recurrent_quantizer_internal, self.bias_quantizer_internal, self.state_quantizer_internal, ] if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() if hasattr(self.recurrent_quantizer_internal, "_set_trainable_parameter"): self.recurrent_quantizer_internal._set_trainable_parameter() kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) recurrent_constraint, recurrent_initializer = ( get_auto_range_constraint_initializer(self.recurrent_quantizer_internal, recurrent_constraint, recurrent_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) if recurrent_activation is not None: recurrent_activation = 
get_quantizer(recurrent_activation) super().__init__( units=units, activation=activation, use_bias=use_bias, recurrent_activation=recurrent_activation, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, unit_forget_bias=True, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=implementation, **kwargs ) def _compute_carry_and_output(self, x, h_tm1, c_tm1, quantized_recurrent): """Computes carry and output using split kernels.""" x_i, x_f, x_c, x_o = x h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1 i = self.recurrent_activation( x_i + K.dot(h_tm1_i, quantized_recurrent[:, :self.units])) f = self.recurrent_activation(x_f + K.dot( h_tm1_f, quantized_recurrent[:, self.units:self.units * 2])) c = f * c_tm1 + i * self.activation(x_c + K.dot( h_tm1_c, quantized_recurrent[:, self.units * 2:self.units * 3])) o = self.recurrent_activation( x_o + K.dot(h_tm1_o, quantized_recurrent[:, self.units * 3:])) return c, o def _compute_carry_and_output_fused(self, z, c_tm1): """Computes carry and output using fused kernels.""" z0, z1, z2, z3 = z i = self.recurrent_activation(z0) f = self.recurrent_activation(z1) c = f * c_tm1 + i * self.activation(z2) o = self.recurrent_activation(z3) return c, o def call(self, inputs, states, training=None): h_tm1_tmp = states[0] # previous memory state c_tm1_tmp = states[1] # previous carry state if self.state_quantizer: c_tm1 = self.state_quantizer_internal(c_tm1_tmp) h_tm1 = self.state_quantizer_internal(h_tm1_tmp) else: c_tm1 = c_tm1_tmp h_tm1 = h_tm1_tmp dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( h_tm1, training, count=4) if self.kernel_quantizer: 
quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel if self.recurrent_quantizer: quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel) else: quantized_recurrent = self.recurrent_kernel if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias if self.implementation == 1: if 0 < self.dropout < 1.: inputs_i = inputs * dp_mask[0] inputs_f = inputs * dp_mask[1] inputs_c = inputs * dp_mask[2] inputs_o = inputs * dp_mask[3] else: inputs_i = inputs inputs_f = inputs inputs_c = inputs inputs_o = inputs k_i, k_f, k_c, k_o = array_ops.split( quantized_kernel, num_or_size_splits=4, axis=1) x_i = K.dot(inputs_i, k_i) x_f = K.dot(inputs_f, k_f) x_c = K.dot(inputs_c, k_c) x_o = K.dot(inputs_o, k_o) if self.use_bias: b_i, b_f, b_c, b_o = array_ops.split( quantized_bias, num_or_size_splits=4, axis=0) x_i = K.bias_add(x_i, b_i) x_f = K.bias_add(x_f, b_f) x_c = K.bias_add(x_c, b_c) x_o = K.bias_add(x_o, b_o) if 0 < self.recurrent_dropout < 1.: h_tm1_i = h_tm1 * rec_dp_mask[0] h_tm1_f = h_tm1 * rec_dp_mask[1] h_tm1_c = h_tm1 * rec_dp_mask[2] h_tm1_o = h_tm1 * rec_dp_mask[3] else: h_tm1_i = h_tm1 h_tm1_f = h_tm1 h_tm1_c = h_tm1 h_tm1_o = h_tm1 x = (x_i, x_f, x_c, x_o) h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o) c, o = self._compute_carry_and_output(x, h_tm1, c_tm1, quantized_recurrent) else: if 0. 
< self.dropout < 1.: inputs = inputs * dp_mask[0] z = K.dot(inputs, quantized_kernel) z += K.dot(h_tm1, quantized_recurrent) if self.use_bias: z = K.bias_add(z, quantized_bias) z = array_ops.split(z, num_or_size_splits=4, axis=1) c, o = self._compute_carry_and_output_fused(z, c_tm1) h = o * self.activation(c) return h, [h, c] def get_config(self): config = { 'kernel_quantizer': constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), 'recurrent_quantizer': constraints.serialize( self.recurrent_quantizer_internal# Google internal code, commented out by copybara ), 'bias_quantizer': constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), 'state_quantizer': constraints.serialize( self.state_quantizer_internal# Google internal code, commented out by copybara ), } base_config = super(QLSTMCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) class QLSTM(RNN, PrunableLayer): """ Class for the QLSTM layer. Most of these parameters follow the implementation of LSTM in Keras, with the exception of kernel_quantizer, recurrent_quantizer, bias_quantizer, state_quantizer. kernel_quantizer: quantizer function/class for kernel recurrent_quantizer: quantizer function/class for recurrent kernel bias_quantizer: quantizer function/class for bias state_quantizer: quantizer function/class for states We refer the reader to the documentation of LSTM in Keras for the other parameters. 
"""

  def __init__(self,
               units,
               activation='quantized_tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               unit_forget_bias=True,
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               **kwargs):
    if implementation == 0:
      print('`implementation=0` has been deprecated, '
            'and now defaults to `implementation=1`.'
            'Please update your layer call.')
    # 'enable_caching_device' is a cell-level kwarg; forward it to the cell
    # rather than the RNN wrapper.
    if 'enable_caching_device' in kwargs:
      cell_kwargs = {'enable_caching_device':
                     kwargs.pop('enable_caching_device')}
    else:
      cell_kwargs = {}
    cell = QLSTMCell(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        unit_forget_bias=unit_forget_bias,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        kernel_quantizer=kernel_quantizer,
        recurrent_quantizer=recurrent_quantizer,
        bias_quantizer=bias_quantizer,
        state_quantizer=state_quantizer,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=implementation,
        dtype=kwargs.get('dtype'),
        trainable=kwargs.get('trainable', True),
        **cell_kwargs)
    super().__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs
    )
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # Reset cached dropout masks so each forward pass resamples them.
    self._maybe_reset_cell_dropout_mask(self.cell)
    return super(QLSTM, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  def get_quantizers(self):
    return self.cell.quantizers

  def get_prunable_weights(self):
    # Weights exposed to tfmot pruning; bias is intentionally excluded.
    return [self.cell.kernel, self.cell.recurrent_kernel]

  # The properties below simply proxy the wrapped cell's attributes so the
  # layer presents the same API surface as keras LSTM.
  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def recurrent_activation(self):
    return self.cell.recurrent_activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def unit_forget_bias(self):
    return self.cell.unit_forget_bias

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def kernel_quantizer_internal(self):
    return self.cell.kernel_quantizer_internal

  @property
  def recurrent_quantizer_internal(self):
    return self.cell.recurrent_quantizer_internal

  @property
  def bias_quantizer_internal(self):
    return self.cell.bias_quantizer_internal

  @property
  def state_quantizer_internal(self):
    return self.cell.state_quantizer_internal

  @property
  def kernel_quantizer(self):
    return self.cell.kernel_quantizer

  @property
  def recurrent_quantizer(self):
    return self.cell.recurrent_quantizer

  @property
  def bias_quantizer(self):
    return self.cell.bias_quantizer

  @property
  def state_quantizer(self):
    return self.cell.state_quantizer

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  @property
  def implementation(self):
    return self.cell.implementation

  def get_config(self):
    """Returns a flattened config (the nested cell entry is removed)."""
    config = {
        'units': self.units,
        'activation':
            activations.serialize(
                self.activation# Google internal code, commented out by copybara
            ),
        'recurrent_activation':
            activations.serialize(
                self.recurrent_activation# Google internal code, commented out by copybara
            ),
        'use_bias': self.use_bias,
        'kernel_initializer':
            initializers.serialize(
                self.kernel_initializer# Google internal code, commented out by copybara
            ),
        'recurrent_initializer':
            initializers.serialize(
                self.recurrent_initializer# Google internal code, commented out by copybara
            ),
        'bias_initializer':
            initializers.serialize(
                self.bias_initializer# Google internal code, commented out by copybara
            ),
        'unit_forget_bias': self.unit_forget_bias,
        'kernel_regularizer':
            regularizers.serialize(
                self.kernel_regularizer# Google internal code, commented out by copybara
            ),
        'recurrent_regularizer':
            regularizers.serialize(
                self.recurrent_regularizer# Google internal code, commented out by copybara
            ),
        'bias_regularizer':
            regularizers.serialize(
                self.bias_regularizer# Google internal code, commented out by copybara
            ),
        'activity_regularizer':
            regularizers.serialize(
                self.activity_regularizer# Google internal code, commented out by copybara
            ),
        'kernel_constraint':
            constraints.serialize(
                self.kernel_constraint# Google internal code, commented out by copybara
            ),
        'recurrent_constraint':
            constraints.serialize(
                self.recurrent_constraint# Google internal code, commented out by copybara
            ),
        'bias_constraint':
            constraints.serialize(
                self.bias_constraint# Google internal code, commented out by copybara
            ),
        'kernel_quantizer':
            constraints.serialize(
                self.kernel_quantizer_internal# Google internal code, commented out by copybara
            ),
        'recurrent_quantizer':
            constraints.serialize(
                self.recurrent_quantizer_internal# Google internal code, commented out by copybara
            ),
        'bias_quantizer':
            constraints.serialize(
                self.bias_quantizer_internal# Google internal code, commented out by copybara
            ),
        'state_quantizer':
            constraints.serialize(
                self.state_quantizer_internal# Google internal code, commented out by copybara
            ),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation,
    }
    base_config = super(QLSTM, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # String form of the quantizers, used by qtools/estimate utilities.
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "recurrent_quantizer": str(self.recurrent_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "state_quantizer": str(self.state_quantizer_internal),
        "activation": str(self.activation),
        "recurrent_activation": str(self.recurrent_activation),
    }

  @classmethod
  def from_config(cls, config):
    # Legacy configs may carry the deprecated implementation=0; map it to 1.
    if 'implementation' in config and config['implementation'] == 0:
      config['implementation'] = 1
    return cls(**config)


class QGRUCell(GRUCell):
  """
  Cell class for the QGRUCell layer.

  Most of these parameters follow the implementation of GRUCell in
  Keras, with the exception of kernel_quantizer, recurrent_quantizer,
  bias_quantizer and state_quantizer.

  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of GRUCell in Keras for the
  other parameters.
""" def __init__(self, units, activation='quantized_tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, kernel_quantizer=None, recurrent_quantizer=None, bias_quantizer=None, state_quantizer=None, dropout=0., recurrent_dropout=0., implementation=1, reset_after=False, **kwargs): self.kernel_quantizer = kernel_quantizer self.recurrent_quantizer = recurrent_quantizer self.bias_quantizer = bias_quantizer self.state_quantizer = state_quantizer self.kernel_quantizer_internal = get_quantizer(self.kernel_quantizer) self.recurrent_quantizer_internal = get_quantizer(self.recurrent_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) self.state_quantizer_internal = get_quantizer(self.state_quantizer) self.quantizers = [ self.kernel_quantizer_internal, self.recurrent_quantizer_internal, self.bias_quantizer_internal, self.state_quantizer_internal ] if hasattr(self.kernel_quantizer_internal, "_set_trainable_parameter"): self.kernel_quantizer_internal._set_trainable_parameter() if hasattr(self.recurrent_quantizer_internal, "_set_trainable_parameter"): self.recurrent_quantizer_internal._set_trainable_parameter() kernel_constraint, kernel_initializer = ( get_auto_range_constraint_initializer(self.kernel_quantizer_internal, kernel_constraint, kernel_initializer)) recurrent_constraint, recurrent_initializer = ( get_auto_range_constraint_initializer(self.recurrent_quantizer_internal, recurrent_constraint, recurrent_initializer)) if use_bias: bias_constraint, bias_initializer = ( get_auto_range_constraint_initializer(self.bias_quantizer_internal, bias_constraint, bias_initializer)) if activation is not None: activation = get_quantizer(activation) if recurrent_activation is not None: recurrent_activation = 
get_quantizer(recurrent_activation) super().__init__( units=units, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=implementation, reset_after=reset_after, **kwargs ) def call(self, inputs, states, training=None): # previous memory h_tm1_tmp = states[0] if nest.is_nested(states) else states dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3) rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( h_tm1_tmp, training, count=3) if self.state_quantizer: h_tm1 = self.state_quantizer_internal(h_tm1_tmp) else: h_tm1 = h_tm1_tmp if self.kernel_quantizer: quantized_kernel = self.kernel_quantizer_internal(self.kernel) else: quantized_kernel = self.kernel if self.recurrent_quantizer: quantized_recurrent = self.recurrent_quantizer_internal(self.recurrent_kernel) else: quantized_recurrent = self.kernel if self.use_bias: if self.bias_quantizer: quantized_bias = self.bias_quantizer_internal(self.bias) else: quantized_bias = self.bias if not self.reset_after: input_bias, recurrent_bias = quantized_bias, None else: input_bias, recurrent_bias = array_ops.unstack(quantized_bias) if self.implementation == 1: if 0. 
< self.dropout < 1.: inputs_z = inputs * dp_mask[0] inputs_r = inputs * dp_mask[1] inputs_h = inputs * dp_mask[2] else: inputs_z = inputs inputs_r = inputs inputs_h = inputs x_z = K.dot(inputs_z, quantized_kernel[:, :self.units]) x_r = K.dot(inputs_r, quantized_kernel[:, self.units:self.units * 2]) x_h = K.dot(inputs_h, quantized_kernel[:, self.units * 2:]) if self.use_bias: x_z = K.bias_add(x_z, input_bias[:self.units]) x_r = K.bias_add(x_r, input_bias[self.units: self.units * 2]) x_h = K.bias_add(x_h, input_bias[self.units * 2:]) if 0. < self.recurrent_dropout < 1.: h_tm1_z = h_tm1 * rec_dp_mask[0] h_tm1_r = h_tm1 * rec_dp_mask[1] h_tm1_h = h_tm1 * rec_dp_mask[2] else: h_tm1_z = h_tm1 h_tm1_r = h_tm1 h_tm1_h = h_tm1 recurrent_z = K.dot(h_tm1_z, quantized_recurrent[:, :self.units]) recurrent_r = K.dot(h_tm1_r, quantized_recurrent[:, self.units:self.units * 2]) if self.reset_after and self.use_bias: recurrent_z = K.bias_add(recurrent_z, recurrent_bias[:self.units]) recurrent_r = K.bias_add(recurrent_r, recurrent_bias[self.units:self.units * 2]) z = self.recurrent_activation(x_z + recurrent_z) r = self.recurrent_activation(x_r + recurrent_r) # reset gate applied after/before matrix multiplication if self.reset_after: recurrent_h = K.dot(h_tm1_h, quantized_recurrent[:, self.units * 2:]) if self.use_bias: recurrent_h = K.bias_add(recurrent_h, recurrent_bias[self.units * 2:]) recurrent_h = r * recurrent_h else: recurrent_h = K.dot(r * h_tm1_h, quantized_recurrent[:, self.units * 2:]) hh = self.activation(x_h + recurrent_h) else: if 0. 
< self.dropout < 1.: inputs = inputs * dp_mask[0] # inputs projected by all gate matrices at once matrix_x = K.dot(inputs, quantized_kernel) if self.use_bias: # biases: bias_z_i, bias_r_i, bias_h_i matrix_x = K.bias_add(matrix_x, input_bias) x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=-1) if self.reset_after: # hidden state projected by all gate matrices at once matrix_inner = K.dot(h_tm1, quantized_recurrent) if self.use_bias: matrix_inner = K.bias_add(matrix_inner, recurrent_bias) else: # hidden state projected separately for update/reset and new matrix_inner = K.dot(h_tm1, quantized_recurrent[:, :2 * self.units]) recurrent_z, recurrent_r, recurrent_h = array_ops.split( matrix_inner, [self.units, self.units, -1], axis=-1) z = self.recurrent_activation(x_z + recurrent_z) r = self.recurrent_activation(x_r + recurrent_r) if self.reset_after: recurrent_h = r * recurrent_h else: recurrent_h = K.dot(r * h_tm1, quantized_recurrent[:, 2 * self.units:]) hh = self.activation(x_h + recurrent_h) # previous and candidate state mixed by update gate h = z * h_tm1 + (1 - z) * hh return h, [h] def get_config(self): config = { 'kernel_quantizer': constraints.serialize( self.kernel_quantizer_internal# Google internal code, commented out by copybara ), 'recurrent_quantizer': constraints.serialize( self.recurrent_quantizer_internal# Google internal code, commented out by copybara ), 'bias_quantizer': constraints.serialize( self.bias_quantizer_internal# Google internal code, commented out by copybara ), 'state_quantizer': constraints.serialize( self.state_quantizer_internal# Google internal code, commented out by copybara ), } base_config = super(QGRUCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) class QGRU(RNN, PrunableLayer): """ Class for the QGRU layer. Most of these parameters follow the implementation of GRU in Keras, with the exception of kernel_quantizer, recurrent_quantizer, bias_quantizer and state_quantizer. 
  kernel_quantizer: quantizer function/class for kernel
  recurrent_quantizer: quantizer function/class for recurrent kernel
  bias_quantizer: quantizer function/class for bias
  state_quantizer: quantizer function/class for states

  We refer the reader to the documentation of GRU in Keras for the
  other parameters.
  """

  def __init__(self,
               units,
               activation='quantized_tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               kernel_quantizer=None,
               recurrent_quantizer=None,
               bias_quantizer=None,
               state_quantizer=None,
               dropout=0.,
               recurrent_dropout=0.,
               implementation=1,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               unroll=False,
               reset_after=False,
               **kwargs):
    if implementation == 0:
      print('`implementation=0` has been deprecated, '
            'and now defaults to `implementation=1`.'
            'Please update your layer call.')
    # `enable_caching_device` is a cell-level kwarg; forward it to the cell
    # instead of the RNN wrapper.
    if 'enable_caching_device' in kwargs:
      cell_kwargs = {'enable_caching_device':
                     kwargs.pop('enable_caching_device')}
    else:
      cell_kwargs = {}
    cell = QGRUCell(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        kernel_quantizer=kernel_quantizer,
        recurrent_quantizer=recurrent_quantizer,
        bias_quantizer=bias_quantizer,
        state_quantizer=state_quantizer,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=implementation,
        reset_after=reset_after,
        dtype=kwargs.get('dtype'),
        trainable=kwargs.get('trainable', True),
        **cell_kwargs)

    super().__init__(
        cell,
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        unroll=unroll,
        **kwargs
    )
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input_spec = [tf.keras.layers.InputSpec(ndim=3)]

  def call(self, inputs, mask=None, training=None, initial_state=None):
    # Reset per-call dropout masks, then defer to the Keras RNN machinery.
    self._maybe_reset_cell_dropout_mask(self.cell)
    return super(QGRU, self).call(
        inputs, mask=mask, training=training, initial_state=initial_state)

  def get_quantizers(self):
    return self.cell.quantizers

  def get_prunable_weights(self):
    return [self.cell.kernel, self.cell.recurrent_kernel]

  # The properties below simply expose the wrapped cell's attributes at the
  # layer level, mirroring the Keras GRU layer API.

  @property
  def units(self):
    return self.cell.units

  @property
  def activation(self):
    return self.cell.activation

  @property
  def recurrent_activation(self):
    return self.cell.recurrent_activation

  @property
  def use_bias(self):
    return self.cell.use_bias

  @property
  def kernel_initializer(self):
    return self.cell.kernel_initializer

  @property
  def recurrent_initializer(self):
    return self.cell.recurrent_initializer

  @property
  def bias_initializer(self):
    return self.cell.bias_initializer

  @property
  def kernel_regularizer(self):
    return self.cell.kernel_regularizer

  @property
  def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

  @property
  def bias_regularizer(self):
    return self.cell.bias_regularizer

  @property
  def kernel_constraint(self):
    return self.cell.kernel_constraint

  @property
  def recurrent_constraint(self):
    return self.cell.recurrent_constraint

  @property
  def bias_constraint(self):
    return self.cell.bias_constraint

  @property
  def kernel_quantizer_internal(self):
    return self.cell.kernel_quantizer_internal

  @property
  def recurrent_quantizer_internal(self):
    return self.cell.recurrent_quantizer_internal

  @property
  def bias_quantizer_internal(self):
    return self.cell.bias_quantizer_internal

  @property
  def state_quantizer_internal(self):
    return self.cell.state_quantizer_internal

  @property
  def kernel_quantizer(self):
    return self.cell.kernel_quantizer

  @property
  def recurrent_quantizer(self):
    return self.cell.recurrent_quantizer

  @property
  def bias_quantizer(self):
    return self.cell.bias_quantizer

  @property
  def state_quantizer(self):
    return self.cell.state_quantizer

  @property
  def dropout(self):
    return self.cell.dropout

  @property
  def recurrent_dropout(self):
    return self.cell.recurrent_dropout

  @property
  def implementation(self):
    return self.cell.implementation

  @property
  def reset_after(self):
    return self.cell.reset_after

  def get_config(self):
    # Flatten the cell configuration into the layer config. Quantizers are
    # serialized via `constraints.serialize`, matching how qkeras
    # deserializes them elsewhere.
    config = {
        'units': self.units,
        'activation': activations.serialize(
            self.activation
        ),
        'recurrent_activation': activations.serialize(
            self.recurrent_activation
        ),
        'use_bias': self.use_bias,
        'kernel_initializer': initializers.serialize(
            self.kernel_initializer
        ),
        'recurrent_initializer': initializers.serialize(
            self.recurrent_initializer
        ),
        'bias_initializer': initializers.serialize(
            self.bias_initializer
        ),
        'kernel_regularizer': regularizers.serialize(
            self.kernel_regularizer
        ),
        'recurrent_regularizer': regularizers.serialize(
            self.recurrent_regularizer
        ),
        'bias_regularizer': regularizers.serialize(
            self.bias_regularizer
        ),
        'activity_regularizer': regularizers.serialize(
            self.activity_regularizer
        ),
        'kernel_constraint': constraints.serialize(
            self.kernel_constraint
        ),
        'recurrent_constraint': constraints.serialize(
            self.recurrent_constraint
        ),
        'bias_constraint': constraints.serialize(
            self.bias_constraint
        ),
        'kernel_quantizer': constraints.serialize(
            self.kernel_quantizer_internal
        ),
        'recurrent_quantizer': constraints.serialize(
            self.recurrent_quantizer_internal
        ),
        'bias_quantizer': constraints.serialize(
            self.bias_quantizer_internal
        ),
        'state_quantizer': constraints.serialize(
            self.state_quantizer_internal
        ),
        'dropout': self.dropout,
        'recurrent_dropout': self.recurrent_dropout,
        'implementation': self.implementation,
        'reset_after': self.reset_after,
    }
    base_config = super(QGRU, self).get_config()
    # The cell is rebuilt from the flat config in __init__, so it must not
    # be serialized twice.
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

  def get_quantization_config(self):
    # Human-readable summary of the quantizers used by this layer.
    return {
        "kernel_quantizer": str(self.kernel_quantizer_internal),
        "recurrent_quantizer": str(self.recurrent_quantizer_internal),
        "bias_quantizer": str(self.bias_quantizer_internal),
        "state_quantizer": str(self.state_quantizer_internal),
        "activation": str(self.activation),
        "recurrent_activation": str(self.recurrent_activation),
    }

  @classmethod
  def from_config(cls, config):
    # `implementation=0` is deprecated in Keras; silently upgrade it.
    if 'implementation' in config and config['implementation'] == 0:
      config['implementation'] = 1
    return cls(**config)


class QBidirectional(Bidirectional):
  """
  Class for the QBidirectional wrapper.

  Most of these parameters follow the implementation of Bidirectional in
  Keras.

  We refer the reader to the documentation of Bidirectional in Keras for the
  other parameters.
  """

  def get_quantizers(self):
    """
    Returns quantizers in the order they were created.
    """
    return self.forward_layer.get_quantizers() + self.backward_layer.get_quantizers()

  @property
  def activation(self):
    return self.layer.activation

  def get_quantization_config(self):
    # Per-direction quantizer summaries, keyed by wrapped layer.
    return {
        "layer" : self.layer.get_quantization_config(),
        "backward_layer" : self.backward_layer.get_quantization_config()
    }


================================================
FILE: qkeras/qseparable_conv2d_transpose.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== import tensorflow as tf from tensorflow.keras.layers import Conv2DTranspose from tensorflow.keras.layers import InputSpec from .qconvolutional import deconv_output_length from .quantizers import get_quantizer from tensorflow.python.eager import context from tensorflow.python.keras import constraints from tensorflow.python.ops import array_ops from tensorflow.python.ops import array_ops class QSeparableConv2DTranspose(Conv2DTranspose): """Quantized Separable Conv2DTranspose layer.""" # Most of these parameters follow the implementation of Conv2DTranspose # in Keras, with the exception of following parameters. # # depthwise_activation: activation quantizer for depthwise convolution # pointwise_activation: activation quantizer for pointwise convolution # depthwise_kernel_quantizer: quantizer function/class for depthwise kernel # pointwise_kernel_quantizers: quantizer function/class for pointwise kernel # bias_quantizer: quantizer function/class for bias # # we refer the reader to the documentation of Conv2DTranspose in Keras for # the other parameters. 
def __init__(self, filters, kernel_size, strides=(1, 1), padding="valid", output_padding=None, depth_multiplier=1, depthwise_activation=None, pointwise_activation=None, use_bias=True, depthwise_kernel_quantizer=None, pointwise_kernel_quantizer=None, bias_quantizer=None, **kwargs): self.filters = filters self.kernel_size = kernel_size self.strides = strides self.padding = padding self.output_padding = output_padding self.depth_multiplier = depth_multiplier self.depthwise_activation = depthwise_activation self.pointwise_activation = pointwise_activation self.use_bias = use_bias self.depthwise_kernel_quantizer = depthwise_kernel_quantizer self.pointwise_kernel_quantizer = pointwise_kernel_quantizer self.bias_quantizer = bias_quantizer self.depthwise_kernel_quantizer_internal = get_quantizer( self.depthwise_kernel_quantizer) self.pointwise_kernel_quantizer_internal = get_quantizer( self.pointwise_kernel_quantizer) self.bias_quantizer_internal = get_quantizer(self.bias_quantizer) # optimize parameter set to "auto" scaling mode if possible for q in [self.depthwise_kernel_quantizer_internal, self.pointwise_kernel_quantizer_internal]: if hasattr(q, "_set_trainable_parameter"): q._set_trainable_parameter() if depthwise_activation is not None: self.depthwise_activation = get_quantizer(depthwise_activation) if pointwise_activation is not None: self.pointwise_activation = get_quantizer(pointwise_activation) super().__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, use_bias=use_bias, **kwargs) def _get_input_axis(self): if self.data_format == "channels_first": b_axis, c_axis, h_axis, w_axis = 0, 1, 2, 3 else: b_axis, c_axis, h_axis, w_axis = 0, 3, 1, 2 return b_axis, c_axis, h_axis, w_axis def _get_input_dims(self, input_shape): b_axis, c_axis, h_axis, w_axis = self._get_input_axis() return ( input_shape[b_axis], input_shape[c_axis], input_shape[h_axis], input_shape[w_axis]) def _get_output_size(self, inputs, output_padding, padding, strides, 
dilation_rate, kernel_weights): input_shape = array_ops.shape(inputs) batch_size, _, height, width = self._get_input_dims(input_shape) kernel_h, kernel_w = kernel_weights.shape[:2] stride_h, stride_w = strides dilation_h, dilation_w = dilation_rate[0], dilation_rate[1] if output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = output_padding # Infer the dynamic output shape: out_height = deconv_output_length( height, kernel_h, padding=padding, output_padding=out_pad_h, stride=stride_h, dilation=dilation_h, ) out_width = deconv_output_length( width, kernel_w, padding=padding, output_padding=out_pad_w, stride=stride_w, dilation=dilation_w, ) return (batch_size, out_height, out_width, kernel_h, kernel_w) def build(self, input_shape): self._input_shape = input_shape _, input_channel, _, _ = self._get_input_dims(input_shape) channel_axis = self._get_input_axis()[1] self.input_spec = InputSpec( min_ndim=self.rank + 2, axes={channel_axis: input_channel} ) # By enforcing the kernel shape, we can control how convolution is # done in depthwise or pointwise. # When setting kernel shape=(kw, kh, 1, input_channel), it does depthwise # convolution. depthwise_kernel_shape = self.kernel_size + (1, input_channel) self.depthwise_kernel = self.add_weight( name="depthwise_kernel", shape=depthwise_kernel_shape, initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint, trainable=True, dtype=self.dtype, ) # When setting kernel shape=(1, 1, output_channel, input_channel), it does # pointwise convolution. pointwise_kernel_shape = (1, 1, self.filters, input_channel) self.pointwise_kernel = self.add_weight( name="pointwise_kernel", shape=pointwise_kernel_shape, initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint, trainable=True, dtype=self.dtype, ) if self.use_bias: # This bias term is usally add at the end of the pointwise convolution. 
self.bias = self.add_weight( name="bias", shape=(self.filters,), initializer=self.bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint, trainable=True, dtype=self.dtype, ) else: self.bias = None self.built = True def compute_final_output_shape( self, input_shape, kernel_size, strides, is_depthwise=True): input_shape = tf.TensorShape(input_shape).as_list() # By using list(), output_shape is a copy of input_shape, instead of a # reference to input_shape. output_shape = list(input_shape) _, c_axis, h_axis, w_axis = self._get_input_axis() kernel_h, kernel_w = kernel_size stride_h, stride_w = strides if self.output_padding is None: out_pad_h = out_pad_w = None else: out_pad_h, out_pad_w = self.output_padding if is_depthwise: # Convolution is performed separately on each spatial domain. output_shape[c_axis] = input_shape[c_axis] else: # Pointwise convolution maps input channels to output filters. output_shape[c_axis] = self.filters output_shape[h_axis] = deconv_output_length( output_shape[h_axis], kernel_h, padding=self.padding, output_padding=out_pad_h, stride=stride_h, dilation=self.dilation_rate[0], ) output_shape[w_axis] = deconv_output_length( output_shape[w_axis], kernel_w, padding=self.padding, output_padding=out_pad_w, stride=stride_w, dilation=self.dilation_rate[1], ) return tf.TensorShape(output_shape) def conv_transpose_op(self, inputs, filters, strides, padding, output_padding, dilation_rate, kernel_quantizer, kernel_weights, use_bias, bias_quantizer, bias, activation, is_depthwise): """Transpose convolution op that shared by both depthwise and pointwise.""" batch_size, out_height, out_width, kernel_h, kernel_w = ( self._get_output_size(inputs, output_padding, padding, strides, dilation_rate, kernel_weights)) if kernel_quantizer: quantized_kernel = kernel_quantizer(kernel_weights) else: quantized_kernel = kernel_weights output_filters = 1 if is_depthwise else filters if self.data_format == "channels_first": output_shape = 
(batch_size, output_filters, out_height, out_width) else: output_shape = (batch_size, out_height, out_width, output_filters) output_shape_tensor = array_ops.stack(output_shape) # Split the input channels into groups. x = tf.split(inputs, self._input_shape[-1], axis=-1) if is_depthwise: # For depthwise convolution, since CPU doesn't support grouped # convolution, we run convolution on each slice of inputs and concat # the results. outputs = [ tf.keras.backend.conv2d_transpose( x=x[i], kernel=quantized_kernel[:, :, :, i : i + 1], output_shape=output_shape_tensor, strides=strides, padding=padding, data_format=self.data_format, dilation_rate=dilation_rate, ) for i in range(len(x)) ] # Concat the channels. outputs = tf.concat(outputs, axis=-1) else: outputs = tf.keras.backend.conv2d_transpose( inputs, quantized_kernel, output_shape_tensor, strides=strides, padding=padding, data_format=self.data_format, dilation_rate=dilation_rate, ) if not context.executing_eagerly(): # Infer the static output shape: out_shape = self.compute_final_output_shape( input_shape=inputs.shape, kernel_size=(kernel_h, kernel_w), strides=strides, is_depthwise=is_depthwise) outputs.set_shape(out_shape) if use_bias: quantized_bias = bias_quantizer(bias) if bias_quantizer else bias outputs = tf.keras.backend.bias_add( outputs, quantized_bias, data_format=self.data_format) if activation is not None: return activation(outputs) return outputs def call(self, inputs): input_shape = array_ops.shape(inputs) _, input_channel, _, _ = self._get_input_dims(input_shape) # First apply depthwise transposed convolution. x = self.conv_transpose_op( inputs=inputs, # Depthwise convolution doesn't operate across channels. Thereofore its # output channels is the same as input channels. 
filters=input_channel, strides=self.strides, padding=self.padding, output_padding=self.output_padding, dilation_rate=self.dilation_rate, kernel_quantizer=self.depthwise_kernel_quantizer_internal, kernel_weights=self.depthwise_kernel, use_bias=False, # Usually set bias=False for depthwise conv. bias_quantizer=None, bias=None, activation=self.depthwise_activation, is_depthwise=True) # Then apply pointwise transposed convolution x = self.conv_transpose_op( inputs=x, # Pointwise convolution maps input channels to output filters. filters=self.filters, strides=(1, 1), # strides is set to (1, 1) for pointwise conv. # Though it will not applied in pointwise conv, we need to set # padding here to pass value checking in keras utility functions. padding=self.padding, output_padding=None, # Prevent output_padding from adding twice. dilation_rate=self.dilation_rate, kernel_quantizer=self.pointwise_kernel_quantizer_internal, kernel_weights=self.pointwise_kernel, use_bias=self.use_bias, bias_quantizer=self.bias_quantizer_internal, bias=self.bias, activation=self.pointwise_activation, is_depthwise=False) return x def get_config(self): config = super().get_config() config.update({ "filters": self.filters, "kernel_size": self.kernel_size, "strides": self.strides, "padding": self.padding, "output_padding": self.output_padding, "dilation_rate": self.dilation_rate, "data_format": self.data_format, "depth_multiplier": self.depth_multiplier, "activation": self.activation, "use_bias": self.use_bias, "depthwise_kernel_quantizer": constraints.serialize( self.depthwise_kernel_quantizer_internal), "pointwise_kernel_quantizer": constraints.serialize( self.pointwise_kernel_quantizer_internal), "bias_quantizer": constraints.serialize( self.bias_quantizer_internal, ), }) return config def get_quantizers(self): return [ self.depthwise_kernel_quantizer_internal, self.pointwise_kernel_quantizer_internal, self.bias_quantizer_internal, self.depthwise_activation, self.pointwise_activation, ] def 
get_prunable_weights(self): w = [self.depthwise_kernel, self.pointwise_kernel] if self.use_bias: w.append(self.bias) return w ================================================ FILE: qkeras/qtools/DnC/divide_and_conquer.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """divide_and_conquer hardware cost profiling. Given a target throughput and a ML model, this implementation determines the key HW design parameters (bitwidth, unroll factors) for ML area optimization in a pipelined architecture. It generates recommended design parameters to assist downstream HW synthesis design. With this, it provides accurate HW cost modeling for ML training and ML complexity evaluation such as AV2/ROOF_ML. """ import enum import logging from typing import Any, List, Union import numpy as np import tensorflow as tf from qkeras import base_quantizer from qkeras import quantizers from qkeras.qtools import generate_layer_data_type_map from qkeras.qtools import qgraph from qkeras.qtools import qtools_util from qkeras.qtools.DnC import dnc_layer_cost_ace class CostMode(enum.Enum): ACE = 1 # cost is computed from theoretical equations. PE_AREA = 2 # cost is computed from compute area only. PE_BW_AREA = 3 # cost is computed from both compute and memory bandwidth. 
# pylint: disable=invalid-name
class DivideConquerGraph:
  """This class creates model graph structure and methods to access layers."""

  def __init__(
      self,
      model: tf.keras.Model,
      source_quantizers: base_quantizer.BaseQuantizer = None,
  ):
    self._model = model
    # Default the model-input quantizer to 8-bit when none is provided.
    self._source_quantizer_list = source_quantizers or [
        quantizers.quantized_bits(8, 0, 1)]

    (self._graph,
     self._source_quantizer_list) = qgraph.CreateGraph(
         model, source_quantizers, "quantized_bits(8, 0, 1)")

    # Propagate output quantizer info into the graph edges.
    qgraph.GraphPropagateActivationsToEdges(self._graph)

    # Map each layer to its data types (quantizers, multipliers,
    # accumulators, shapes) as computed by qtools.
    self._layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
        self._graph, self._source_quantizer_list, is_inference=False,
        keras_accumulator=None, for_reference=False)["layer_data_type_map"]

    # Create layer-to-index mapping dict.
    # NOTE(review): this reads networkx's private `_node` mapping — works
    # with current versions, but is not public API.
    self._layer_to_idx_dict = {}
    for idx in self._graph._node.keys():
      self._layer_to_idx_dict[self.idx_to_layer(idx)] = idx

  def idx_to_layer(self, idx: int):
    # Map layer index to the layer object.
    return self._graph._node[idx]["layer"][0]

  def layer_to_idx(self, layer: tf.keras.layers.Layer):
    # Map a layer object to index.
    return self._layer_to_idx_dict.get(layer, None)

  def get_first_node(self):
    # Get the source node of the graph.
    return qgraph.SOURCE

  def is_first_node(self, node: Union[int, tf.keras.layers.Layer]):
    # Find whether a given node is the first node of the graph.
    # Node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return idx == qgraph.SOURCE

  def get_last_node(self):
    # Find the last node of the graph.
    return qgraph.SINK

  def is_last_node(self, node: Union[int, tf.keras.layers.Layer]):
    # Find whether a given node is the last node of the graph.
    # Node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return idx == qgraph.SINK

  def get_prev_nodes(self, node: Union[int, tf.keras.layers.Layer]):
    # Find the predecessor nodes in the graph of the given node.
    # Node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return list(self._graph.predecessors(idx))

  def get_next_nodes(self, node: Union[int, tf.keras.layers.Layer]):
    # Find the successor nodes in the graph of the given node.
    # node could be either index value or layer object.
    idx = node if isinstance(node, int) else self.layer_to_idx(node)
    return list(self._graph.successors(idx))

  def get_layer_quantizer_bitwidth(
      self, node: Union[int, tf.keras.layers.Layer]):
    """Find various quantizer bitwidth of the current layer."""
    layer = self.idx_to_layer(node) if isinstance(node, int) else node
    if layer:
      layer_item = self._layer_map[layer]
      weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
      mac_quantizer = qtools_util.get_val(layer_item, "multiplier")
      acc_quantizer = qtools_util.get_val(layer_item, "accumulator")
      input_quantizer_list = qtools_util.get_val(
          layer_item, "input_quantizer_list")
      output_quantizer = qtools_util.get_val(layer_item, "output_quantizer")

      return {
          # TODO(lishanok@): Handle multiple input quantizers
          # in non-sequential models.
          "input_bits": input_quantizer_list[0].bits,
          # When the current layer has no concept of weight, there won't
          # be any weight quantizer.
          "weight_bits": weight_quantizer.bits if weight_quantizer else 0,
          # If mac bits don't exist, that means we don't have x * w type of
          # operations. In this case, pass input_bits through.
          "mac_bits": (
              mac_quantizer.output.bits
              if mac_quantizer else input_quantizer_list[0].bits),
          "acc_bits": (
              acc_quantizer.output.bits
              if acc_quantizer else input_quantizer_list[0].bits),
          "output_bits": output_quantizer.bits}
    else:
      # For the "dummy" head and tail nodes in the graph that we inserted at
      # the beginning and ending of the model graph, we run this branch.
      return {
          "input_bits": 0,
          "weight_bits": 0,
          "mac_bits": 0,
          "acc_bits": 0,
          "output_bits": 0
      }

  def get_layer_mac_count(self, node: Union[int, tf.keras.layers.Layer]):
    """Find the number of multiplier ops in the current layer."""
    layer = self.idx_to_layer(node) if isinstance(node, int) else node
    return (
        qtools_util.get_val(self._layer_map[layer], "operation_count", 0)
        if layer else 0)

  def get_layer_shapes(self, node: Union[int, tf.keras.layers.Layer]):
    # Returns weight/output/input shapes for the layer; 0 placeholders for
    # the dummy head/tail nodes.
    layer = self.idx_to_layer(node) if isinstance(node, int) else node

    # Multiple inputs with merge layers.
    input_shape_list = layer.input_shape if layer else 0
    if not isinstance(input_shape_list, list):
      input_shape_list = [input_shape_list]

    return {
        "weight_shape": (
            qtools_util.get_val(self._layer_map[layer], "w_shapes", 0)
            if layer else 0),
        "output_shape": (
            qtools_util.get_val(self._layer_map[layer], "output_shapes", 0)
            if layer else 0),
        "input_shape_list": (input_shape_list)}


class Choice:
  """This class stores a combination of HW design param values."""

  def __init__(self,
               l: float = 0,
               k: float = 0,
               cin_unroll: int = 0,
               cout_unroll: int = 0,
               kh_unroll: int = 0,
               kw_unroll: int = 0):
    """Initializer for a combination of hardware design parameters.
Args: l: Ratio between OutElementPerClk and ComputeOutElementPerClk k: Ratio between InElementPerClk and ComputeInElementPerClk cin_unroll: Unroll factors for input channel cout_unroll: Unroll factors for output channel kh_unroll: Unroll factors for kernel height kw_unroll: Unroll factors for kernel width """ self.k = k self.l = l self.cin_unroll = cin_unroll self.cout_unroll = cout_unroll self.kh_unroll = kh_unroll self.kw_unroll = kw_unroll def __str__(self): return (f"Choice(k={self.k}, l={self.l}, cin_unroll={self.cin_unroll}, " f"cout_unroll={self.cout_unroll} kh_unroll={self.kh_unroll}, " f"kw_unroll={self.kw_unroll})") def get_valid_unrolls(layer: tf.keras.layers.Layer, cout_unroll: int, target_pe_throughput: float): """Get valid unroll values where resulting throughput>=Target throughput.""" input_channel = qtools_util.get_layer_info(layer, "input_channel") output_channel = qtools_util.get_layer_info(layer, "output_channel") kernel_height = qtools_util.get_layer_info(layer, "kernel_height") kernel_width = qtools_util.get_layer_info(layer, "kernel_width") layer_type = qtools_util.get_layer_info(layer, "layer_type") if layer_type in ["QDepthwiseConv2D", "QAveragePooling2D", "MaxPooling2D", "QGlobalAveragePooling2D", "GlobalMaxPooling2D"]: # Since ops are done in each channel without cross-channel ops, # cin_unroll == cout_unroll in hardware. 
cin_unroll_list = [cout_unroll] else: # Cin_unroll needs to be a divisor of layer.input_channel cin_unroll_list = qtools_util.find_divisors(input_channel) # kw_unroll needs to be a divisor of layer.kernel_width kw_unroll_list = qtools_util.find_divisors(kernel_width) # kh_unroll needs to be a divisor of layer.kernel_height kh_unroll_list = qtools_util.find_divisors(kernel_height) valid_unrolls = [] for cin_unroll in cin_unroll_list: for kw_unroll in kw_unroll_list: for kh_unroll in kh_unroll_list: logging.debug("............cin_unroll: %d kh_unroll: %d kw_unroll: %d", cin_unroll, kh_unroll, kw_unroll) # Caculate computation throughput. pe_throughput = get_pe_throughput( layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll, input_channel, output_channel, kernel_height, kernel_width) logging.debug("............pe_throughput: %.2f", pe_throughput) if pe_throughput >= target_pe_throughput: # Save the valid combination of unroll factors to valid_unrolls. valid_unrolls.append((cin_unroll, kh_unroll, kw_unroll)) return valid_unrolls def get_per_layer_cost(layer_quantizer_bitwidth, layer_mac_count, layer_shapes, cin_unroll, cout_unroll, kh_unroll, kw_unroll, InElementPerClk, OutElementPerClk, mode): """Area per layer, including both PE and memory Bandwidth.""" # TODO(lishanok@): needs to add modes that support data-driven cost modeling. assert mode == CostMode.ACE, "Only CostMode.ACE is supported for now." # Compute memory is calculated according to ACE metric, translated to gates. mac_gates = dnc_layer_cost_ace.get_ace_mac_gates( xbit=layer_quantizer_bitwidth["input_bits"], wbit=layer_quantizer_bitwidth["weight_bits"], abit=layer_quantizer_bitwidth["acc_bits"], regen_params=False) # pe_area is not dependent on total num of MACs in the layer. pe_area = (mac_gates * cin_unroll * cout_unroll * kh_unroll * kw_unroll) # Memory includes input, output and weight memory, translated to gates. 
# TODO(lishanok@): weights could be stored in either SRAM or ROM, dependent # on user specification. memory_area = ( InElementPerClk * layer_quantizer_bitwidth["input_bits"] * dnc_layer_cost_ace.MemoryGatesPerBit["Register"] + OutElementPerClk * layer_quantizer_bitwidth["output_bits"] * dnc_layer_cost_ace.MemoryGatesPerBit["Register"] + np.prod(layer_shapes["weight_shape"]) * layer_quantizer_bitwidth["weight_bits"] * dnc_layer_cost_ace.MemoryGatesPerBit["ROM"]) return (pe_area + memory_area) def get_valid_candidates(input_value, output_to_input_ratio_max): candidate_list = qtools_util.find_divisors(input_value) # Add the other scenario where ComputeElementPerClk is multiple # of ElementPerClk. if output_to_input_ratio_max >= 2: candidate_list += [input_value * x for x in list( range(2, output_to_input_ratio_max+1))] return candidate_list def get_InBufferThru(InElementPerClk, input_channel): return InElementPerClk / input_channel def get_OutBufferThru(OutElementPerClk, output_channel, kernel_height, kernel_width, layer_type): if layer_type in ["UpSampling2D"]: return OutElementPerClk / ( output_channel * kernel_height * kernel_width) else: return OutElementPerClk / output_channel def is_bufferThru_greater_than_targetThru( layer_type: str, InElementPerClk: int, OutElementPerClk: int, input_channel: int, output_channel: int, kernel_height: int, kernel_width: int, target_out_throughput: float, target_in_throughput: float): """Verify whether the resulting buffer throughput > target throughput.""" # Calculate throughput of input buffer. InBuf_throughput = get_InBufferThru(InElementPerClk, input_channel) # Calculate throughput of output buffer. 
OutBuf_throughput = get_OutBufferThru( layer_type=layer_type, OutElementPerClk=OutElementPerClk, output_channel=output_channel, kernel_height=kernel_height, kernel_width=kernel_width) logging.debug( "...............InBuf_throughput: %.2f OutBuf_throughput: %.2f", InBuf_throughput, OutBuf_throughput) # Valid unroll values must meet buffer throughput requirements. return (InBuf_throughput >= target_out_throughput and OutBuf_throughput >= target_in_throughput) def set_best_global_cost_in_paths( OutElementPerClk_list, paths, layer_idx, cur_layer_idx, layer_quantizer_bitwidth, layer_mac_count, layer_shapes, mode): """Find the best global cost of the entire model and update the paths dict. Args: OutElementPerClk_list: list of OutElementPerClk for the current layer. paths: Dict that contains the choices that each layer has. layer_idx: Int. The index value of the current layer's predecessor. cur_layer_idx: current layer's index value. layer_quantizer_bitwidth: Dict that contains layer-related quantizer bitwidth, including acc_bits, mac_bits, input_bits and output_bits. layer_mac_count: Int. Use the number of multiplication as the operation count. To include the number of accumulations, we should multiply the value by 2, assuming accumulation count ~= multiplication count. layer_shapes: Dict with keys: weight_shape, input_shape_list and output_shape. mode: CostMode. The mode to calculate per layer cost. Returns: None. 
""" def calculate_cost(OutElementPerClk): cur_layer_cost = get_per_layer_cost( layer_quantizer_bitwidth, layer_mac_count, layer_shapes, 0, 0, 0, 0, 0, OutElementPerClk, mode) accumulative_cost = cur_layer_cost + paths[layer_idx][ OutElementPerClk]["acc_cost"] return (cur_layer_cost, accumulative_cost, OutElementPerClk) cost_and_values = list(map(calculate_cost, OutElementPerClk_list)) layer_cost, min_accumulative_cost, best_OutElementPerClk = ( min(cost_and_values, key=lambda x: x[1])) # For the initial node, we find the best path which contains a sentinel # choice, cost with that path, and the chosen OutElementPerClk # that will point to the corresponding choice of the following layer. paths[cur_layer_idx] = { best_OutElementPerClk: { "choice": Choice().__str__(), "cur_cost": layer_cost, "acc_cost": min_accumulative_cost, "OutElementPerClk": best_OutElementPerClk }} def backtrack(graph, paths): """Backtracking of the best path from the first layer to the last.""" best_path = {} # Get the second node from the graph as the first node is a sentinel node. layer_idx = graph.get_first_node() logging.debug("=======================") logging.debug("Trimmed Paths:") logging.debug("paths: %s", paths) logging.debug("=======================") # Find the best choice of the first layer. # TODO(lishanok@): extend code to non-sequential model where there are # multiple input layers best_OutElementPerClk = list(paths[layer_idx].keys())[0] best_entry = paths[layer_idx][best_OutElementPerClk] # Add layer name to improve readability. layer = graph.idx_to_layer(layer_idx) best_entry["layer_name"] = layer.name if layer else "None" best_path[layer_idx] = best_entry best_OutElementPerClk = best_entry["OutElementPerClk"] best_accumlative_cost = best_entry["acc_cost"] layer_idx = graph.get_next_nodes(layer_idx)[0] # Given the best choice of 1st layer, find the best choice for all following # layers by backtracking. 
while not graph.is_last_node(layer_idx): # Find current layer's best choice from the ptr (ie. best_OutElementPerClk) # stored in the best choice of the previous layer. best_entry = paths[layer_idx][best_OutElementPerClk] layer = graph.idx_to_layer(layer_idx) best_entry["layer_name"] = layer.name if layer else "None" best_path[layer_idx] = best_entry # Update the ptr to the next layer. best_OutElementPerClk = best_entry["OutElementPerClk"] # get the next node from the graph # TODO(lishanok@): extend the code to non-sequential model where there are # multiple next layers. layer_idx = graph.get_next_nodes(layer_idx)[0] # best_path stores the best hw param combination and cost for each layer. return best_path, best_accumlative_cost def update_cur_best_choices( cur_best_choices: List[Any], OutElementPerClk: int, prev_OutElementPerClk: int, cur_layer_cost: float, accumulative_cost: float, choice: Choice): """Update the cur_best_choices dict. At each layer, different choices of unroll factors will generate a prev_OutElementPerClk value. Some of the choices might generate the same prev_OutElementPerClk. So for each pre_OutElementPerClk, we only store the best choice which has the min cost. """ entry = cur_best_choices.get(prev_OutElementPerClk, None) existing_accumulative_cost = entry["acc_cost"] if entry else np.inf logging.debug("...............cost of cur_best_choices [%d]: %.2f", prev_OutElementPerClk, existing_accumulative_cost) if accumulative_cost < existing_accumulative_cost: # Stores the best choice and its cost for the given # prev_OutElementPerClk. We also store the ptr to next layer's # OutElementPerClk for future backtracking purpose. cur_best_choices[prev_OutElementPerClk] = { "choice": choice.__str__(), "cur_cost": cur_layer_cost, "acc_cost": accumulative_cost, "OutElementPerClk": OutElementPerClk} logging.debug( "...............Find better cost! 
Update cur_best_choices[%d]: %s", prev_OutElementPerClk, cur_best_choices[prev_OutElementPerClk]) def get_ComputeInElementPerClk(layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll): if layer_type in ["QConv2D", "QDense"]: return cin_unroll * kh_unroll * kw_unroll elif layer_type in ["QDepthwiseConv2D", "QAveragePooling2D", "MaxPooling2D"]: return cout_unroll * kh_unroll * kw_unroll elif layer_type in ["QGlobalAveragePooling2D", "GlobalMaxPooling2D", "UpSampling2D"]: return cout_unroll elif layer_type in ["Concatenate"]: return cin_unroll def get_InElementPerClk_base(ComputInElementPerClk, kh_unroll, kw_unroll): return int(ComputInElementPerClk / (kh_unroll * kw_unroll)) def get_pe_throughput(layer_type, cin_unroll, cout_unroll, kh_unroll, kw_unroll, input_channel, output_channel, kernel_height, kernel_width): """Calculate compute throughput for the given unroll factors.""" if layer_type in ["QConv2D", "QDense"]: return 1.0 * cin_unroll * cout_unroll * kh_unroll * kw_unroll / ( input_channel * output_channel * kernel_height * kernel_width) elif layer_type in ["QDepthwiseConv2D", "QAveragePooling2D", "MaxPooling2D", "UpSampling2D"]: return 1.0 * cout_unroll * kh_unroll * kw_unroll / ( output_channel * kernel_height * kernel_width) elif layer_type in ["QGlobalAveragePooling2D", "GlobalMaxPooling2D", "Concatenate"]: return 1.0 * cout_unroll / output_channel else: raise ValueError(f"Unspported layer type: {layer_type}") def get_target_throughputs(layer, target_out_throughput): """Update throughput for a given layer.""" # For layer that do not change the number of inference pixels, # throughput remains the same. For layers that decrease or increase the # number of inference pixels, the target throughput needs to update # accordingly. 
def multiply_elements_except_none(my_tuple): # Convert None values to np.nan and then use np.nanprod to calculate # the product return np.nanprod([x if x is not None else np.nan for x in my_tuple]) if layer: input_size = multiply_elements_except_none(layer.input_shape[:-1]) output_size = multiply_elements_except_none(layer.output_shape[:-1]) target_in_throughput = target_out_throughput * input_size / output_size else: target_in_throughput = target_out_throughput # Per new design, target_pe_throughput equals to target_out_throughput. target_pe_throughput = target_out_throughput return target_in_throughput, target_pe_throughput def calc_hw_params(graph, target_OutElementPerClk, target_out_throughput, input_quantizer_bits, compute_to_memory_max_ratio=4, memory_to_unroll_max_ratio=4, mode=CostMode.ACE): """Calculate HW params that minimizes total cost. Args: graph: DivideConquerGraph Object. Model graph. target_OutElementPerClk: Int. Target number of elements per clock cycle that the hardware needs to output. target_out_throughput: Float. Target number of inferences per clock cycle that the hardware needs to make. input_quantizer_bits: Int. Model's input quantizer bits. compute_to_memory_max_ratio: Int. Max allowed ratio between ComputeOutElement and OutElement memory_to_unroll_max_ratio: Int. Max allowed ratio between InElementPerClk and CinUnroll mode: CostMode. The mode to calculate per layer cost. Default is ACE. Returns: best_path: Dict. Stores the best hw param value at each layer and their irrespective cost. best_cost: Float. The best global cost of the entire model. """ # Paths stores the best choices for every layer. # For the layer_idx, for each OutElementPerClk, we can calculate the best hw # param choice. We store all these best choices, each choice will # correspond to one OutElementPerClk key. Path therefore has the format: # {layer: {OutElementPerClk: (choice, cost, downstream_OutElementPerClk)}} paths = {} # We start the computation from the last node. 
layer_idx = graph.get_last_node() # Store the hw choices for the last node (a dummy node) for the sake # of completion. paths[layer_idx] = { target_OutElementPerClk: { "choice": Choice().__str__(), "cur_cost": 0, "acc_cost": 0, "OutElementPerClk": -1}} logging.debug("====== Extracting HW params combinations per layer =====") # The following code calculates cost backward, from last layer to the first. while graph.get_prev_nodes(layer_idx): # Find precessor of the layer. # TODO(lishanok@): extend this code to multiple prev layers. cur_layer_idx = graph.get_prev_nodes(layer_idx)[0] cur_layer = graph.idx_to_layer(cur_layer_idx) logging.debug("processing layer_idx:%d name:%s type:%s ***", cur_layer_idx, getattr(cur_layer, "name", None), cur_layer.__class__.__name__) target_in_throughput, target_pe_throughput = get_target_throughputs( cur_layer, target_out_throughput) # Previous layer will generate a list of candidates for OutElementPerClk # values for the current layer. OutElementPerClk_list = list(paths[layer_idx].keys()) logging.debug("OutElementPerClk_list:%s", OutElementPerClk_list) layer_quantizer_bitwidth = graph.get_layer_quantizer_bitwidth(cur_layer) layer_mac_count = graph.get_layer_mac_count(cur_layer) layer_shapes = graph.get_layer_shapes(cur_layer) # TODO(lishanok@): need to extend to multiple input layers, i.e., more # than 1 layer will reach graph's first node. We should only exit if all # input layers are processed. if graph.is_first_node(cur_layer_idx): # Computation reaches the 1st node of the graph. We can now find the best # path of all OutElementPerClk choices at the first layer. 
set_best_global_cost_in_paths( OutElementPerClk_list, paths, layer_idx, cur_layer_idx, layer_quantizer_bitwidth, layer_mac_count, layer_shapes, mode) break # Get layer-related information input_channel = qtools_util.get_layer_info(cur_layer, "input_channel") output_channel = qtools_util.get_layer_info(cur_layer, "output_channel") kernel_height = qtools_util.get_layer_info(cur_layer, "kernel_height") kernel_width = qtools_util.get_layer_info(cur_layer, "kernel_width") layer_type = qtools_util.get_layer_info(cur_layer, "layer_type") output_channel_divisors = qtools_util.find_divisors(output_channel) logging.debug("input_channel: %d, output_channel: %d, kernel_height: %d, " "kernel_width: %d, weight_quantizer_bits: %d", input_channel, output_channel, kernel_height, kernel_width, layer_quantizer_bitwidth["weight_bits"]) cur_best_choices = {} for OutElementPerClk in OutElementPerClk_list: logging.debug("...OutElementPerClk: %d", OutElementPerClk) # Pass through OutElementPerClk and cost for non-essential layers. if layer_type in ["QBatchNormalization", "QActivation", "Dropout", "Reshape", "Activation", "ZeroPadding2D"]: logging.debug("...... Passing through layer_type: %s with 0 cost", layer_type) # Update the best choices dict with only 1 key-value pair. By # considering current light-computation layer in the graph # as a pass-through node, we set layer cost=0, and set the predecessor # node's OutElementPerClk the same as current node's OutElementPerClk. update_cur_best_choices( cur_best_choices, OutElementPerClk=OutElementPerClk, prev_OutElementPerClk=OutElementPerClk, cur_layer_cost=0, accumulative_cost=paths[layer_idx][OutElementPerClk]["acc_cost"], choice=Choice()) # Exit current iteration since there is no design param to explore # for these layer types. continue # For each of the possible OutElementPerClk values provided by the next # layer, we derive possible HW params choices of the current layer. 
for ComputeOutElementPerClk in get_valid_candidates( OutElementPerClk, compute_to_memory_max_ratio): logging.debug("......ComputeOutElementPerClk: %d", ComputeOutElementPerClk) l = OutElementPerClk / ComputeOutElementPerClk cout_unroll = ComputeOutElementPerClk # cout_unroll needs to be a divisor of output_channels if cout_unroll not in output_channel_divisors: continue logging.debug( ".........OutElementPerClk / ComputeOutElementPerClk = %.2f," "cout_unroll=%.2f", l, cout_unroll) # Find valid unroll values that meet pe throughput requirement. valid_unrolls = get_valid_unrolls(cur_layer, cout_unroll, target_pe_throughput) if not valid_unrolls: # Skip if no valid unroll values are found. logging.debug(".........No valid unroll values found!") continue for (cin_unroll, kh_unroll, kw_unroll) in valid_unrolls: # Check throughput requirement of each combination of unroll values. logging.debug(".........cin_unroll: %d, kh_unroll: %d, kw_unroll: %d", cin_unroll, kh_unroll, kw_unroll) ComputInElementPerClk = get_ComputeInElementPerClk( layer_type, cin_unroll=cin_unroll, cout_unroll=cout_unroll, kh_unroll=kh_unroll, kw_unroll=kw_unroll) # InElementPerClk = k*ComputeInElementPerClk/(kh_unroll * kw_unroll) # TODO(lishanok@): Confirm if it works for Concatenate layer. InElementPerClk_base = get_InElementPerClk_base( ComputInElementPerClk=ComputInElementPerClk, kh_unroll=kh_unroll, kw_unroll=kw_unroll) for InElementPerClk in get_valid_candidates( InElementPerClk_base, memory_to_unroll_max_ratio): # With given cin_unroll, check throughput requirement of each # possible candidate of InElementPerClk. 
logging.debug("............InElementPerClk: %d", InElementPerClk) k = cin_unroll / InElementPerClk # prev_OutElementPerClk is the predecessor node's OutElementPerClk prev_OutElementPerClk = InElementPerClk if is_bufferThru_greater_than_targetThru( layer_type=layer_type, InElementPerClk=InElementPerClk, OutElementPerClk=OutElementPerClk, input_channel=input_channel, output_channel=output_channel, kernel_height=kernel_height, kernel_width=kernel_width, target_out_throughput=target_out_throughput, target_in_throughput=target_in_throughput): # If valid unroll values meet buffer throughput requirements, # comput cost. # cost = current layer's cost + total of downstream layers' cost. # Since we derive cost iteratively starting from the last layer, # paths already store the total cost of the downstream layers. cur_layer_cost = get_per_layer_cost( layer_quantizer_bitwidth, layer_mac_count, layer_shapes, cin_unroll, cout_unroll, kh_unroll, kw_unroll, InElementPerClk, OutElementPerClk, mode) accumulative_cost = ( cur_layer_cost + paths[layer_idx][OutElementPerClk][ "acc_cost"]) logging.debug("...............Buf throughput is good! " "Accumulative_cost: %.2f", accumulative_cost) # Each choice is a hw param combination. choice = Choice(l, k, cin_unroll, cout_unroll, kh_unroll, kw_unroll) update_cur_best_choices(cur_best_choices, OutElementPerClk, prev_OutElementPerClk, cur_layer_cost, accumulative_cost, choice) if not cur_best_choices: logging.error("Cannot find any valid HW choice for layer %s! Exit!", cur_layer.name) return {}, None logging.debug("=======================") # Store the best choices of hw params for the current layer. Proceed to # the previous layer. paths[cur_layer_idx] = cur_best_choices layer_idx = cur_layer_idx # Predicessor node's OutBuf throughput is sucessor node's InBuf throughput. 
target_out_throughput = target_in_throughput return backtrack(graph, paths) def estimate_model_cost( model: tf.keras.Model, input_quantizer_bits: int = 8, target_OutElementPerClk: int = 10, target_out_throughput: float = 1.0, compute_to_memory_max_ratio: int = 4, memory_to_unroll_max_ratio: int = 4, mode: CostMode = CostMode.ACE): """Main function to divide and conquer cost modeling. Args: model: QKeras model. input_quantizer_bits: Model's input quantizer bits. target_OutElementPerClk: Target number of elements per clock cycle that the hardware needs to output. target_out_throughput: Target number of inferences per clock cycle that the hardware needs to make. compute_to_memory_max_ratio: Max allowed ratio between ComputeOutElement and OutElement memory_to_unroll_max_ratio: Max allowed ratio between InElementPerClk and CinUnroll mode: The mode to calculate per layer cost. Returns: best_path: Dict. Stores the best hw param value at each layer and their irrespective cost. best_cost: Float. The best global cost of the entire model. """ logging.info("Estimating model design params and cost...") # Generate graph graph = DivideConquerGraph(model) # Call the main function to generate optimal HW configs for all layers best_path, best_cost = calc_hw_params( graph=graph, target_OutElementPerClk=target_OutElementPerClk, target_out_throughput=target_out_throughput, input_quantizer_bits=input_quantizer_bits, compute_to_memory_max_ratio=( compute_to_memory_max_ratio), memory_to_unroll_max_ratio=( memory_to_unroll_max_ratio), mode=mode ) logging.info("best_design_params: %s", best_path) return (best_path, best_cost) ================================================ FILE: qkeras/qtools/DnC/dnc_layer_cost_ace.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""divide_and_conquer per layer cost modeling using ACE and data fitting.

For a given layer with its hardware design params, predict its cost in
actual ASIC implementation using ACE metric and actual MAC gates data points.
"""

import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit


# Rule-of-thumb mapping between bits and gates in memory area estimate.
MemoryGatesPerBit = {
    'Register': 10.0,
    'SRAM': 1.0,
    'ROM': 0.1,
}

# Previously calculated 3D polynomial coefficients with relative MAE<5%.
# Order matches mac_gates_polynomial_3d's (a, b, c).
MAC_POLY3D_PARAMS = np.array([7.70469119, 13.76199652, -92.15756665])

# MAC area data points generated from go/mac_vs_area.
# Each table is a 6x9 grid (weight bits x input bits) of gate counts for one
# accumulator bitwidth; empty cells (trailing commas) become NaN and are
# filtered out during fitting.
MAC24 = pd.read_csv(io.StringIO('''
283,280,286,313,325,336,356,,
274,290,325,372,401,428,485,,
285,325,388,510,568,614,713,,
308,372,509,750,865,1002,1167,,
336,427,617,1003,1151,1309,,,
356,480,722,1165,,,,,
'''), header=None)

MAC32 = pd.read_csv(io.StringIO('''
391,365,377,410,453,433,458,507,
364,382,418,466,497,521,578,685,
378,418,485,594,659,721,832,1035,
408,466,596,843,1029,1151,1321,1642,
432,521,724,1153,1363,1512,1797,,
457,578,830,1330,1551,1782,2273,,
'''), header=None)

MAC40 = pd.read_csv(io.StringIO('''
458,457,470,500,522,527,551,605,664
457,475,513,561,597,616,670,782,888
470,513,579,699,766,816,928,1150,1358
499,561,699,996,1161,1273,1499,1850,2189
527,612,818,1275,1545,1691,2054,2516,
549,670,927,1496,1798,2035,2490,3294,
'''), header=None)

MAC48 = pd.read_csv(io.StringIO('''
595,550,566,594,659,624,642,694,745
551,566,607,654,727,707,763,881,984
566,607,679,794,871,921,1017,1270,1489
594,655,793,1097,1285,1401,1668,2101,2378
624,711,921,1397,1816,1950,2277,2763,3301
642,762,1015,1669,1974,2264,2718,3631,4415
'''), header=None)


def mac_gates_polynomial_3d(xyz, a, b, c):
  """Using a 3d polynomial function to model MAC area.

  This function models the MAC area to be the sum of multipler, accumulator
  and a constant shift. Particularly, multiplier area is modeled to be linear
  # to input_bits * weight_bits, per ACE rule.

  Args:
    xyz: tuple includes input, weight and accumulator bits.
    a: polynomial coefficient 0.
    b: polynomial coefficient 1.
    c: polynomial coefficient 2.

  Returns:
    MAC area predicted by the function.
  """
  x, y, z = xyz
  return a * x * y + b * z + c


def gen_mac_gate_model(do_plot=False):
  """Generate the polynomial cost model coefficients using given data.

  Args:
    do_plot: Bool indicates whether plot the raw data and the fitted curve.

  Returns:
    params: The esitimated params of the polynomical function.
    mae_predict: Calculate the mean absolute error of the predictions.
    parameter_std_deviation: one standard deviation errors on the parameters,
      indicating the uncertainties of the params.
  """
  # acc bits, 1st index
  abit = np.array([24, 32, 40, 48])
  abit = np.repeat(abit, 54)

  # weight bits, 2nd index
  wbit = np.array([1, 2, 4, 8, 12, 16])
  wbit = np.tile(np.repeat(wbit, 9), 4)

  # input bits, 3rd index
  xbit = np.array([1, 2, 4, 8, 10, 12, 16, 24, 32])
  xbit = np.tile(xbit, 24)

  # Record all mac area data points associated with each accumulator bitwidth
  mac_arrs = []
  # Record the start and end index of the mac area data points
  # associated with each accumulator bitwidth
  mac_arrs_index = {}
  # Record index of all valid data points
  valid_index = []

  start_pos = 0
  for (mac_acc, acc_bits) in zip(
      [MAC24, MAC32, MAC40, MAC48], [24, 32, 40, 48]):
    cur_mac = mac_acc.to_numpy().reshape(-1)
    # Filter out nan data points
    cur_valid_index = ~np.isnan(cur_mac)
    cur_valid_mac = cur_mac[cur_valid_index]
    # Record the data length for each accumulator bits
    end_pos = start_pos + len(cur_valid_mac)
    mac_arrs_index[acc_bits] = (start_pos, end_pos)
    # Append mac areas of each accumulator bits to a list
    mac_arrs += list(cur_valid_mac)
    start_pos = end_pos
    valid_index += list(cur_valid_index)

  # Filter out invalid data
  xbit = xbit[valid_index]
  wbit = wbit[valid_index]
  abit = abit[valid_index]

  # curve fitting for all data points
  params, covariance = curve_fit(
      mac_gates_polynomial_3d, (xbit, wbit, abit), mac_arrs)
  # Compute one standard deviation errors on the parameters.
  parameter_std_deviation = np.sqrt(np.diag(covariance))

  # Calculate the mean absolute error between prediction and given data.
  mac_predict = mac_gates_polynomial_3d((xbit, wbit, abit), *params)
  mae = np.mean(np.abs(mac_predict - mac_arrs))
  mae_predict = mae / np.mean(mac_arrs)

  if do_plot:
    # Plot all raw data points
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(xbit, wbit, mac_arrs, label='Data')
    ax.set_xlabel('X_bits')
    ax.set_ylabel('W_bits')
    ax.set_zlabel('MAC')
    plt.title('MAC area data points')
    plt.show()

    # Generate a mesh grid for plotting.
    x_fit = np.linspace(min(xbit), max(xbit), 50)
    w_fit = np.linspace(min(wbit), max(wbit), 50)
    xmesh, wmesh = np.meshgrid(x_fit, w_fit)

    fig = plt.figure(figsize=(16, 16))
    index = 1
    # Plotting 3D fitting curve for each accumulator bitwidth
    for acc_bits in [24, 32, 40, 48]:
      ax = fig.add_subplot(2, 2, index, projection='3d')
      start_pos = mac_arrs_index[acc_bits][0]
      end_pos = mac_arrs_index[acc_bits][1]
      ax.scatter(xbit[start_pos:end_pos], wbit[start_pos:end_pos],
                 mac_arrs[start_pos:end_pos], label='Data')
      amesh = np.full(shape=(50, 50), fill_value=acc_bits)
      poly_fit = mac_gates_polynomial_3d((xmesh, wmesh, amesh), *params)
      ax.plot_surface(
          xmesh, wmesh, poly_fit, cmap='viridis', alpha=0.8,
          label=f'Fitted Surface | acc_bits={acc_bits}')
      ax.set_xlabel('X')
      ax.set_ylabel('W')
      ax.set_zlabel('MAC')
      ax.set_title(f'accumulator bitwidth: {acc_bits}')
      index += 1
    plt.show()

  return params, mae_predict, parameter_std_deviation


def get_ace_mac_gates(xbit, wbit, abit, regen_params=False):
  """Function to estimate MAC area, including 1 multipler and 1 accumulator.

  Args:
    xbit: int. input bits.
    wbit: int. weight bits.
    abit: int. accumulator bits.
    regen_params: Bool. If True, regenerate the MAC cost model coefficients.
      If False, reuse the previously generated model coefficients.

  Returns:
    Estimated MAC gates.
  """
  if regen_params:
    # Refit from the raw MAC tables; this also triggers plotting.
    mac_params, _, _ = gen_mac_gate_model(do_plot=True)
  else:
    mac_params = MAC_POLY3D_PARAMS

  return mac_gates_polynomial_3d((xbit, wbit, abit), *mac_params)


================================================
FILE: qkeras/qtools/__init__.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Export qtools package."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .run_qtools import QTools
from .settings import cfg as qtools_cfg


================================================
FILE: qkeras/qtools/config_public.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """configuration file for external usage.""" config_settings = { "default_source_quantizer": "quantized_bits(8, 0, 1)", "default_interm_quantizer": "quantized_bits(8, 0, 1)", "horowitz": { "fpm_add": [0.003125, 0], "fpm_mul": [0.002994791667, 0.001041666667, 0], "fp16_add": [0.4], "fp16_mul": [1.1], "fp32_add": [0.9], "fp32_mul": [3.7], "sram_rd": [9.02427321e-04, -2.68847858e-02, 2.08900804e-01, 0.0], "dram_rd": [20.3125, 0] }, "include_energy": { "QActivation": ["outputs"], "QAdaptiveActivation": ["outputs"], "Activation": ["outputs"], "QBatchNormalization": ["parameters"], "BatchNormalization": ["parameters"], "Add": ["op_cost"], "Subtract": ["op_cost"], "MaxPooling2D": ["op_cost"], "default": ["inputs", "parameters", "op_cost"] } } ================================================ FILE: qkeras/qtools/examples/example_generate_json.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Example code to generate weight and MAC sizes in a json file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.keras as keras

from qkeras import QActivation
from qkeras import QDense
from qkeras import quantizers
from qkeras.qtools import run_qtools


def hybrid_model():
  """hybrid model that mixes qkeras and keras layers."""

  x = x_in = keras.layers.Input((784,), name="input")
  # Un-quantized keras layers.
  x = keras.layers.Dense(300, name="d0")(x)
  x = keras.layers.Activation("relu", name="d0_act")(x)
  # Quantized qkeras layers (power-of-2 weights, 4-bit relu activation).
  x = QDense(100, kernel_quantizer=quantizers.quantized_po2(4),
             bias_quantizer=quantizers.quantized_po2(4),
             name="d1")(x)
  x = QActivation("quantized_relu(4,0)", name="d1_qr4")(x)
  x = QDense(
      10, kernel_quantizer=quantizers.quantized_po2(4),
      bias_quantizer=quantizers.quantized_po2(4),
      name="d2")(x)
  x = keras.layers.Activation("softmax", name="softmax")(x)

  return keras.Model(inputs=[x_in], outputs=[x])


def generate_json(in_model):
  """example to generate data type map for a given model.

  Args:
    in_model: qkeras model object

  Usage:
    input_quantizer_list: A list of input quantizers for the model.
      It could be in the form of:
      1. a list of quantizers, each quantizer for each one of the model
         inputs
      2. one single quantizer, which will be used for all of the model
         inputs
      3. None. Default input quantizer defined in config_xxx.py will be
         used for all of the model inputs
    for_reference: get energy for a reference model/trial model
      1. True: get baseline energy for a given model. Use keras_quantizer/
         keras_accumulator (or default_interm_quantizer in config_xxx.py if
         keras_quantizer/keras_accumulator not given) to quantize all layers
         in a model in order to calculate its energy. It serves the purpose
         of setting up a baseline energy for a given model architecture.
      2. False: get "real" energy for a given model using user-specified
         quantizers. For layers that are not quantized (keras layers) or
         have no user-specified quantizers (qkeras layers without
         quantizers specified), keras_quantizer and keras_accumulator (or
         default_interm_quantizer in config_xxx.py if keras_quantizer/
         keras_accumulator not given) will be used as their quantizers.
    process: technology process to use in configuration (horowitz, ...)
    weights_path: absolute path to the model weights
    is_inference: whether model has been trained already, which is needed
      to compute tighter bounds for QBatchNormalization Power estimation

    Other parameters (defined in config_xxx.py):
    1. "default_source_quantizer": used as default input quantizer if the
       user does not specify any input quantizers,
    2. "default_interm_quantizer": used as default quantizer for any
       intermediate variables such as multiplier, accumulator, weight/bias
       in a qkeras layer if the user does not specify the corresponding
       variable
    3. process_name: energy calculation parameters for different processes.
       "horowitz" is the process we use by default.
    4. "include_energy": what energy to include at each layer when
       calculating the total energy of the entire model.
       "parameters": memory access energy for loading model parameters.
       "inputs": memory access energy for reading inputs
       "outputs": memory access energy for writing outputs
       "op_cost": operation energy for multiplication and accumulation
  """

  input_quantizer_list = [quantizers.quantized_bits(8, 0, 1)]
  reference_internal = "int8"
  reference_accumulator = "int32"

  # generate QTools object which contains model data type map in json format
  q = run_qtools.QTools(
      in_model,
      # energy calculation using a given process
      process="horowitz",
      # quantizers for model inputs
      source_quantizers=input_quantizer_list,
      # training or inference with a pre-trained model
      is_inference=False,
      # path to pre-trained model weights
      weights_path=None,
      # keras_quantizer to quantize weight/bias in non-quantized keras layers
      keras_quantizer=reference_internal,
      # keras_accumulator to quantize MAC in un-quantized keras layers
      keras_accumulator=reference_accumulator,
      # calculating baseline energy or not
      for_reference=False)

  # print data type map
  q.qtools_stats_print()

  # dump the layer data map to a json file
  # json_name = "output.json"
  # q.qtools_stats_to_json(json_name)


if __name__ == "__main__":
  model = hybrid_model()
  model.summary()
  generate_json(model)
"""Example code to generate weight and MAC sizes in a json file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.keras as keras

from qkeras import QActivation
from qkeras import QDense
from qkeras import quantizers
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings


def hybrid_model():
  """hybrid model that mixes qkeras and keras layers."""

  x = x_in = keras.layers.Input((784,), name="input")
  # Un-quantized keras layers.
  x = keras.layers.Dense(300, name="d0")(x)
  x = keras.layers.Activation("relu", name="d0_act")(x)
  # Quantized qkeras layers (4-bit weights/bias, 4-bit relu activation).
  x = QDense(100, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
             bias_quantizer=quantizers.quantized_bits(4, 0, 1),
             name="d1")(x)
  x = QActivation("quantized_relu(4,0)", name="d1_qr4")(x)
  x = QDense(
      10, kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      name="d2")(x)
  x = keras.layers.Activation("softmax", name="softmax")(x)

  return keras.Model(inputs=[x_in], outputs=[x])


if __name__ == "__main__":
  # input parameters:
  # process: technology process to use in configuration (horowitz, ...)
  # weights_on_memory: whether to store parameters in dram, sram, or fixed
  # activations_on_memory: store activations in dram or sram
  # rd_wr_on_io: whether load data from dram to sram (consider sram as a
  #   cache for dram. If false, we will assume data will be already in SRAM
  # source_quantizers: quantizers for model input
  # is_inference: whether model has been trained already, which is
  #   needed to compute tighter bounds for QBatchNormalization Power
  #   estimation.
  # reference_internal: size to use for weight/bias/activation in
  #   get_reference energy calculation (int8, fp16, fp32)
  # reference_accumulator: accumulator and multiplier type in get_reference
  #   energy calculation
  model = hybrid_model()
  model.summary()

  reference_internal = "int8"
  reference_accumulator = "int32"

  # By setting for_reference=True, we create QTools object which uses
  # keras_quantizer to quantize weights/bias and
  # keras_accumulator to quantize MAC variables for all layers. Obviously,
  # this overwrites any quantizers that user specified in the qkeras layers.
  # The purpose of doing so is to enable user to calculate a baseline energy
  # number for a given model architecture and compare it against quantized
  # models.
  q = run_qtools.QTools(
      model,
      # energy calculation using a given process
      process="horowitz",
      # quantizers for model input
      source_quantizers=[quantizers.quantized_bits(8, 0, 1)],
      is_inference=False,
      # absolute path (including filename) of the model weights
      weights_path=None,
      # keras_quantizer to quantize weight/bias in un-quantized keras layers
      keras_quantizer=reference_internal,
      # keras_quantizer to quantize MAC in un-quantized keras layers
      keras_accumulator=reference_accumulator,
      # whether calculate baseline energy
      for_reference=True)

  # calculate energy of the derived data type map.
  ref_energy_dict = q.pe(
      # whether to store parameters in dram, sram, or fixed
      weights_on_memory="sram",
      # store activations in dram or sram
      activations_on_memory="sram",
      # minimum sram size in number of bits
      min_sram_size=8*16*1024*1024,
      # whether load data from dram to sram (consider sram as a cache
      # for dram. If false, we will assume data will be already in SRAM
      rd_wr_on_io=False)

  # get stats of energy distribution in each layer
  reference_energy_profile = q.extract_energy_profile(
      qtools_settings.cfg.include_energy, ref_energy_dict)
  # extract sum of energy of each layer according to the rule specified in
  # qtools_settings.cfg.include_energy
  total_reference_energy = q.extract_energy_sum(
      qtools_settings.cfg.include_energy, ref_energy_dict)
  print("Baseline energy profile:", reference_energy_profile)
  print("Total baseline energy:", total_reference_energy)

  # By setting for_reference=False, we quantize the model using quantizers
  # specified by users in qkeras layers. For hybrid models where there are
  # mixture of unquantized keras layers and quantized qkeras layers, we use
  # keras_quantizer to quantize weights/bias and keras_accumulator to
  # quantize MAC variables for all keras layers.
  q = run_qtools.QTools(
      model, process="horowitz",
      source_quantizers=[quantizers.quantized_bits(8, 0, 1)],
      is_inference=False, weights_path=None,
      keras_quantizer=reference_internal,
      keras_accumulator=reference_accumulator,
      for_reference=False)
  trial_energy_dict = q.pe(
      weights_on_memory="sram",
      activations_on_memory="sram",
      min_sram_size=8*16*1024*1024,
      rd_wr_on_io=False)
  trial_energy_profile = q.extract_energy_profile(
      qtools_settings.cfg.include_energy, trial_energy_dict)
  total_trial_energy = q.extract_energy_sum(
      qtools_settings.cfg.include_energy, trial_energy_dict)
  print("energy profile:", trial_energy_profile)
  print("Total energy:", total_trial_energy)
"""Generates MAC, input and output datatype for a qkeras model."""

import collections
import copy
import numpy as np
import sys
import networkx as nx

from qkeras.qtools import qgraph
from qkeras.qtools import qtools_util
from qkeras.qtools import quantized_operators
from qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module
from qkeras.qtools.settings import cfg
from qkeras.qtools.quantized_operators import adder_factory
from qkeras.qtools.quantized_operators.fused_bn_factory import FusedBNFactory


class TagMissingError(ValueError):
  # Raised when a layer carries a quantizer that qtools does not support.
  pass


# Per-layer record of all derived quantizer/shape/op-count information.
LayerDataType = collections.namedtuple(
    "LayerDataType",
    [
        "input_quantizer_list",
        "multiplier",
        "accumulator",
        "weight_quantizer",
        "w_shapes",
        "bias_quantizer",
        "b_shapes",
        "output_quantizer",
        "output_shapes",
        "operation_count",
    ],
)

# Class names of qkeras compute layers handled by the dense/conv branch.
# NOTE(review): "QConv2D" appears twice -- the duplicate is harmless for
# membership tests but is likely unintended.
QKERAS_LAYERS = [
    "QDense",
    "QConv1D",
    "QConv2D",
    "QDepthwiseConv2D",
    "QConv2D",
    "QConv2DTranspose",
]

# Un-quantized keras counterparts of the layers above.
KERAS_LAYERS = [
    "Dense",
    "Conv1D",
    "Conv2D",
    "DepthwiseConv2D",
    "Conv2DTranspose",
]


def get_bn_quantizers(layer, quantizer_factory, cfg, keras_quantizer,
                      input_quantizer, is_inference, for_reference,
                      model_weights_already_quantized):
  """Extract quantizers from a given batchnorm layer.

  Returns the (gamma, beta, mean, variance, inverse) quantizers for the
  layer, falling back to defaults when the layer is an un-quantized keras
  BatchNormalization or when running in reference mode.
  """
  # QKeras layers might be mixed with keras layers.
  if for_reference or not hasattr(layer, "get_quantizers"):
    # Keras BatchNorm layer mixed with quantized model
    # -> no reference mode
    # All five quantizers default to the configured intermediate quantizer.
    gamma_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    beta_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    mean_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    variance_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)
    inverse_quantizer = quantizer_factory.make_default_quantizer(
        mode=cfg.default_interm_quantizer)

    # A user-supplied keras_quantizer overrides the config default.
    if keras_quantizer:
      gamma_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      beta_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      mean_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      variance_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
      inverse_quantizer = quantizer_factory.make_default_quantizer(
          mode=keras_quantizer)
  else:
    # QKeras batchnorm layer: start from the layer's own quantizers and
    # fill in per-quantizer fallbacks where the layer left them unset.
    (qkeras_gamma_quantizer, qkeras_beta_quantizer,
     qkeras_mean_quantizer, qkeras_variance_quantizer,
     qkeras_inverse_quantizer) = layer.get_quantizers()

    # Beta/mean fall back to a clone of the input quantizer; gamma/
    # variance/inverse fall back to the config default.
    if not qkeras_beta_quantizer:
      beta_quantizer = quantizer_factory.clone_quantizer(input_quantizer)
    else:
      beta_quantizer = quantizer_factory.make_quantizer(
          qkeras_beta_quantizer)

    if not qkeras_mean_quantizer:
      mean_quantizer = quantizer_factory.clone_quantizer(input_quantizer)
    else:
      mean_quantizer = quantizer_factory.make_quantizer(
          qkeras_mean_quantizer)

    if not qkeras_variance_quantizer:
      variance_quantizer = quantizer_factory.make_default_quantizer(
          mode=cfg.default_interm_quantizer)
    else:
      # If variance is float, convert to input_quantizer.
      variance_quantizer = quantizer_factory.make_quantizer(
          qkeras_variance_quantizer)

    if not qkeras_gamma_quantizer:
      gamma_quantizer = quantizer_factory.make_default_quantizer(
          mode=cfg.default_interm_quantizer)
    else:
      gamma_quantizer = quantizer_factory.make_quantizer(
          qkeras_gamma_quantizer)

    if not qkeras_inverse_quantizer:
      inverse_quantizer = quantizer_factory.make_default_quantizer(
          mode=cfg.default_interm_quantizer)
    else:
      inverse_quantizer = quantizer_factory.make_quantizer(
          qkeras_inverse_quantizer)

  # During inference, gamma, beta and variance are constants
  # if they are po2 quantizers, we need to modify their bits
  # with actual values and also update graph with the
  # corresponding output_quantizer on the edge.
  if is_inference:
    weights = qtools_util.get_weights(
        layer, model_weights_already_quantized)

    # If no scale(gamma), num_weights --
    # If no center(beta_quantizer) num_weights --
    # (weights layout: [gamma][beta] mean variance, with gamma/beta present
    # only when scale/center are enabled; variance is the last entry.)
    num_weights = 4
    if not layer.scale:
      num_weights -= 1
    if not layer.center:
      num_weights -= 1

    if (layer.scale and gamma_quantizer is not None
        and gamma_quantizer.is_po2):
      gamma_quantizer.update_inference_values(weights[0])

    if (variance_quantizer is not None and variance_quantizer.is_po2):
      variance_quantizer.update_inference_values(
          weights[num_weights-1])

  return (gamma_quantizer, beta_quantizer, mean_quantizer,
          variance_quantizer, inverse_quantizer)
def update_output_quantizer_in_graph(graph, node_id, quantizer_factory,
                                     new_quantizer, for_reference):
  """Updates the node's outgoing edge with the output quantizer type.

  Chooses between the output quantizer already attached to the node and
  the supplied new_quantizer, writes the winner onto the graph edge, and
  returns its qtools representation (used to fill the json data type map).
  """
  current_quantizer = graph.nodes[node_id]["out_quantizer"]

  # Fall back to the supplied quantizer when in reference mode or when the
  # node carries no usable output quantizer.
  must_replace = (
      for_reference
      or not current_quantizer
      or not quantizer_factory.is_quantizer_supported(current_quantizer))
  if must_replace:
    current_quantizer = new_quantizer

  qgraph.GraphUpdateEdge(graph, node_id, current_quantizer)

  # Convert whichever quantizer won (the layer's activation quantizer, or
  # the newly supplied one) into a qtools quantizer for the caller.
  return quantizer_factory.make_quantizer(current_quantizer)
Returns: a result containing the following fields: source_quantizer_list similar as input output_layers: names of the layers that are output layers input_layers: names of the layers that are input_layers, layer_data_type_map: data type map of each layer """ quantizer_factory = quantizer_factory_module.QuantizerFactory() layer_data_type_map = collections.OrderedDict() # get the output layers output_layers = [] input_layers = [] predecessors = list(graph.predecessors(qgraph.SINK)) successors = list(graph.successors(qgraph.SOURCE)) for u in predecessors: if u == qgraph.SOURCE or u == qgraph.SINK: continue output_layers.append(graph.nodes[u]["layer"][0]) for u in successors: if u == qgraph.SOURCE or u == qgraph.SINK: continue input_layers.append(graph.nodes[u]["layer"][0]) for node_id in nx.topological_sort(graph): node = graph.nodes[node_id] node_type = node["type"][-1] layer = node["layer"][0] is_input_layer = layer in input_layers w_shapes = None b_shapes = None output_shapes = None qkeras_weight_quantizer = None if hasattr(layer, "output_shape"): output_shapes = layer.output_shape if hasattr(layer, "get_weights"): weights = layer.get_weights() if len(weights) != 0: w_shapes = layer.get_weights()[0].shape b_shapes = weights[0].shape[-1] if debug: print("########") if layer is not None: print(layer.name) else: print("None") # Deals with keras layer or lack of input quantizer in qkeras layer. input_qe_list = qtools_util.get_input_quantizers_advanced( graph, node_id, is_input_layer, quantizer_factory, cfg) if input_qe_list and node_id != qgraph.SINK: input_quantizer_list = [] for node in input_qe_list: input_quantizer_list.append(node[0]) # Calculates number of operations (multiplication/accumulation). # Previously Merge layers's inputs all have the same shape, however, in # MobilenetV3 we found that there is shape broadcast in the keras # Multiply layer. 
Therefore we use the shape with max size as the # input shape if len(input_qe_list) > 0: maxsize = -1 max_id = 0 for (idx, item) in enumerate(input_qe_list): shape = item[1]["shape"] size = np.prod(shape[1:]) if size > maxsize: maxsize = size max_id = idx input_shape = input_qe_list[max_id][1]["shape"] else: (_, edge_0) = input_qe_list[0] input_shape = edge_0["shape"] operation_count = qtools_util.get_operation_count( layer, input_shape) # Merges layers with multiple inputs. if qtools_util.is_merge_layers(layer): # merge_factory.make_quantizer automatically calculates the merge output # quantizer bitwidth according to input quantizer type. merge_factory = quantized_operators.MergeFactory() merge_quantizer = merge_factory.make_quantizer( input_qe_list, layer.__class__.__name__) if for_reference: # The for_reference option overwrites the auto-calculated merge output # quantizer if keras_accumulator: # gate_factor and gate_bits remain the same as previously # calculated; only change output quantizer as the keras_accumulator merge_quantizer.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) else: merge_quantizer.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, merge_quantizer.output, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, merge_quantizer, None, None, None, None, None, output_quantizer, output_shapes, operation_count ) # MaxPooling/reshape/flatten/UpSampling1D/2D/3D elif (qtools_util.is_shape_alternation_layers(layer) or "UpSampling" in layer.__class__.__name__): input_quantizer = input_quantizer_list[0] # Output quantizer output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, input_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, None, None, None, None, None, None, output_quantizer, output_shapes, 
operation_count ) # AveragePooling and GlobalAveragePooling elif layer.__class__.__name__ in [ "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D", "GlobalAveragePooling2D", "QAveragePooling2D", "QGlobalAveragePooling2D"]: (input_quantizer, _) = input_qe_list[0] qtools_average_quantizer = None # This is a hack. We don't want to implement a new accumulator class # just for averagpooling. So we re-use accumulator type in conv/dense # layers which need multiplier and kernel as input parameters. # In order to do so, we fake a multiplier which treat the pool_size as # the kernel. since kernel needs 4 dimension, k_h, k_w, C_in, C_out, # we set the last two dimension as [1, 1] if layer.__class__.__name__ in ["AveragePooling2D", "AvgPool2D", "QAveragePooling2D"]: pool_size = tuple(list(layer.pool_size) + [1, 1]) else: pool_size = tuple(list(input_shape)[1:-1] + [1, 1]) # Automatically calculates the accumulator bitwidth according to input # quantizer type for both quantized pooling and regular pooling layers multiplier_factory = quantized_operators.MultiplierFactory() fake_multiplier = multiplier_factory.make_multiplier( input_quantizer, input_quantizer) fake_multiplier.output = input_quantizer accumulator_factory = quantized_operators.AccumulatorFactory() accumulator = accumulator_factory.make_accumulator( pool_size, fake_multiplier, use_bias=False) # For quantized pooling layers, we also need to consider the division # precision, which is controlled by the average quantizer if layer.__class__.__name__ in ["QAveragePooling2D", "QGlobalAveragePooling2D"]: # For the quantized layer, there is an average_quantizer used for # the inverse of division operation. 
qkeras_average_quantizer = layer.get_quantizers()[0] qtools_average_quantizer = quantizer_factory.make_quantizer( qkeras_average_quantizer) multiplier = multiplier_factory.make_multiplier( accumulator.output, qtools_average_quantizer) else: multiplier = None if debug: print("accumulator:", accumulator.output.bits) # Re-calcualte accumulator/multiplier type when it's using # for_reference option if for_reference: if keras_accumulator: # If keras_accumulator exists, use keras_accumulator as multiplier # or accumulator type if multiplier: # Quantized layers need to define multiplier type multiplier.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) accumulator.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) else: # If user didn't provide keras_accumulator, use the default settings # in cfg to define multiplier/accumulator type if multiplier: multiplier.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) accumulator.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) layer_quantizer = accumulator.output # set the output quantizer if layer.__class__.__name__ in ["QAveragePooling2D", "QGlobalAveragePooling2D"]: # If is quantized layer, last operation is multiply (averaging). layer_quantizer = multiplier.output else: layer_quantizer = accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "average_quantizer": qtools_average_quantizer, "pool_sum_accumulator": accumulator, "pool_avg_multiplier": multiplier, "output_quantizer": output_quantizer, "output_shapes": output_shapes, "operation_count": operation_count } # If it's a Quantized Activation layer. 
elif node_type in ["QActivation", "QAdaptiveActivation", "Activation"]: if for_reference or not hasattr(layer, "quantizer"): # Keras activation layer -> use default_interm_quantizer layer_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_accumulator: layer_quantizer = quantizer_factory.make_default_quantizer( mode=keras_accumulator) else: layer_quantizer = layer.quantizer if not quantizer_factory.is_quantizer_supported(layer_quantizer): raise TagMissingError( "Unsupported activation quantizer {} on this layer: {}".format( layer_quantizer, layer)) if not layer_quantizer: layer_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, None, None, None, w_shapes, None, b_shapes, output_quantizer, output_shapes, operation_count ) elif node_type in ["QBatchNormalization", "BatchNormalization"]: # If this batchnorm layer needs to be fused with the previous layer, # we pass the input quantizer type as the output type in qraph. 
(input_quantizer, _) = input_qe_list[0] if (hw_weight_dict is not None and hw_weight_dict[layer.name]["enable_bn_fusing"]): if for_reference and keras_accumulator and not is_input_layer: input_quantizer = quantizer_factory.make_default_quantizer( mode=keras_accumulator) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, input_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "output_quantizer": output_quantizer, "output_shapes": input_shape, "operation_count": operation_count } else: (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer, _) = get_bn_quantizers(layer, quantizer_factory, cfg, keras_quantizer, input_quantizer, is_inference, for_reference, model_weights_already_quantized) qbn = quantized_operators.QBNFactory() qbn.make_quantizer( input_quantizer, gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer, layer.scale, layer.center ) def set_output(op, output): if op: op.output = output if for_reference or not hasattr(layer, "get_quantizers"): set_output( qbn.internal_divide_quantizer, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) set_output( qbn.internal_multiplier, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) set_output( qbn.internal_accumulator, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) set_output( qbn.internal_output, quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) if keras_accumulator: set_output( qbn.internal_divide_quantizer, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) set_output( qbn.internal_multiplier, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) set_output( qbn.internal_accumulator, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) set_output( qbn.internal_output.output, quantizer_factory.make_default_quantizer( mode=keras_accumulator)) 
gamma_range = None if hasattr(layer, "gamma_range"): gamma_range = layer.gamma_range beta_range = None if hasattr(layer, "beta_range"): beta_range = layer.beta_range if not layer.center: qbn.beta_quantizer = None if not layer.scale: qbn.gamma_quantizer = None layer_quantizer = qbn.internal_output.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "gamma_quantizer": gamma_quantizer, "beta_quantizer": beta_quantizer, "mean_quantizer": mean_quantizer, "variance_quantizer": variance_quantizer, "gamma_range": gamma_range, "beta_range": beta_range, "internal_divide_quantizer": qbn.internal_divide_quantizer, "internal_multiplier": qbn.internal_multiplier, "internal_accumulator": qbn.internal_accumulator, "output_quantizer": output_quantizer, "output_shapes": input_shape, "operation_count": operation_count } # If qdense, qconv, qpool, qoctave elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS: (input_quantizer, _) = input_qe_list[0] if for_reference or not hasattr(layer, "get_quantizers"): # for_reference: force all quantizers to keras_quantizer weight_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_quantizer: weight_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) else: # qkeras layer qkeras_weight_quantizer = layer.get_quantizers()[0] qkeras_bias_quantizer = layer.get_quantizers()[1] if not quantizer_factory.is_quantizer_supported( qkeras_weight_quantizer): raise TagMissingError( "Unsupported weight quantizer {} on this layer: {}".format( qkeras_weight_quantizer, layer)) if not quantizer_factory.is_quantizer_supported( qkeras_bias_quantizer): raise 
TagMissingError( "Unsupported bias quantizer {} on this layer: {}".format( qkeras_bias_quantizer, layer)) weight_quantizer = quantizer_factory.make_quantizer( qkeras_weight_quantizer) bias_quantizer = quantizer_factory.make_quantizer( qkeras_bias_quantizer) # TODO(lishanok): During inference, if weight and bias is po2, # need to update corresponding quantizer type with min and max # of the constant values. if is_inference: weights = qtools_util.get_weights( layer, model_weights_already_quantized) if weight_quantizer.is_po2: weight_quantizer.update_inference_values(weights[0]) if bias_quantizer.is_po2: bias_quantizer.update_inference_values(weights[1]) multiplier_factory = quantized_operators.MultiplierFactory() multiplier = multiplier_factory.make_multiplier( weight_quantizer, input_quantizer) enable_bn_fusing = ( hw_weight_dict is not None and hw_weight_dict.get(layer.name, None) and hw_weight_dict[layer.name].get("enable_bn_fusing", None)) if enable_bn_fusing and qkeras_weight_quantizer: # When conv layer is fused wiht bn, multiplier bitwidth is ajusted by # kernel quantizer scale values (for auto_po2 type of quantizer only). # For conv layer without fusing, multiplier bitwidth is not adjusted # even if auto_po2 is used in quantizer. Instead, we directly adjusted # the accumulator and store that in fused_accumulator. qtools_util.adjust_multiplier_for_auto_po2( multiplier, qkeras_weight_quantizer) weights = layer.get_weights() kernel = weights[0] kernel_shape = kernel.shape # depthwise_kernel_shape = kernel_size + (input_dim, depth_multiplier) # When computing accumulator bitwidth for dw conv2d layer, we do not # need to count the last two dimensions if node_type in ["QDepthwiseConv2D", "DepthwiseConv2D"]: kernel_shape = kernel.shape[:-2] + (1, 1) kernel_accumulator_factory = quantized_operators.AccumulatorFactory() # Sets use_bias=False so that the accumulator doesn't account for bias # bitwdith. 
kernel_accumulator = kernel_accumulator_factory.make_accumulator( kernel_shape, multiplier, use_bias=False) if not layer.use_bias: bias_quantizer = None accumulator = kernel_accumulator else: # Add bias quantizer bitwidth to the overall accumulator bias_accumulator_instance = adder_factory.IAdder() accumulator = bias_accumulator_instance.make_quantizer( kernel_accumulator.output, bias_quantizer) if debug: print(layer.name or "None") print("weight_quantizer:", weight_quantizer.bits) print("input_quantizer:", input_quantizer.bits) print("multiplier_quantizer:", multiplier.output.bits) print("multiplier_gate_bits:", multiplier.gate_bits) print("accumulator:", accumulator.output.bits) if for_reference or not hasattr(layer, "get_quantizers"): accumulator.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) multiplier.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_accumulator: accumulator.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) multiplier.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) if enable_bn_fusing: bn_layer_name = hw_weight_dict[layer.name]["fused_bn_layer_name"] successor_ids = list(graph.successors(node_id)) bn_layer = graph.nodes[successor_ids[0]]["layer"][0] assert bn_layer.name == bn_layer_name, ( "Batchnorm layer in the graph has different name from hw_weight" f"_dict: {layer.name} vs {bn_layer_name}. 
Check both places to " "ensure they are matching.") # Add additional datatype for bn fused weights (gamma_quantizer, beta_quantizer, mean_quantizer, variance_quantizer, inverse_quantizer) = get_bn_quantizers( bn_layer, quantizer_factory, cfg, keras_quantizer, input_quantizer, is_inference, for_reference, model_weights_already_quantized) qkeras_inverse_quantizer = bn_layer.inverse_quantizer_internal fused_bn = FusedBNFactory() fused_bn.make_quantizer( prev_output_quantizer=kernel_accumulator.output, prev_bias_quantizer=bias_quantizer, beta_quantizer=beta_quantizer, mean_quantizer=mean_quantizer, inverse_quantizer=inverse_quantizer, use_beta=bn_layer.center, use_bias=layer.use_bias, qkeras_inverse_quantizer=qkeras_inverse_quantizer ) if for_reference or not hasattr(layer, "get_quantizers"): fused_bn.internal_accumulator.output = ( quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer)) if keras_accumulator: fused_bn.internal_accumulator.output = ( quantizer_factory.make_default_quantizer( mode=keras_accumulator)) fused_bn.internal_output.output = fused_bn.internal_accumulator.output layer_quantizer = fused_bn.internal_accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "multiplier": multiplier, "accumulator": accumulator, "weight_quantizer": weight_quantizer, "w_shapes": w_shapes, "bias_quantizer": bias_quantizer, "b_shapes": b_shapes, "bn_inverse_quantizer": inverse_quantizer, "bn_mean_quantizer": mean_quantizer, "bn_beta_quantizer": beta_quantizer, "fused_accumulator": fused_bn.internal_accumulator, "output_quantizer": output_quantizer, "output_shapes": output_shapes, "operation_count": operation_count } else: # Correct accumulator bitwith with the scale values from # auto-po2 type of quantizers and store them in fused_accumulator. 
if ( hasattr(qkeras_weight_quantizer, "__str__") and "quantized_bits" in qkeras_weight_quantizer.__str__() and qkeras_weight_quantizer.alpha == "auto_po2"): fused_accumulator = qtools_util.adjust_accumulator_for_auto_po2( layer, multiplier, qkeras_weight_quantizer, bias_quantizer) else: fused_accumulator = accumulator layer_quantizer = accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = { "input_quantizer_list": input_quantizer_list, "multiplier": multiplier, "accumulator": accumulator, "weight_quantizer": weight_quantizer, "w_shapes": w_shapes, "bias_quantizer": bias_quantizer, "b_shapes": b_shapes, "fused_accumulator": fused_accumulator, "output_quantizer": output_quantizer, "output_shapes": output_shapes, "operation_count": operation_count } elif node_type in ["QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]: # Datatype for Folded Conv/DepthwiseConv layer # TODO(lishanok): Add additional support for Folded Dense layer (input_quantizer, _) = input_qe_list[0] if for_reference or not hasattr(layer, "get_quantizers"): # For_reference: force all quantizers to keras_quantizer. 
weight_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_quantizer: weight_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) bias_quantizer = quantizer_factory.make_default_quantizer( mode=keras_quantizer) else: # QKeras layer qkeras_weight_quantizer = layer.get_quantizers()[0] qkeras_bias_quantizer = layer.get_quantizers()[1] if not quantizer_factory.is_quantizer_supported( qkeras_weight_quantizer): raise TagMissingError( "Unsupported weight quantizer {} on this layer: {}".format( qkeras_weight_quantizer, layer)) if not quantizer_factory.is_quantizer_supported( qkeras_bias_quantizer): raise TagMissingError( "Unsupported bias quantizer {} on this layer: {}".format( qkeras_bias_quantizer, layer)) weight_quantizer = quantizer_factory.make_quantizer( qkeras_weight_quantizer) if qkeras_bias_quantizer: bias_quantizer = quantizer_factory.make_quantizer( qkeras_bias_quantizer) else: bias_quantizer = None # TODO(lishanok): During inference, if weight and bias is po2, # need to update corresponding quantizer type with min and max # of the constant values if is_inference: weights = qtools_util.get_weights( layer, model_weights_already_quantized) if weight_quantizer.is_po2: weight_quantizer.update_inference_values(weights[0]) if bias_quantizer and bias_quantizer.is_po2: bias_quantizer.update_inference_values(weights[1]) multiplier_factory = quantized_operators.MultiplierFactory() multiplier = multiplier_factory.make_multiplier( weight_quantizer, input_quantizer) if qkeras_weight_quantizer: qtools_util.adjust_multiplier_for_auto_po2( multiplier, qkeras_weight_quantizer) weights = layer.get_weights() kernel = weights[0] accumulator_factory = quantized_operators.AccumulatorFactory() accumulator = accumulator_factory.make_accumulator( kernel.shape, multiplier, use_bias=True if bias_quantizer else False) if not 
bias_quantizer: # Set bias the same as accumulator type. bias_quantizer = copy.deepcopy(accumulator.output) if not accumulator.output.is_floating_point: # For fixed point accumulator, needs to add 1 to its bits to avoid # possible satuation. accumulator.output.bits += 1 accumulator.output.int_bits += 1 if for_reference or not hasattr(layer, "get_quantizers"): accumulator.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) multiplier.output = quantizer_factory.make_default_quantizer( mode=cfg.default_interm_quantizer) if keras_accumulator: accumulator.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) multiplier.output = quantizer_factory.make_default_quantizer( mode=keras_accumulator) layer_quantizer = accumulator.output output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, layer_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType( input_quantizer_list, multiplier, accumulator, weight_quantizer, w_shapes, bias_quantizer, b_shapes, output_quantizer, output_shapes, operation_count ) elif node_type: # Any other unsupported layer types -> pass the input quantizer # type to output in qraph print(f"[WARNING] QTools cannot parse {node_type}. 
The input quatnizer" " of this layer is directly passed through to the output!", file=sys.stderr) (input_quantizer, _) = input_qe_list[0] if for_reference and keras_accumulator and not is_input_layer: input_quantizer = quantizer_factory.make_default_quantizer( mode=keras_accumulator) output_quantizer = update_output_quantizer_in_graph( graph, node_id, quantizer_factory, input_quantizer, for_reference) layer_data_type_map[layer] = LayerDataType(input_quantizer_list, None, None, None, None, None, None, output_quantizer, output_shapes, operation_count) result = { "source_quantizer_list": source_quantizer_list, "output_layers": output_layers, "input_layers": input_layers, "layer_data_type_map": layer_data_type_map } return result ================================================ FILE: qkeras/qtools/interface.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """I/O implementation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections from qkeras.qtools import generate_layer_data_type_map from qkeras.qtools import qtools_util def print_qstats(graph): """Prints quantization statistics for the model.""" layer_data_type_map = generate_layer_data_type_map(graph) multipliers_counter = collections.Counter() print("") print("Number of operations in model:") for layer, data_type in layer_data_type_map.items(): multiplier = data_type.multiplier multiplier_detail_str = "{}_{}, total_bits:{}, int_bits:{}".format( "signed" if multiplier.output.is_signed == 1 else "unsigned", multiplier.implemented_as(), multiplier.output.bits, multiplier.output.int_bits, ) print("{}: {} x {}".format( layer.name, data_type.operation_count, multiplier_detail_str, )) multipliers_counter[ multiplier_detail_str] += data_type.operation_count print("") print("Number of operation types in model:") for (multiplier_detail_str, total_multiplier_operation_count) in multipliers_counter.items(): print("{}, x {}".format(multiplier_detail_str, total_multiplier_operation_count)) def populate_quantizer(quantizer, shape=None, implemented_as=None): """write all the needed fields in the quantizer to dictionary.""" mydict = collections.OrderedDict() if quantizer is not None: mydict["quantizer_type"] = quantizer.name # floats if quantizer.is_floating_point: mydict["bits"] = quantizer.bits # po2 elif quantizer.is_po2: mydict["bits"] = quantizer.bits mydict["is_signed"] = quantizer.is_signed mydict["max_value"] = quantizer.max_val_po2 # binary elif quantizer.mode in [3, 4]: mydict["bits"] = quantizer.bits mydict["int_bits"] = quantizer.int_bits mydict["is_signed"] = quantizer.is_signed if quantizer.mode == 4: mydict["values"] = [0, 1] else: mydict["values"] = [-1, 1] # ternary(-1, 0, 1) elif quantizer.mode == 2: 
mydict["bits"] = 2 mydict["int_bits"] = 2 mydict["is_signed"] = 1 mydict["values"] = [0, -1, 1] # quantized_bits elif quantizer.mode == 0: mydict["bits"] = quantizer.bits mydict["int_bits"] = quantizer.int_bits + quantizer.is_signed mydict["is_signed"] = quantizer.is_signed if shape is not None: if isinstance(shape, tuple) and shape[0] is None: shape = list(shape) shape[0] = -1 mydict["shape"] = tuple(shape) else: mydict["shape"] = shape if implemented_as is not None: mydict["op_type"] = implemented_as return mydict def map_to_json(mydict): """write the dictionary to json format.""" source_quantizer_list = mydict["source_quantizer_list"] layer_data_type_map = mydict["layer_data_type_map"] output_dict = collections.OrderedDict() q_list = [] for source_quantizer in source_quantizer_list: tmp = populate_quantizer(source_quantizer) q_list.append(tmp) if bool(q_list): output_dict["source_quantizers"] = q_list def set_layer_item(layer_item, key, feature, shape=None, is_compound_datatype=False, output_key_name=None): """Generates the quantizer entry to a given layer_item. This function extracts relevanant quantizer fields using the key ( quantizer name) from a given feature (layer entry from layer_data_type_map). Args: layer_item: Layer entry in the output dictionary. It includes the info such as quantizers, output shape, etc. of each layer key: Quantizer, such as kernel/bias quantizer, etc. If feature feature: layer_data_type_map entry of each layer. This feature will be parsed and converted to layer_item for the output dictionary. shape: quantizer input shape is_compound_datatype: Bool. Wether the quantizer is a compound or unitary quantizer type. For example, kernel quantizer and bias quantizer are unitary quantizer types, multiplier and accumulator are compound quantizer types. output_key_name: str. Change key to output_key_name in layer_item. If None, will use the existing key. 
Return: None """ val = qtools_util.get_val(feature, key) if val is not None: quantizer = val implemented_as = None if is_compound_datatype: quantizer = val.output implemented_as = val.implemented_as() if output_key_name is None: key_name = key else: key_name = output_key_name tmp = populate_quantizer( quantizer, shape=shape, implemented_as=implemented_as) if bool(tmp): layer_item[key_name] = tmp for layer, feature in layer_data_type_map.items(): layer_item = collections.OrderedDict() layer_item["layer_type"] = layer.__class__.__name__ layer_item["input_quantizer_list"] = [ populate_quantizer(q) for q in qtools_util.get_val( feature, "input_quantizer_list")] set_layer_item(layer_item, key="output_quantizer", feature=feature, shape=qtools_util.get_val(feature, "output_shapes")) if layer_item["layer_type"] in [ "QBatchNormalization", "BatchNormalization"]: for key in ["gamma_quantizer", "beta_quantizer", "mean_quantizer", "variance_quantizer", "variance_quantizer"]: set_layer_item(layer_item, key=key, feature=feature) for key in ["internal_divide_quantizer", "internal_multiplier", "internal_accumulator"]: set_layer_item(layer_item, key=key, feature=feature, is_compound_datatype=True) elif layer_item["layer_type"] in [ "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D", "GlobalAveragePooling2D", "QAveragePooling2D", "QGlobalAveragePooling2D"]: set_layer_item(layer_item, key="average_quantizer", feature=feature) for key in ["pool_sum_accumulator", "pool_avg_multiplier"]: set_layer_item(layer_item, key=key, feature=feature, is_compound_datatype=True) else: # populate the feature to dictionary set_layer_item(layer_item, key="weight_quantizer", feature=feature, shape=qtools_util.get_val(feature, "w_shapes")) set_layer_item(layer_item, key="bias_quantizer", feature=feature, shape=qtools_util.get_val(feature, "b_shapes")) output_key_name = None if qtools_util.is_merge_layers(layer): output_key_name = layer.__class__.__name__ + "_quantizer" set_layer_item(layer_item, 
key="multiplier", feature=feature, is_compound_datatype=True, output_key_name=output_key_name) set_layer_item(layer_item, key="accumulator", feature=feature, is_compound_datatype=True) if qtools_util.get_val(feature, "fused_accumulator"): # Add fused weights to the dictionary for key in ["bn_beta_quantizer", "bn_mean_quantizer", "bn_inverse_quantizer"]: set_layer_item(layer_item, key=key, feature=feature) set_layer_item(layer_item, key="fused_accumulator", feature=feature, is_compound_datatype=True) layer_item["operation_count"] = qtools_util.get_val( feature, "operation_count") output_dict[layer.name] = layer_item return output_dict ================================================ FILE: qkeras/qtools/qenergy/__init__.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Export qenergy package.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from .qenergy import energy_estimate ================================================ FILE: qkeras/qtools/qenergy/qenergy.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculate energy consumption of a given quantized model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from qkeras.qtools.generate_layer_data_type_map import KERAS_LAYERS
from qkeras.qtools.generate_layer_data_type_map import QKERAS_LAYERS
from qkeras.qtools.quantized_operators.quantizer_impl import IQuantizer
from qkeras.qtools.settings import cfg
from qkeras.qtools import qtools_util

# Model based on:
# Mark Horowitz, Computing's Energy Problem (and what we can
# do about it). IEEE ISSCC, pp. 10-14, 2014
# www.youtube.com/watch?v=eZdOkDtYMoo&feature=youtu.be&t=497

# all metrics converted to pJ/bit
#
# Per-operation energy cost functions, keyed by datatype family
# ("fp32"/"fp16" floating point, "fpm" fixed point) and operation name.
# Each entry maps a bit count x -> energy, clamped at zero. The cost
# curves themselves come from cfg (settings.py).
# NOTE(review): the "wr" entries for sram/dram reuse the *_rd cost
# functions, and most "fpm" ops reuse fpm_add — presumably an intentional
# modeling simplification; confirm against settings.py.
OP = {
    "fp32": {
        "add": lambda x: max(cfg.fp32_add(x), 0),
        "mul": lambda x: max(cfg.fp32_mul(x), 0)
    },
    "fp16": {
        "add": lambda x: max(cfg.fp16_add(x), 0),
        "mul": lambda x: max(cfg.fp16_mul(x), 0)
    },
    "fpm": {
        "add": lambda x: max(cfg.fpm_add(x), 0),
        "mux": lambda x: max(cfg.fpm_add(x), 0),
        "xor": lambda x: max(cfg.fpm_add(x), 0),
        "and": lambda x: max(cfg.fpm_add(x), 0),
        "or": lambda x: max(cfg.fpm_add(x), 0),
        "shifter": lambda x: max(cfg.fpm_add(x), 0),
        "mul": lambda x: max(cfg.fpm_mul(x), 0)
    },
    "sram": {"rd": lambda x: max(cfg.sram_rd(x), 0),
             "wr": lambda x: max(cfg.sram_rd(x), 0),
             "mul_factor": cfg.sram_mul_factor},
    "dram": {"rd": lambda x: max(cfg.dram_rd(x), 0),
             "wr": lambda x: max(cfg.dram_rd(x), 0),
             "mul_factor": cfg.dram_mul_factor}
}


def get_op_type(quantizer):
  # Maps a quantizer to its OP-table key: "fp16"/"fp32" for floating
  # point, otherwise fixed-point ("fpm").
  assert isinstance(quantizer, IQuantizer)
  if quantizer.is_floating_point:
    return "fp" + str(quantizer.bits)
  else:
    return "fpm"


def memory_read_energy(is_input_layer, tensor_shape, mode, min_sram_size,
                       rd_wr_on_io, quantizer_bits, is_tensor=True):
  """compute energy to bring tensors from DRAM to SRAM.

  Args:
    is_input_layer: bool. Model input layers force the mode below.
    tensor_shape: tensor shape (with batch dim if is_tensor) or a scalar
      element count when is_tensor=False.
    mode: "dram" or "sram" — where the tensor currently resides.
    min_sram_size: lower bound (in bits) used when sizing the SRAM cost.
    rd_wr_on_io: bool; if True, DRAM traffic also pays an SRAM staging cost.
    quantizer_bits: bits per element.
    is_tensor: if True, the leading batch dimension is stripped.

  Returns:
    Energy estimate (pJ) for the read.
  """
  if is_input_layer:
    # Model inputs arrive from DRAM when I/O goes through memory,
    # otherwise they are assumed already resident in SRAM.
    if rd_wr_on_io:
      mode = "dram"
    else:
      mode = "sram"

  energy_mem = 0
  if is_tensor:
    # Drop the batch dimension; energy is per-sample.
    tensor_shape = tensor_shape[1:]

  total_bits = np.prod(tensor_shape) * quantizer_bits
  # SRAM cost scales with log2 of the (clamped) array size.
  total_bits_log2 = np.log2(max(total_bits, min_sram_size))

  if mode == "dram":
    # load input from dram; wx_sizes[1]-> input x quantizer bits
    # total_bits * 20
    energy_mem += OP["dram"]["rd"](total_bits)
    if rd_wr_on_io:
      # write input to sram
      # total_bits * sqrt(data_size/2^18)*0.3125
      # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
      # energy_mem += OP["sram"]["wr"](bits1)
      energy_mem += (
          np.ceil(total_bits * OP["sram"]["mul_factor"]) *
          OP["sram"]["wr"](total_bits_log2)
      )
  elif mode == "sram":
    # read input from sram
    # total_bits * sqrt(data_size/2^18)*0.3125
    # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
    # energy_mem += OP["sram"]["rd"](bits1)
    energy_mem += (
        np.ceil(total_bits * OP["sram"]["mul_factor"]) *
        OP["sram"]["rd"](total_bits_log2)
    )
  return energy_mem


def parameter_read_energy(
    layer, layer_item, weights_on_memory, min_sram_size, rd_wr_on_io):
  """read weights/bias from memory.

  Sums memory_read_energy over the layer's parameters: the four BN vectors
  for (Q)BatchNormalization layers, or kernel + optional bias for
  conv/dense-style layers. Returns 0 for other layer types.
  """
  node_type = layer.__class__.__name__
  rd_energy = 0
  if node_type in ["QBatchNormalization", "BatchNormalization"]:
    gamma_quantizer = layer_item["gamma_quantizer"]
    beta_quantizer = layer_item["beta_quantizer"]
    mean_quantizer = layer_item["mean_quantizer"]
    variance_quantizer = layer_item["variance_quantizer"]

    # gamma, beta, mean, stddev
    weights = layer.get_weights()
    # All four BN parameter vectors share the same length.
    s = len(weights[0])
    for q in [gamma_quantizer, beta_quantizer, mean_quantizer,
              variance_quantizer]:
      if q:
        rd_energy += memory_read_energy(
            False, (s), weights_on_memory, min_sram_size, rd_wr_on_io,
            q.bits, is_tensor=False)
  elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:
    weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
    w_shapes = qtools_util.get_val(layer_item, "w_shapes")
    bias_quantizer = qtools_util.get_val(layer_item, "bias_quantizer")
    b_shapes = qtools_util.get_val(layer_item, "b_shapes")

    rd_energy += memory_read_energy(
        False, w_shapes, weights_on_memory, min_sram_size, rd_wr_on_io,
        weight_quantizer.bits, is_tensor=False
    )
    if bias_quantizer:
      # if use_bias=0, no bias
      bias_shapes = (b_shapes)
      rd_energy += memory_read_energy(
          False, bias_shapes, weights_on_memory, min_sram_size, rd_wr_on_io,
          bias_quantizer.bits, is_tensor=False
      )
  return rd_energy


def memory_write_energy(is_output_layer, tensor_shape, mode, min_sram_size,
                        rd_wr_on_io, quantizer_bits):
  """compute energy to bring tensors from SRAM to DRAM.

  Mirror image of memory_read_energy: model outputs go to DRAM when
  rd_wr_on_io is set, otherwise stay in SRAM.
  """
  if is_output_layer:
    if rd_wr_on_io:
      mode = "dram"
    else:
      mode = "sram"

  energy_mem = 0
  # Drop the batch dimension; energy is per-sample.
  tensor_shape = tensor_shape[1:]

  total_bits = np.prod(tensor_shape) * quantizer_bits
  total_bits_log2 = np.log2(max(total_bits, min_sram_size))

  if mode == "dram":
    # load input from dram; wx_sizes[1]-> input x quantizer bits
    if rd_wr_on_io:
      # read input from sram
      # total_bits * sqrt(data_size/2^18)*0.3125
      # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
      # energy_mem += OP["sram"]["rd"](bits1)
      energy_mem += (
          np.ceil(total_bits * OP["sram"]["mul_factor"]) *
          OP["sram"]["rd"](total_bits_log2)
      )
    # write output to dram
    energy_mem += OP["dram"]["wr"](total_bits)
  elif mode == "sram":
    # write to sram
    # total_bits * sqrt(data_size/2^18)*0.3125
    # bits1 = total_bits * OP["sram"]["mul_factor"](np.prod(tensor_shape))
    # energy_mem += OP["sram"]["wr"](bits1)
    energy_mem += (
        np.ceil(total_bits * OP["sram"]["mul_factor"]) *
        OP["sram"]["wr"](total_bits_log2)
    )
  return energy_mem


def energy_estimate(model, layer_map, weights_on_memory,
                    activations_on_memory, min_sram_size, rd_wr_on_io):
  """estimate energy.

  For each mapped layer, sums four components: input reads, parameter
  reads, output writes, and arithmetic op cost (per layer family below).

  Args:
    model: Keras model.
    layer_map: dict from qtools with "output_layers", "input_layers" and
      "layer_data_type_map".
    weights_on_memory: "dram" or "sram" — where parameters live.
    activations_on_memory: "dram" or "sram" — where activations live.
    min_sram_size: minimum SRAM size (bits) used in cost sizing.
    rd_wr_on_io: bool; whether model I/O goes through DRAM.

  Returns:
    dict: layer name -> {"class_name", "energy": {...}}, plus "total_cost".
  """
  output_layers = layer_map["output_layers"]
  input_layers = layer_map["input_layers"]
  layer_data_type_map = layer_map["layer_data_type_map"]

  result = {}
  total_energy = 0

  # compute MAC and memory access energy for intermediate layers
  for layer in model.layers:
    if layer not in layer_data_type_map.keys():
      continue

    layer_item = layer_data_type_map[layer]
    input_quantizer_list = qtools_util.get_val(
        layer_item, "input_quantizer_list")
    operation_count = qtools_util.get_val(layer_item, "operation_count")
    output_shapes = qtools_util.get_val(layer_item, "output_shapes")
    output_quantizer = qtools_util.get_val(layer_item, "output_quantizer")

    is_input_layer = layer in input_layers
    is_output_layer = layer in output_layers

    input_rd_energy = 0
    energy_op = 0
    input_shape = layer.input_shape
    if not isinstance(input_shape, list):
      input_shape = [input_shape]
    # NOTE(review): the loop variable deliberately shadows the
    # input_shape list; each iteration pairs one input shape with its
    # quantizer.
    for (input_shape, input_quantizer) in zip(
        input_shape, input_quantizer_list):
      input_rd_energy += memory_read_energy(
          is_input_layer, input_shape,
          activations_on_memory, min_sram_size, rd_wr_on_io,
          input_quantizer.bits)

    parameter_rd_energy = parameter_read_energy(
        layer, layer_item, weights_on_memory, min_sram_size,
        rd_wr_on_io)

    output_wr_energy = memory_write_energy(
        is_output_layer, output_shapes,
        activations_on_memory, min_sram_size, rd_wr_on_io,
        output_quantizer.bits)

    # QActivation Layer
    if layer.__class__.__name__ in ["QActivation", "QAdaptiveActivation",
                                    "Activation"]:
      # Activations are assumed free of arithmetic op cost.
      pass
    # QBN Layer
    elif layer.__class__.__name__ in [
        "QBatchNormalization", "BatchNormalization"]:
      # assume QBN is embedded with conv/dense layers
      # -> no memory read/write cost
      divider = layer_item["internal_divide_quantizer"]
      if divider:
        gate_factor = divider.gate_factor
        mode = divider.implemented_as()
        energy_op += gate_factor * OP[
            get_op_type(divider.output)][mode](divider.gate_bits)

      multiplier = layer_item["internal_multiplier"]
      if multiplier:
        gate_factor = multiplier.gate_factor
        mode = multiplier.implemented_as()
        energy_op += gate_factor * OP[
            get_op_type(multiplier.output)][mode](multiplier.gate_bits)
      energy_op *= operation_count
    # Merge layer
    elif layer.__class__.__name__ in ["Add", "Multiply", "Subtract"]:
      # multiply or add operation energy
      # TODO(lishanok): check energy for concatenate
      merge_quantizer = qtools_util.get_val(layer_item, "multiplier")
      mode = merge_quantizer.implemented_as()
      number_of_inputs = len(qtools_util.get_val(
          layer_item, "input_quantizer_list"))
      gate_factor = merge_quantizer.gate_factor
      q = get_op_type(merge_quantizer.output)
      b = merge_quantizer.gate_bits
      # Merging k inputs requires k-1 pairwise ops.
      energy_op = (number_of_inputs - 1) * operation_count * gate_factor * OP[
          q][mode](b)
    # AveragePooling and GlobalAveragePooling
    elif layer.__class__.__name__ in [
        "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D",
        "GlobalAveragePooling2D"]:
      # accumulation operation energy
      accumulator = qtools_util.get_val(layer_item, "accumulator")
      add_energy = OP[get_op_type(accumulator.output)]["add"](
          accumulator.output.bits)
      energy_op = operation_count * add_energy
    # MAC energy calculation
    elif layer.__class__.__name__ in ["QConv2D", "QConv1D",
                                      "QDepthwiseConv2D", "QDense", "Conv2D",
                                      "Conv1D", "DepthwiseConv2D", "Dense"]:
      multiplier = qtools_util.get_val(layer_item, "multiplier")
      accumulator = qtools_util.get_val(layer_item, "accumulator")

      # implementation mode: xor/andgate/shift etc.
      mode = multiplier.implemented_as()
      gate_factor = multiplier.gate_factor

      op = get_op_type(multiplier.output)
      bits = multiplier.gate_bits
      # One MAC = one multiply (c1) + one accumulate (c2).
      c1 = gate_factor * OP[op][mode](bits)
      c2 = OP[get_op_type(accumulator.output)]["add"](accumulator.output.bits)
      energy_op = operation_count * (c1 + c2)
    else:
      # Unmodeled layer types contribute no arithmetic energy.
      pass

    result[layer.name] = {
        "class_name": layer.__class__.__name__,
        "energy": {
            "inputs": float("{0:.2f}".format(input_rd_energy)),
            "outputs": float("{0:.2f}".format(output_wr_energy)),
            "parameters": float("{0:.2f}".format(parameter_rd_energy)),
            "op_cost": float("{0:.2f}".format(energy_op))
        }
    }
    total_energy += (input_rd_energy + output_wr_energy +
                     parameter_rd_energy + energy_op)

  result["total_cost"] = int(total_energy)
  return result



================================================
FILE: qkeras/qtools/qgraph.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Creates networkx graph from a model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import networkx as nx
import tensorflow.keras.backend as K
from tensorflow.keras.layers import InputLayer

from qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module
from qkeras.qtools.settings import cfg

# Sentinel node ids for the synthetic single source / single sink.
SOURCE = -1
SINK = -2


class WrongInputQuantizerError(ValueError):
  # Raised when the number of user-supplied input quantizers does not
  # match the number of model inputs.
  pass


def GraphRemoveNode(graph, v):
  """Removes node "v" from u -> v -> w, connecting u -> w."""
  incoming = [u for u in graph.predecessors(v) if u != v]
  outgoing = [w for w in graph.successors(v) if w != v]

  # add incoming edges
  for u in incoming:
    for w in outgoing:
      in_attr = graph[u][v]
      out_attr = graph[v][w]
      # Bypassing v must not change the tensor shape flowing through.
      assert list(in_attr["shape"]) == list(out_attr["shape"])
      graph.add_edges_from([(u, w, out_attr)])
  graph.remove_node(v)


def GraphRemoveNodeWithNodeType(graph, node_type):
  """Removes node with attribute node_type, reconnecting network."""
  # Collect first, then remove: removal mutates the graph.
  nodes_to_remove = [v for v in graph.nodes
                     if graph.nodes[v]["type"][-1] == node_type]
  for v in nodes_to_remove:
    GraphRemoveNode(graph, v)


def GraphAddHiddenInputLayer(model, graph, input_quantizer_map):
  """For Keras Sequential model api, input layer is hidden. Need to add it.

  Args:
    model: Keras (Sequential) model.
    graph: networkx graph of the model.
    input_quantizer_map: dict mapping input tensor refs to quantizers.
  """
  node_id = -1
  # Scan existing nodes: track the max node id and bail out if an
  # InputLayer already exists.
  for (u, _) in graph.nodes.items():
    if u >= node_id:
      node_id = u
    if u == SOURCE or u == SINK:
      continue
    if graph.nodes[u]["type"][-1] == "InputLayer":
      return

  # determine a node id for the newly added input layer
  node_id += 1

  # find the first layer of the sequential model
  first_layer_nodes = []
  for u in graph.nodes:
    if u == SOURCE or u == SINK:
      continue
    predecessors = list(graph.predecessors(u))
    # find the first layer which doesn't have a parent
    if not predecessors:
      first_layer_nodes.append(u)

  assert len(first_layer_nodes) == 1
  # since it is a sequential model, there is only one first layer
  v_id = first_layer_nodes[0]

  # create a input layer node
  node_type = "InputLayer"
  input_shape = model.layers[0].input_shape
  layer = InputLayer(input_shape=input_shape[1:])
  o_shape = input_shape
  node = (node_id, {"layer": [layer], "type": [node_type],
                    "out_quantizer": None})
  graph.add_nodes_from([node])

  # insert input_quantizers on the edge between input layer and its next layer
  for (a, _) in input_quantizer_map.items():
    edge = (node_id, v_id, {
        "shape": [o_shape], "tensor": a,
        "quantizer": input_quantizer_map[a]})
    graph.add_edges_from([edge])


def GraphAddSingleSourceSingleSink(graph):
  """Connects graph to source and sink nodes."""
  edge_list = []
  for u in graph.nodes:
    if u == SOURCE or u == SINK:
      continue
    if graph.nodes[u]["type"][-1] == "InputLayer":
      # If the layer has multiple nodes, you can use get_output_at(node_index)
      tensor = graph.nodes[u]["layer"][-1].get_output_at(0)
      # if tf 1.0+, we can do tensor.shape with the same effect
      shape = tuple(tensor.get_shape().as_list())
      shape = [shape]
      edge_list.append((SOURCE, u, {
          "shape": shape, "tensor": tensor, "quantizer": None}))
    # Nodes with no outgoing edges are model outputs -> connect to SINK.
    if graph.out_degree(u) == 0:
      tensor = graph.nodes[u]["layer"][-1].get_output_at(0)
      shape = tensor.shape
      edge_list.append((u, SINK, {
          "shape": shape, "tensor": tensor, "quantizer": None}))
  graph.add_edges_from(edge_list)


def GenerateInputQuantizerList(input_quantizers, inputs_length,
                               default_source_quantizer):
  """Generates the list of input quantizers.

  Accepts None (use defaults), a list matching the number of inputs, or a
  single quantizer replicated for every input; otherwise raises
  WrongInputQuantizerError.
  """
  # generate a list of input quantizers
  input_quantizer_list = []
  quantizer_factory = quantizer_factory_module.QuantizerFactory()
  if input_quantizers is None:
    logging.warning(
        "************ SOURCE has no quantizer type."
        " Use default quantizer instead")
    for _ in range(inputs_length):
      input_quantizer_list.append(
          quantizer_factory.make_default_quantizer(
              mode=default_source_quantizer))
  else:
    if inputs_length == len(input_quantizers):
      for quantizer in input_quantizers:
        input_quantizer_list.append(quantizer_factory.make_quantizer(
            quantizer))
    # pass a single quantizer which will be used for all q list.
    elif not isinstance(input_quantizers, list):
      for _ in range(inputs_length):
        input_quantizer_list.append(quantizer_factory.make_quantizer(
            input_quantizers))
    else:
      raise WrongInputQuantizerError(
          "ERROR: Numer of input (%d) must be the same as number of source"
          " quantizers (%d)"%(inputs_length, len(input_quantizers)))
  return input_quantizer_list


def AddToNodeDict(layer_items, layer, nodes_dict):
  """Adds layer to a node_dict, indexed by layer.(input or output).ref"""
  i_list = layer_items
  # Normalize to a list of tensor .ref() keys (hashable tensor handles).
  if not isinstance(layer_items, list):
    i_list = [i_list.ref()]
  else:
    i_list = [tmp.ref() for tmp in i_list]
  for i in i_list:
    # dict: tensor -> layers have this tensor as input
    if i not in nodes_dict.keys():
      nodes_dict[i] = [layer]
    else:
      nodes_dict[i].append(layer)


def GenerateGraphFromModel(model, input_quantizers, default_source_quantizer):
  """Generates single source, single sink graph from model."""

  # node represents layers with attributes [layer, type(class_name)]
  # edge represents the tensor flowing between two layers,
  # attributes is [tensor, output_shape, QA(activation quantizer]
  # input_quantizers are tagged on the edge between input
  # layer and the following layer

  # generate a list of input quantizers
  input_quantizer_list = GenerateInputQuantizerList(input_quantizers,
                                                   len(model.inputs),
                                                   default_source_quantizer)

  # dict that map input_tensor to its quantizer
  input_quantizer_map = {}
  for (idx, tensor) in enumerate(model.inputs):
    input_quantizer_map[tensor.ref()] = input_quantizer_list[idx]

  graph = nx.DiGraph()

  source = SOURCE
  sink = SINK
  node_list = [
      (source, {"layer": [None], "type": [None], "out_quantizer": None}),
      (sink, {"layer": [None], "type": [None], "out_quantizer": None})
  ]
  for i, layer in enumerate(model.layers):
    node_type = layer.__class__.__name__
    node = (i, {"layer": [layer], "type": [node_type],
                "out_quantizer": None})
    node_list.append(node)
  # layer object -> node id, used when wiring edges below.
  node_dict = {layer: i for i, layer in enumerate(model.layers)}

  graph.add_nodes_from(node_list)

  # nodes = tensors
  in_nodes = {}
  out_nodes = {}
  for layer in model.layers:
    AddToNodeDict(layer.input, layer, in_nodes)
    AddToNodeDict(layer.output, layer, out_nodes)

  # union of all tensors; non-redundant
  attr_set = set(in_nodes.keys()) | set(out_nodes.keys())

  # add edges. we want edges annotated with tensors and shapes
  edge_list = []
  for a in attr_set:
    # for a given tensor a, find the layer u that outputs this tensor
    # and the layer v that has this tensor as input
    u_list = out_nodes.get(a, [None])
    v_list = in_nodes.get(a, [None])
    for u in u_list:
      for v in v_list:
        # Skip dangling tensors (model inputs/outputs handled elsewhere).
        if not u or not v:
          continue
        o_shape = u.output_shape
        # layer -> layer_id
        u_id = node_dict[u]
        v_id = node_dict[v]
        # insert input_quantizers on the edge between
        # input layer and its next layer
        if a in input_quantizer_map.keys():
          edge_list.append((u_id, v_id, {
              "shape": o_shape, "tensor": a,
              "quantizer": input_quantizer_map[a]}))
        else:
          edge_list.append((u_id, v_id, {
              "shape": o_shape, "tensor": a, "quantizer": None}))
  graph.add_edges_from(edge_list)

  GraphAddHiddenInputLayer(model, graph, input_quantizer_map)

  return (graph, input_quantizer_list)


def GraphGetInputs(graph):
  """Returns edges SOURCE->u that are inputs."""
  successors = list(graph.successors(SOURCE))
  input_tensors = []
  for u in successors:
    if u == SOURCE or u == SINK:
      continue
input_tensors.append(graph[SOURCE][u]) return input_tensors def GraphGetOutputs(graph): """Returns edges u->SINK that are outputs.""" predecessors = list(graph.predecessors(SINK)) output_tensors = [] for u in predecessors: if u == SOURCE or u == SINK: continue output_tensors.append(graph[u][SINK]) return output_tensors def GraphPropagateActivationsToEdges(graph, debug=False): """Traverses graph and move activations to edges. 1.If current dense/conv layer is specified with QA: outgoing edge (output data type) will be QA type 2.If current dense/conv layer has no QA: default type (float32) is used as output 3.If current layer is QA layer: float32 is used by default as output type on the edge Args: graph: graph to inject activations to. debug: debug mode Returns: None """ scheduler = list(nx.topological_sort(graph)) for vertex in scheduler[1:-1]: # get rid of source and sink vertex if debug: print("########### GraphPropagateActivationsToEdges ############") print("vertex:", vertex) for u, v in graph.edges(vertex): # u=vertex, v: outgoing edge vertex if debug: print(" outgoing ->", v, graph.nodes[v]["layer"][0].name) layer = graph.nodes[u]["layer"][0] result = None # if current layer has no QA specified if not hasattr(layer, "activation"): result = None else: activation_name = layer.activation.__name__ if hasattr( layer.activation, "__name__") else None q_activation_class_name = layer.activation.__class__.__name__ if hasattr( layer.activation, "__class__") else None if debug: print(" layer type:", layer.__class__.__name__) print(" activation object:", layer.activation) print(" activation_name:", activation_name) print(" q_activation_class_name:", q_activation_class_name) # if current layer is QA if (graph.nodes[u]["type"][0] in ["QActivation"] or graph.nodes[u]["type"][0] in ["QAdaptiveActivation"]): result = layer.quantizer # if current layer is not QA layer but has QA specified within elif hasattr(layer, "activation"): if activation_name == "linear": result = None 
else: result = layer.activation if debug: print(" {}->{}: {}".format(u, v, result)) graph[u][v]["quantizer"] = result # all edge_quantizer is the same for all edges starting # from current vertex to different nodes graph.nodes[vertex]["out_quantizer"] = result def PrintGraph(graph, msg=""): """Print graph structure.""" print() print(msg) print() print("nodes:", [(u, graph.nodes[u]["layer"][ 0].name if graph.nodes[u]["layer"][0] is not None else "", graph.nodes[u]["type"]) for u in graph.nodes]) print() print("edges:", [(u, v, graph[u][v]["shape"], graph[u][v]["quantizer"]) for u, v in graph.edges]) def CreateGraph(model, input_quantizers=None, default_source_quantizer=cfg.default_source_quantizer, debug=False): """create graph.""" K.set_image_data_format("channels_last") (graph, source_quantizer_list) = GenerateGraphFromModel( model, input_quantizers, default_source_quantizer) GraphAddSingleSourceSingleSink(graph) GraphRemoveNodeWithNodeType(graph, "Dropout") GraphRemoveNodeWithNodeType(graph, "InputLayer") scheduler = list(nx.topological_sort(graph)) if debug: for vertex in scheduler[1:-1]: for _, v in graph.edges(vertex): if v == SINK: continue print("... calling", graph.nodes[v][ "layer"][0].name, graph.nodes[v]["type"]) return (graph, source_quantizer_list) def GraphUpdateEdge(graph, node_id, quantizer_on_edge): """update the graph edges outgoing from node_id with new quantizer.""" for u, v in graph.edges(node_id): graph[u][v]["quantizer"] = quantizer_on_edge ================================================ FILE: qkeras/qtools/qtools_util.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""utility functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import sys

import numpy as np
import tensorflow.keras.backend as K
import tensorflow as tf

from qkeras.qtools import quantized_operators


def get_val(feature, key, default_val=None):
  """Returns feature[key] for a dict, else getattr(feature, key)."""
  # Return feature[key] or feature.key
  if isinstance(feature, dict):
    val = feature.get(key, default_val)
  else:
    val = getattr(feature, key, default_val)
  return val


def is_shape_alternation_layers(layer):
  """Returns True for layers that only reshape data (MaxPool/Reshape/Flatten)."""
  lname = layer.__class__.__name__
  if lname:
    return "MaxPool" in lname or "Reshape" in lname or "Flatten" in lname
  return False


def is_merge_layers(layer):
  """Returns True for keras merge layers (Add, Multiply, Concatenate, ...)."""
  if layer.__class__.__name__ in [
      "Add", "Multiply", "Subtract", "Average", "Maximum",
      "Minimum", "Concatenate", "Dot"]:
    return True
  else:
    return False


def get_input_quantizers(graph, node_id, quantizer_factory, debug=False):
  """get the current layer's input quantizer."""
  # in merge layers, there are more than 1 input
  output = []
  for parent_node_id in graph.predecessors(node_id):
    # the quantizer for this input lives on the incoming edge
    edge = graph.edges[(parent_node_id, node_id)]
    if debug:
      print("parent_node_id:", parent_node_id)
      print(edge)
    quantizer_on_edge = edge["quantizer"]
    input_quantizer = quantizer_factory.make_quantizer(quantizer_on_edge)
    output.append((input_quantizer, edge))
  return output


def get_input_quantizers_advanced(graph, node_id, is_input_layer,
                                  quantizer_factory, cfg, debug=False):
  """get input quantizer, deal with keras layer or lack of input
  quantizer in qkeras layer."""
  # in merge layers, there are more than 1 input
  default_source_quantizer = cfg.default_source_quantizer
  default_interm_quantizer = cfg.default_interm_quantizer
  output = []
  for parent_node_id in graph.predecessors(node_id):
    edge = graph.edges[(parent_node_id, node_id)]
    if debug:
      print("parent_node_id:", parent_node_id)
      print(edge)
    quantizer_on_edge = edge["quantizer"]
    input_quantizer = quantizer_factory.make_quantizer(quantizer_on_edge)
    if is_input_layer and not input_quantizer:
      # input layer without input_quantizer specified
      # ->use default_source_quantizer
      input_quantizer = quantizer_factory.make_default_quantizer(
          mode=default_source_quantizer)
    elif not input_quantizer:
      # if no input quantizer is available
      # -> use default quantizer from config.json
      input_quantizer = quantizer_factory.make_default_quantizer(
          mode=default_interm_quantizer)
    output.append((input_quantizer, edge))
  return output


def get_operation_count(layer, input_shape):
  """Determines number of multiplier operations in a qkeras layer."""

  # Check if the inputs are a list of Dimensions
  if isinstance(input_shape, list):
    input_shape = input_shape[0]

  operation_count = 0

  if is_merge_layers(layer) or is_shape_alternation_layers(layer):
    # one op per output element (batch dim excluded)
    operation_count = np.prod(input_shape[1:])

  elif layer.__class__.__name__ in [
      "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D",
      "GlobalAveragePooling2D", "QGlobalAveragePooling2D"
  ]:
    # global pooling layers have no pool_size; pool over the full spatial dims
    if hasattr(layer, "pool_size"):
      pool_size = layer.pool_size
    else:
      pool_size = input_shape[1:-1]
    add_ops = np.prod(pool_size)
    output_shape = layer.compute_output_shape(input_shape)
    channels_o = output_shape[-1]
    # total number of add ops
    operation_count = channels_o * add_ops

  elif "UpSampling" in layer.__class__.__name__:
    # UpSampling1D/2D/3D
    output_shape = layer.compute_output_shape(input_shape)
    operation_count = np.prod(output_shape[1:])

  elif ("Activation" in layer.__class__.__name__ or
        "BatchNormalization" in layer.__class__.__name__):
    # elementwise: one op per input element
    operation_count = np.prod(input_shape[1:])

  elif layer.__class__.__name__ in [
      "QConv2D", "Conv2D", "QConv2DBatchnorm", "QConv2DTranspose",
      "Conv2DTranspose"]:
    # multiplies = output pixels * kernel volume (channels_last assumed)
    output_shape = layer.compute_output_shape(input_shape)
    _, _, _, channels_i = input_shape
    _, height_o, width_o, channels_o = output_shape
    weight = layer.get_weights()[0]
    kernel_h, kernel_w, _, _ = weight.shape
    operation_count = (
        height_o * width_o * channels_o * kernel_h * kernel_w * channels_i)

  elif layer.__class__.__name__ in ["QConv1D", "Conv1D"]:
    output_shape = layer.compute_output_shape(input_shape)
    _, _, channels_i = input_shape
    _, time_o, channels_o = output_shape
    weight = layer.get_weights()[0]
    kernel_length, _, _ = weight.shape
    operation_count = (
        time_o * channels_o * kernel_length * channels_i)

  elif layer.__class__.__name__ in ["QDepthwiseConv2D", "DepthwiseConv2D"]:
    # depthwise: each input channel convolved independently
    output_shape = layer.compute_output_shape(input_shape)
    _, _, _, channels_i = input_shape
    _, height_o, width_o, channels_o = output_shape
    weight_1 = layer.get_weights()[0]
    kernel_h, kernel_w, _, _ = weight_1.shape
    operation_count = (
        kernel_h * kernel_w * height_o * width_o * channels_i)

  elif layer.__class__.__name__ in ["QDense", "Dense"]:
    output_shape = layer.compute_output_shape(input_shape)
    # Find the input and output shapes out of all possible dimensions.
    # Usually, the first shape dimension will be the batch size, and the second
    # shape dimension will be the number of channels. However, if the
    # Dense layer is in Squeeze-and-Excite, the first shape dimension
    # will be the batch size, the second and third shape dimension will be the
    # spatial sizes (should both be 1), and the fourth shape dimensions will
    # be the number of channels
    #
    # Note: asserts have been changed to sum(*shape > 1) <= 1 to avoid the case
    # when the dense layer has an output with shape (None, 1), which results in
    # sum(oshape > 1) = 0.
    ishape = np.array([i for i in input_shape if i is not None])
    assert sum(ishape > 1) <= 1, ("Input Tensor shape in %s has "
                                  "multiple >1 size dims") % layer.name
    size_i = np.max(ishape)
    oshape = np.array([i for i in output_shape if i is not None])
    assert sum(oshape > 1) <= 1, ("Output Tensor shape in %s has " +
                                  "multiple >1 size dims") % layer.name
    size_o = np.max(oshape)
    operation_count = (size_i * size_o)

  else:
    print("operation count for {} is defaulted to 0".format(
        layer))

  return int(operation_count)


def get_weights(layer, model_weights_already_quantized=True):
  """Get layer weights.

  Args:
    layer: given qkeras/keras layer
    model_weights_already_quantized: bool. whether the given layer's
      weights are already quantized. This is necessary because with certain
      quantizers, eg., quantized_bits(alpha="auto_po2"), we cannot quantize
      the same weights more than once, as it will lead to different results.

  Returns:
    Quantized layer weights.
  """
  weights = layer.get_weights()
  out = copy.deepcopy(weights)
  if not model_weights_already_quantized:
    for j, weight in enumerate(weights):
      if hasattr(layer, "get_quantizers") and layer.get_quantizers()[j]:
        out[j] = K.eval(
            layer.get_quantizers()[j](K.constant(weight)))
  return out


def get_scale_from_quantized_bits_with_auto_po2(quantizer):
  """Get scale from quantized_bits with alpha=auto_po2."""
  # scale may be a tf tensor (after the quantizer has been called),
  # a numpy array, or unset -> return None in that case
  if hasattr(quantizer.scale, "numpy"):
    return quantizer.scale.numpy()
  elif isinstance(quantizer.scale, np.ndarray):
    return quantizer.scale
  else:
    return None


def adjust_multiplier_for_auto_po2(multiplier, qkeras_weight_quantizer):
  """Adjust multiplier when weight quantizer is auto_po2 type.

  Multiplier_bits = bits_x + bits_w
  Multiplier_intbits = log2(scale) + intbits_x + intbits_w
  Because we might have different scale for auto_po2 quantizer at different
  output channels, multiplier will have different integer bits at different
  output channel accordingly, which is not desirable in hardware
  implementation.
  Therefore we set a general multiplier quantizers so that it provides
  enough fractional bits and integer bits for all output channels.
  """
  print("adjust multiplier for auto_po2 ...")
  output_quantizer = multiplier.output
  if (hasattr(qkeras_weight_quantizer, "__str__") and
      "quantized_bits" in qkeras_weight_quantizer.__str__() and
      qkeras_weight_quantizer.alpha == "auto_po2"):
    bits = output_quantizer.bits
    int_bits = output_quantizer.int_bits
    scale = get_scale_from_quantized_bits_with_auto_po2(
        qkeras_weight_quantizer)
    if scale is not None:
      if isinstance(scale, np.ndarray):
        # per-channel scales: take the widest shift in either direction
        scale = np.squeeze(scale)
        max_shift = int(np.log2(np.max(scale)))
        min_shift = int(np.log2(np.min(scale)))
      elif isinstance(scale, float):
        max_shift = int(np.log2(scale))
        min_shift = max_shift
      else:
        raise ValueError(f"Scale should be either numpy array or float,"
                         f"{type(scale)} is found instead!")
      # In order to set a general quantizer for different output channels,
      # we need to set both fractional bits and integer bits as the max
      # required bits for different output channels
      max_fractional_bits = bits - int_bits - min_shift
      max_int_bits = int_bits + max_shift
      total_bits = max_int_bits + max_fractional_bits
      output_quantizer.bits = total_bits
      output_quantizer.int_bits = max_int_bits
    else:
      # If scale is None, it means the quantizer has
      # never being called. Therfore we skip the bitwidth adjustment steps
      print("[WARNING] The weight quantizer is never called even though it has "
            "alpha=auto_po2. In this case we do not adjust the multiplier and "
            "accumulator bit width since we don't know the exact values of "
            "scale", file=sys.stderr)
  elif hasattr(qkeras_weight_quantizer, "alpha") and (
      qkeras_weight_quantizer.alpha == "auto_po2"):
    # auto_po2 on other quantizer types is not handled; warn and leave as-is
    print("[WARNING] auto_po2 is detected on a non-quantized_bits quantizer."
          "Currently in QTools we do not yet support the auto_po2 with the "
          f" given quantizer type: {type(qkeras_weight_quantizer)}."
          "Therefore we do not adjust the multiplier and accumulator bit width")


def adjust_accumulator_for_auto_po2(
    layer, multiplier, qkeras_weight_quantizer, bias_quantizer):
  """Adjust accumulator when weight quantizer is auto_po2 type."""
  # Work on a copy so the caller's multiplier is left untouched.
  fused_multiplier = copy.deepcopy(multiplier)
  adjust_multiplier_for_auto_po2(fused_multiplier, qkeras_weight_quantizer)
  weights = layer.get_weights()
  kernel = weights[0]
  kernel_shape = kernel.shape
  # depthwise_kernel_shape = kernel_size + (input_dim, depth_multiplier)
  # When computing accumulator bitwidth for dw conv2d layer, we do not
  # need to count the last two dimensions
  if layer.__class__.__name__ in ["QDepthwiseConv2D", "DepthwiseConv2D"]:
    assert kernel_shape[-1] == 1, ("depth_multiplier must be 1, "
                                   f"{kernel_shape[-1]} found instead!")
    kernel_shape = kernel.shape[:-2] + (1, 1)
  kernel_accumulator_factory = quantized_operators.AccumulatorFactory()
  # Sets use_bias=False so that the accumulator doesn't account for bias
  # bitwdith.
  fused_kernel_accumulator = kernel_accumulator_factory.make_accumulator(
      kernel_shape, fused_multiplier, use_bias=False)
  if not layer.use_bias:
    bias_quantizer = None
    fused_accumulator = fused_kernel_accumulator
  else:
    # Add bias quantizer bitwidth to the overall accumulator
    bias_accumulator_instance = quantized_operators.adder_factory.IAdder()
    fused_accumulator = bias_accumulator_instance.make_quantizer(
        fused_kernel_accumulator.output, bias_quantizer)
  return fused_accumulator


def find_divisors(num):
  """Returns all positive divisors of num, ascending."""
  return [i for i in range(1, num + 1) if num % i == 0]


def get_layer_info(layer: tf.keras.layers.Layer, attr_name: str):
  """Returns one attribute (channels, kernel size, type) of a supported layer."""
  layer_type = layer.__class__.__name__
  supported_layer_types = [
      "QDense", "QConv2D", "QDepthwiseConv2D", "MaxPooling2D",
      "GlobalMaxPooling2D", "QAveragePooling2D", "QGlobalAveragePooling2D",
      "UpSampling2D", "Concatenate", "QBatchNormalization", "QActivation",
      "Activation", "Dropout", "Reshape", "ZeroPadding2D"]
  assert layer_type in supported_layer_types, (
      f"For now only {supported_layer_types} layers are supported. "
      f"Found {layer_type} instead.")

  # Get layer info such as input/output channels, kernel size and quantizers.
  input_channel = layer.input_shape[-1]
  output_channel = layer.output_shape[-1]
  # Change default kernel_size to 1 to represent Dense Layer with Conv Layers.
  kernel_height, kernel_width = layer.kernel_size if hasattr(
      layer, "kernel_size") else (1, 1)
  layer_dict = {
      "layer_type": layer_type,
      "input_channel": input_channel,
      "output_channel": output_channel,
      "kernel_height": kernel_height,
      "kernel_width": kernel_width
  }
  # unknown attr_name -> None
  return layer_dict.get(attr_name, None)


def is_upsampled(layer: tf.keras.layers.Layer):
  # Evaluate if a given layer is doing upsampling.
  return "UpSampling" in layer.__class__.__name__



================================================
FILE: qkeras/qtools/quantized_operators/__init__.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Export quantizer package.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from .accumulator_factory import AccumulatorFactory from .multiplier_factory import MultiplierFactory from .multiplier_impl import IMultiplier, FloatingPointMultiplier, FixedPointMultiplier, Mux, AndGate, Adder, XorGate, Shifter from .accumulator_impl import IAccumulator, FloatingPointAccumulator, FixedPointAccumulator from .quantizer_impl import IQuantizer, QuantizedBits, Binary, QuantizedRelu, Ternary, FloatingPoint, PowerOfTwo, ReluPowerOfTwo from .quantizer_factory import QuantizerFactory from .qbn_factory import QBNFactory from .fused_bn_factory import FusedBNFactory from .merge_factory import MergeFactory from .divider_factory import IDivider from .subtractor_factory import ISubtractor ================================================ FILE: qkeras/qtools/quantized_operators/accumulator_factory.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Create accumulator quantizers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy from qkeras.qtools.quantized_operators import accumulator_impl from qkeras.qtools.quantized_operators import multiplier_impl class AccumulatorFactory: """interface for accumulator type.""" def make_accumulator( self, kernel_shape, multiplier: multiplier_impl.IMultiplier, use_bias=True ) -> accumulator_impl.IAccumulator: """Create an accumulator instance.""" # Creates a local deep copy so that any changes we made to the multiplier # will not impact the input multiplier type. This is necessary in case # we call this function multiple times to get different multipliers. local_multiplier = copy.deepcopy(multiplier) # The type and bit width of the accumulator is deteremined from the # multiplier implementation, and the shape of both kernel and bias if local_multiplier.output.is_floating_point: accumulator = accumulator_impl.FloatingPointAccumulator( local_multiplier) # po2*po2 is implemented as Adder; output type is po2 # in multiplier, po2 needs to be converted to FixedPoint elif local_multiplier.output.is_po2: accumulator = accumulator_impl.Po2Accumulator( kernel_shape, local_multiplier, use_bias) # fixed point else: accumulator = accumulator_impl.FixedPointAccumulator( kernel_shape, local_multiplier, use_bias) return accumulator ================================================ FILE: qkeras/qtools/quantized_operators/accumulator_impl.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Accumulator operation implementation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc

from absl import logging
import numpy as np

from qkeras.qtools.quantized_operators import multiplier_impl
from qkeras.qtools.quantized_operators import quantizer_impl


def po2_to_qbits(quantizer: quantizer_impl.IQuantizer):
  """convert po2 type to qbits type."""
  (min_exp, max_exp) = quantizer.get_min_max_exp()
  # min_exp is number of bits needed on the right in qbits
  # max_exp is number of bits needed on the left in qbits
  unsigned_bits = min_exp + max_exp
  int_bits = max_exp
  # sign bit adds one extra bit on top of the magnitude bits
  sign_bit = quantizer.is_signed
  bits = sign_bit + unsigned_bits
  return (int(bits), int(int_bits))


class IAccumulator(abc.ABC):
  """abstract class for accumulator."""

  @staticmethod
  @abc.abstractmethod
  def implemented_as():
    # Returns the hardware primitive this accumulator maps to (e.g. "add").
    pass


class FloatingPointAccumulator(IAccumulator):
  """class for floating point accumulator."""

  def __init__(
      self,
      multiplier: multiplier_impl.IMultiplier
  ):
    super().__init__()
    self.multiplier = multiplier
    # Accumulating floats keeps the multiplier's float width; no bit growth
    # is modeled for floating-point accumulation.
    self.output = quantizer_impl.FloatingPoint(
        bits=self.multiplier.output.bits)
    self.output.bits = self.multiplier.output.bits
    self.output.int_bits = -1
    self.output.is_signed = self.multiplier.output.is_signed
    self.output.is_floating_point = True
    self.output.op_type = "accumulator"

  @staticmethod
  def implemented_as():
    return "add"


class FixedPointAccumulator(IAccumulator):
  """class for fixed point accumulator."""

  def __init__(
      self,
      kernel_shape,
      multiplier: multiplier_impl.IMultiplier,
      use_bias=True
  ):
    super().__init__()
    if len(kernel_shape) not in (
        2,
        4,
    ):
      logging.fatal(
          "unsupported kernel shape, "
          "it is neither a dense kernel of length 2,"
          " nor a convolution kernel of length 4")
    # number of products summed per output = all kernel dims except the
    # output-channel (last) dimension
    kernel_shape_excluding_output_dim = kernel_shape[:-1]
    kernel_add_ops = np.prod(kernel_shape_excluding_output_dim)
    # bias are associate with filters; each filter adds 1 bias
    bias_add = 1 if use_bias else 0
    add_ops = kernel_add_ops + bias_add
    # summing N values can grow the integer part by ceil(log2(N)) bits
    self.log_add_ops = int(np.ceil(np.log2(add_ops)))
    self.multiplier = multiplier
    self.output = quantizer_impl.QuantizedBits()
    self.output.bits = self.log_add_ops + self.multiplier.output.bits
    self.output.int_bits = self.log_add_ops + self.multiplier.output.int_bits
    self.output.is_signed = self.multiplier.output.is_signed
    self.output.op_type = "accumulator"
    assert not self.multiplier.output.is_floating_point
    self.output.is_floating_point = False

  @staticmethod
  def implemented_as():
    return "add"


class Po2Accumulator(FixedPointAccumulator):
  """accumulator for po2."""

  # multiplier is po2. multiplier output needs to convert
  # to Fixedpoint before Accumulator.
  def __init__(
      self,
      kernel_shape,
      multiplier: multiplier_impl.IMultiplier,
      use_bias=True
  ):
    super().__init__(kernel_shape, multiplier, use_bias)
    assert multiplier.output.is_po2
    # convert multiplier output from po2 to quantized_bits, then apply the
    # same log2(add_ops) growth as the fixed-point base class
    (bits_from_po2multiplier, int_bits_from_po2multiplier) = po2_to_qbits(
        multiplier.output)
    self.output.bits = self.log_add_ops + int(bits_from_po2multiplier)
    self.output.int_bits = self.log_add_ops + int(int_bits_from_po2multiplier)
    self.output.op_type = "accumulator"

  @staticmethod
  def implemented_as():
    return "add"


================================================
FILE: qkeras/qtools/quantized_operators/adder_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""implement adder quantizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import copy

from absl import logging

from qkeras.qtools.quantized_operators import adder_impl
from qkeras.qtools.quantized_operators import quantizer_impl


class IAdder(abc.ABC):
  """abstract class for adder."""

  def __init__(self):
    # 6x6 dispatch table indexed by [quantizer_1.mode][quantizer_2.mode].
    # NOTE(review): mode values appear to enumerate quantizer families
    # (mode 1 looks like po2 and mode 5 floating point, judging by the
    # Po2Adder and FloatingPointAdder rows/columns) — confirm against
    # quantizer_impl's mode definitions.
    self.adder_impl_table = [
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.Po2FixedPointAdder,
            adder_impl.Po2Adder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FixedPointAdder,
            adder_impl.Po2FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FixedPointAdder,
            adder_impl.FloatingPointAdder
        ],
        [
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder,
            adder_impl.FloatingPointAdder
        ]
    ]

  def make_quantizer(self, quantizer_1: quantizer_impl.IQuantizer,
                     quantizer_2: quantizer_impl.IQuantizer):
    """make adder quantizer."""
    # deep-copy so the adder implementation can mutate its inputs freely
    local_quantizer_1 = copy.deepcopy(quantizer_1)
    local_quantizer_2 = copy.deepcopy(quantizer_2)
    mode1 = local_quantizer_1.mode
    mode2 = local_quantizer_2.mode
    adder_impl_class = self.adder_impl_table[mode1][mode2]
    logging.debug(
        "qbn adder implemented as class %s",
        adder_impl_class.implemented_as())
    return adder_impl_class(
        local_quantizer_1,
        local_quantizer_2
    )


================================================
FILE: qkeras/qtools/quantized_operators/adder_impl.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""adder operation implementation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc

from qkeras.qtools.quantized_operators import accumulator_impl
from qkeras.qtools.quantized_operators import quantizer_impl


def po2_qbits_converter(po2_quantizer: quantizer_impl.IQuantizer):
  """convert a po2 quantizer to fixedpoint quantizer."""
  (bits_from_po2, int_bits_from_po2) = accumulator_impl.po2_to_qbits(
      po2_quantizer)
  qbits_quantizer = quantizer_impl.QuantizedBits()
  qbits_quantizer.bits = bits_from_po2
  qbits_quantizer.int_bits = int_bits_from_po2
  qbits_quantizer.is_signed = po2_quantizer.is_signed
  return qbits_quantizer


class IAdderImpl(abc.ABC):
  """abstract class for adder."""

  @staticmethod
  @abc.abstractmethod
  def implemented_as():
    # Returns the hardware primitive this adder maps to.
    pass


class FixedPointAdder(IAdderImpl):
  """adder for fixed point."""

  def __init__(self, quantizer_1, quantizer_2):
    self.output = quantizer_impl.QuantizedBits()
    # adding two values can grow the integer part by 1 bit (carry)
    self.output.int_bits = max(quantizer_1.int_bits,
                               quantizer_2.int_bits) + 1
    # fractional bits of each operand = total - sign - integer bits
    fractional_bits1 = (quantizer_1.bits -
                        int(quantizer_1.is_signed) -
                        quantizer_1.int_bits)
    fractional_bits2 = (quantizer_2.bits -
                        int(quantizer_2.is_signed) -
                        quantizer_2.int_bits)
    # keep the finer of the two fractional precisions
    fractional_bits = max(fractional_bits1, fractional_bits2)
    self.output.is_signed = quantizer_1.is_signed | quantizer_2.is_signed
    self.output.bits = (self.output.int_bits +
                        int(self.output.is_signed) +
                        fractional_bits)
    self.output.mode = 0
    self.output.is_floating_point = False
    self.output.is_po2 = 0

  @staticmethod
  def implemented_as():
    return "add"


class FloatingPointAdder(IAdderImpl):
  """floating point adder."""

  def __init__(self, quantizer_1, quantizer_2):
    # result keeps the wider of the two float widths
    bits = max(quantizer_1.bits, quantizer_2.bits)
    self.output = quantizer_impl.FloatingPoint(
        bits=bits)

  @staticmethod
  def implemented_as():
    return "add"


class Po2FixedPointAdder(IAdderImpl):
  """adder between po2 and fixed point."""

  def __init__(self, quantizer_1, quantizer_2):
    # identify which operand is the po2 one; order is not fixed by the caller
    if quantizer_1.is_po2:
      po2_quantizer = quantizer_1
      fixedpoint_quantizer = quantizer_2
    else:
      po2_quantizer = quantizer_2
      fixedpoint_quantizer = quantizer_1

    # convert po2 to qbits first
    po2_qbits_quantizer = po2_qbits_converter(po2_quantizer)

    # qbits + qbits -> FixedPointAdder
    self.output = FixedPointAdder(po2_qbits_quantizer,
                                  fixedpoint_quantizer).output

  @staticmethod
  def implemented_as():
    return "add"


class Po2Adder(IAdderImpl):
  """adder for po2 type."""

  def __init__(self, quantizer_1, quantizer_2):
    # po2 + po2: convert both to fixed point, then add as fixed point
    qbits_quantizer_1 = po2_qbits_converter(quantizer_1)
    qbits_quantizer_2 = po2_qbits_converter(quantizer_2)
    self.output = FixedPointAdder(qbits_quantizer_1,
                                  qbits_quantizer_2).output

  @staticmethod
  def implemented_as():
    return "add"


================================================
FILE: qkeras/qtools/quantized_operators/divider_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
class UnacceptedQuantizerError(ValueError):
  """Raised when no divider implementation exists for a quantizer pair."""
  pass


class IDivider(abc.ABC):
  """abstract class for divider.

  Maps (numerator mode, denominator mode) to a divider implementation
  class plus an output-quantizer template via divider_impl_table.
  """

  def __init__(self):
    # divider_impl_table[numerator_mode][denominator_mode] ->
    # (implementation class, output datatype template).
    # (None, None) entries mark denominator types that cannot be
    # divided by (presumably ternary/binary modes -- TODO confirm the
    # mode ordering against quantizer_impl).
    self.divider_impl_table = [
        [
            # when qbits is denominator, use default bits for float result
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.QuantizedBits()),
            (None, None),
            (None, None),
            (None, None),
            # when bits sets to None, will decide f16/f32 according
            # to input quantizer
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            # po2 / po2 subtracts exponents.
            (divider_impl.Subtractor, quantizer_impl.PowerOfTwo()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.QuantizedBits()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.PowerOfTwo()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(
                 bits=quantizer_impl.FLOATINGPOINT_BITS)),
            (divider_impl.Shifter, quantizer_impl.PowerOfTwo()),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ],
        [
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None)),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None)),
            (None, None),
            (None, None),
            (None, None),
            (divider_impl.FloatingPointDivider,
             quantizer_impl.FloatingPoint(bits=None))
        ]
    ]

  def make_quantizer(self, numerator_quantizer: quantizer_impl.IQuantizer,
                     denominator_quantizer: quantizer_impl.IQuantizer):
    """Makes the divider quantizer.

    Args:
      numerator_quantizer: IQuantizer. Numerator of the division.
      denominator_quantizer: IQuantizer. Denominator of the division.

    Returns:
      An IDividerImpl instance computing the output datatype.

    Raises:
      UnacceptedQuantizerError: if the denominator type cannot be
        divided by.
    """
    # Create a local copy so that the changes made here won't change the input
    local_numerator_quantizer = copy.deepcopy(numerator_quantizer)
    local_denominator_quantizer = copy.deepcopy(denominator_quantizer)

    mode1 = local_numerator_quantizer.mode
    mode2 = local_denominator_quantizer.mode
    (divider_impl_class, output_quantizer) = self.divider_impl_table[
        mode1][mode2]
    local_output_quantizer = copy.deepcopy(output_quantizer)

    if divider_impl_class is None:
      raise UnacceptedQuantizerError(
          "denominator quantizer {} not accepted!".format(
              denominator_quantizer.name))

    # Bug fix: this message previously read "qbn adder implemented as
    # class", copy-pasted from the qbn factory; this is the divider.
    logging.debug(
        "divider implemented as class %s",
        divider_impl_class.implemented_as())

    return divider_impl_class(
        local_numerator_quantizer,
        local_denominator_quantizer,
        local_output_quantizer
    )
class IDividerImpl(abc.ABC):
  """Base class for divider implementations.

  Stores the operand and output quantizers; subclasses derive the
  output datatype and the gate cost.
  """

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    # NOTE(review): attribute keeps the historical misspelling
    # ("quantizier") because external code may read it by that name.
    self.numerator_quantizier = numerator_quantizer
    self.denominator_quantizer = denominator_quantizer
    self.output = output_quantizer

  @staticmethod
  @abc.abstractmethod
  def implemented_as():
    pass


class FloatingPointDivider(IDividerImpl):
  """Divider producing a floating-point result."""

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    super().__init__(numerator_quantizer, denominator_quantizer,
                     output_quantizer)

    if self.output.bits is None:
      # Output width (f16 vs f32) follows the widest floating-point
      # operand.
      candidates = [0]
      for operand in (numerator_quantizer, denominator_quantizer):
        if operand.is_floating_point:
          candidates.append(operand.bits)
      self.output.bits = max(candidates)

    self.gate_bits = self.output.bits
    self.gate_factor = 1

  @staticmethod
  def implemented_as():
    # TODO(lishanok): change cost from "mul" to "divide"
    return "mul"


class Shifter(IDividerImpl):
  """Divides a fixed-point numerator by a po2 denominator via shifting."""

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    super().__init__(numerator_quantizer, denominator_quantizer,
                     output_quantizer)

    qbit_quantizer = numerator_quantizer
    po2_quantizer = denominator_quantizer

    # Division swaps the exponent roles: the denominator's largest
    # exponent produces the largest right shift, and vice versa.
    max_exp, min_exp = po2_quantizer.get_min_max_exp()

    qbits_bits = qbit_quantizer.bits
    qbits_int_bits = qbit_quantizer.int_bits

    self.output.bits = int(qbits_bits + max_exp + min_exp)
    if po2_quantizer.is_signed and not qbit_quantizer.is_signed:
      # Signed qbits already carry their sign bit; an unsigned qbits
      # divided by a signed po2 needs one extra bit because
      # min_exp/max_exp do not include the po2 sign bit.
      self.output.bits += 1
    self.output.int_bits = int(qbits_int_bits + max_exp)
    self.output.is_signed = (qbit_quantizer.is_signed |
                             po2_quantizer.is_signed)
    self.output.is_floating_point = False

    if po2_quantizer.inference_value_counts > 0:
      # During qbn inference, energy scales with the number of unique
      # shift values.
      self.gate_factor = po2_quantizer.inference_value_counts * 0.3
      self.gate_bits = qbits_bits
    else:
      # Programmable shifter, cost model similar to a sum gate.
      self.gate_factor = 1
      width = np.sqrt(2 ** po2_quantizer.bits * qbits_bits)
      self.gate_bits = width * np.log10(width)

  @staticmethod
  def implemented_as():
    return "shifter"


class Subtractor(IDividerImpl):
  """Divides two po2 operands by subtracting their exponents."""

  def __init__(self, numerator_quantizer, denominator_quantizer,
               output_quantizer):
    super().__init__(numerator_quantizer, denominator_quantizer,
                     output_quantizer)

    self.output.bits = 1 + max(numerator_quantizer.bits,
                               denominator_quantizer.bits)
    self.output.int_bits = 1 + max(numerator_quantizer.int_bits,
                                   denominator_quantizer.int_bits)
    # The quotient is always treated as signed.
    self.output.is_signed = 1
    self.output.is_floating_point = False
    self.output.is_po2 = 1

    unknown_max = (numerator_quantizer.max_val_po2 == -1 or
                   denominator_quantizer.max_val_po2 == -1)
    if unknown_max:
      # -1 is the "unknown" sentinel and propagates.
      self.output.max_val_po2 = -1
    else:
      # Largest representable quotient is the ratio of the two maxima.
      self.output.max_val_po2 = (numerator_quantizer.max_val_po2 /
                                 denominator_quantizer.max_val_po2)

    if "po2" in output_quantizer.name:
      output_quantizer.name = ("quantized_po2" if self.output.is_signed
                               else "quantized_relu_po2")

    self.gate_bits = self.output.bits
    self.gate_factor = 1

  @staticmethod
  def implemented_as():
    return "add"
class FusedBNFactory:
  """Determines which quantizer implementation to use for fused bn.

  Creates a fused bn instance. The type and bit width of the
  output_quantizer is determined from both the previous layer and the
  batchnorm weight types:

    z = bn(y) = bn_inv * x - fused_bias

  is the output of the previous layer followed by the bn layer, with:

    bn_inv = gamma * rsqrt(variance^2 + epsilon)

  computed from the bn layer weights with the inverse_quantizer
  datatype, x the previous layer's output, and

    fused_bias = bn_inv * bias + beta - bn_inv * mean

  where bias is the bias term from the previous layer and beta and
  mean are bn layer weights.
  """

  def make_quantizer(
      self,
      prev_output_quantizer: quantizer_impl.IQuantizer,
      beta_quantizer: quantizer_impl.IQuantizer,
      mean_quantizer: quantizer_impl.IQuantizer,
      inverse_quantizer: quantizer_impl.IQuantizer,
      prev_bias_quantizer: quantizer_impl.IQuantizer,
      use_beta: bool,
      use_bias: bool,
      qkeras_inverse_quantizer: base_quantizer.BaseQuantizer,
  ):
    """Makes a fused_bn quantizer.

    Args:
      prev_output_quantizer: IQuantizer. Previous layer output quantizer.
      beta_quantizer: IQuantizer. bn layer beta quantizer.
      mean_quantizer: IQuantizer. bn layer mean quantizer.
      inverse_quantizer: IQuantizer. bn layer inverse quantizer.
      prev_bias_quantizer: IQuantizer. conv layer bias quantizer.
      use_beta: Bool. Whether beta is enabled in the bn layer.
      use_bias: Bool. Whether bias is used in the conv layer.
      qkeras_inverse_quantizer: QKeras quantizer type. bn layer inverse
        quantizer with QKeras quantizer type.

    Returns:
      None. Results are stored in internal_accumulator/internal_output.
    """
    assert not isinstance(inverse_quantizer, quantizer_impl.FloatingPoint), (
        "inverse_quantizer in batchnorm layer has to be set for "
        "fused bn inference in hardware!")

    mult_factory = multiplier_factory.MultiplierFactory()

    def _inverse_times(quantizer):
      # Datatype of bn_inv * quantizer, with the auto-po2 adjustment
      # applied to the multiplier.
      multiplier = mult_factory.make_multiplier(inverse_quantizer, quantizer)
      qtools_util.adjust_multiplier_for_auto_po2(
          multiplier, qkeras_inverse_quantizer)
      return multiplier

    # bn_inv * x
    multiplier_x = _inverse_times(prev_output_quantizer)
    # Datatype of bn_inv * mean, one piece of the fused bias.
    multiplier_mean = _inverse_times(mean_quantizer)

    add_factory = adder_factory.IAdder()

    if use_bias:
      # Datatype of bn_inv * bias, then of bn_inv*bias - bn_inv*mean.
      multiplier_bias = _inverse_times(prev_bias_quantizer)
      bias_minus_mean = add_factory.make_quantizer(
          multiplier_bias.output, multiplier_mean.output)
    else:
      # No bias in the previous layer: the datatype of
      # bn_inv*bias - bn_inv*mean collapses to that of bn_inv*mean.
      bias_minus_mean = multiplier_mean

    if use_beta:
      # fused_bias = bn_inv*bias + beta - bn_inv*mean
      fused_bias = add_factory.make_quantizer(
          bias_minus_mean.output, beta_quantizer)
    else:
      # Without beta, fused_bias = bn_inv*bias - bn_inv*mean.
      fused_bias = bias_minus_mean

    # bn_inv * x - fused_bias
    accumulator = add_factory.make_quantizer(
        multiplier_x.output, fused_bias.output)

    self.internal_accumulator = accumulator
    self.internal_output = accumulator
class MergeFactory:
  """determine which merge implementation to use."""

  def make_quantizer(self, input_qe_list, layer_type):
    """Makes a merge quantizer for a Keras merge layer.

    Args:
      input_qe_list: list of (quantizer, edge) pairs, one per merge input.
      layer_type: class name of the Keras merge layer.

    Returns:
      An IMerger subclass instance for the given layer type.

    Raises:
      ValueError: if layer_type is not a supported merge layer
        (previously this case silently returned None).
    """
    mergers = {
        "Add": Add,
        "Multiply": Multiply,
        "Maximum": Maximum,
        "Minimum": Minimum,
        "Average": Average,
        "Concatenate": Concatenate,
        "Dot": Dot,
    }
    if layer_type not in mergers:
      raise ValueError(
          "Unsupported merge layer type: {}".format(layer_type))
    return mergers[layer_type](input_qe_list)


class IMerger(abc.ABC):
  """abstract class for merge quantizer."""

  def __init__(self, input_qe_list):
    # Each node is a (quantizer, edge) pair; keep them as parallel lists.
    self.input_quantizers = [node[0] for node in input_qe_list]
    self.edges = [node[1] for node in input_qe_list]


def _reduce_fixed_point(quantizers):
  """Folds a list of quantizers into common datatype bounds.

  Po2 quantizers are converted to their fixed-point equivalents first.

  Args:
    quantizers: list of IQuantizer instances.

  Returns:
    Tuple (max_bits, max_int_bits, is_signed, is_floating_point,
    float_bits) aggregated over all inputs; float_bits is the widest
    floating-point operand (0 if none).
  """
  max_bits = -1
  max_int_bits = -1
  is_signed = False
  is_floating_point = False
  float_bits = 0
  for quantizer in quantizers:
    if quantizer.is_floating_point:
      is_floating_point = True
      float_bits = max(float_bits, quantizer.bits)
    else:
      if quantizer.is_po2:
        qbits_quantizer = adder_impl.po2_qbits_converter(quantizer)
      else:
        qbits_quantizer = quantizer
      max_bits = max(max_bits, qbits_quantizer.bits)
      max_int_bits = max(max_int_bits, qbits_quantizer.int_bits)
    is_signed |= quantizer.is_signed
  return max_bits, max_int_bits, is_signed, is_floating_point, float_bits


class Add(IMerger):
  """add a list of inputs."""

  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    (max_bits, max_int_bits, is_signed, is_floating_point,
     float_bits) = _reduce_fixed_point(self.input_quantizers)

    if is_floating_point:
      self.output = quantizer_impl.FloatingPoint(bits=float_bits)
    else:
      self.output = quantizer_impl.QuantizedBits()
      # One extra bit holds the carry of the addition.
      self.output.bits = max_bits + 1
      self.output.int_bits = max_int_bits + 1
      self.output.is_signed = is_signed
      self.output.mode = 0
      self.output.is_floating_point = False
      self.output.is_po2 = 0

    self.gate_factor = 1
    self.gate_bits = self.output.bits

  def implemented_as(self):
    return "add"


class Multiply(IMerger):
  """multiplies (element-wise) a list of inputs."""

  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    # Chain pairwise multipliers; requires at least two inputs.
    multiplier_instance = multiplier_factory.MultiplierFactory()
    quantizer = self.input_quantizers[0]
    for cur in self.input_quantizers[1:]:
      multiplier = multiplier_instance.make_multiplier(quantizer, cur)
      quantizer = multiplier.output
    self.output = quantizer
    # TODO(lishanok): only use the last multiplier here
    self.impl_class = multiplier
    self.gate_factor = multiplier.gate_factor
    self.gate_bits = multiplier.gate_bits

  def implemented_as(self):
    return self.impl_class.implemented_as()


class Maximum(IMerger):
  """maximum of a list of inputs."""

  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    first = self.input_quantizers[0]
    # If every input quantizer matches the first one, reuse it directly.
    is_same = all(
        first.name == cur.name and first.bits == cur.bits and
        first.int_bits == cur.int_bits and first.is_signed == cur.is_signed
        for cur in self.input_quantizers[1:])

    if is_same:
      self.output = first
    else:
      (max_bits, max_int_bits, is_signed, is_floating_point,
       float_bits) = _reduce_fixed_point(self.input_quantizers)
      if is_floating_point:
        self.output = quantizer_impl.FloatingPoint(bits=float_bits)
      else:
        # Unlike Add, no carry bit is needed: the result is one of the
        # inputs.
        self.output = quantizer_impl.QuantizedBits()
        self.output.bits = max_bits
        self.output.int_bits = max_int_bits
        self.output.is_signed = is_signed
        self.output.mode = 0
        self.output.is_floating_point = False
        self.output.is_po2 = 0

    self.gate_factor = 0.2
    self.gate_bits = self.output.bits

  @staticmethod
  def implemented_as():
    return "add"


class Minimum(Maximum):
  """minimum (element-wise) a list of inputs."""

  # Same datatype and cost rules as Maximum.
  pass


class Average(Maximum):
  """average (element-wise) a list of inputs."""

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    # Cost overrides relative to Maximum.
    self.gate_factor = 1
    self.gate_bits = self.output.bits


class Concatenate(Maximum):
  """Layer that concatenates a list of inputs."""

  # It takes as input a list of tensors, all of the same shape except
  # for the concatenation axis, and returns a single tensor, the
  # concatenation of all inputs.

  def __init__(self, input_qe_list):
    super().__init__(input_qe_list)
    # Concatenation is wiring only, so its gate cost is zero.
    self.gate_factor = 0
    self.gate_bits = self.output.bits


# TODO(lishanok): finish DOT ndimension tensor logic
class Dot(IMerger):
  """dot product between samples in two tensors."""

  # E.g. if applied to a list of two tensors a and b of shape
  # (batch_size, n), the output will be a tensor of shape
  # (batch_size, 1) where each entry i will be the dot product
  # between a[i] and b[i].
  pass
class MultiplierFactory:
  """determine which multiplier implementation to use."""

  def __init__(self):
    # the table below is found in this slides:
    # https://docs.google.com/presentation/d/1pcmoB6ZpX0IqjhSwgzO-oQwpMRYwIcDe/edit#slide=id.p40
    # Indexed as [weight_mode][input_mode]; each entry pairs the
    # implementation class with a fresh output-quantizer template.

    def fp():
      # FloatingPoint(bits=None) defers the f16/f32 choice to the
      # actual input quantizers.
      return (multiplier_impl.FloatingPointMultiplier,
              quantizer_impl.FloatingPoint(bits=None))

    def qbits(impl):
      return (impl, quantizer_impl.QuantizedBits())

    def po2(impl):
      return (impl, quantizer_impl.PowerOfTwo())

    def ternary(impl):
      return (impl, quantizer_impl.Ternary())

    mux = multiplier_impl.Mux
    and_gate = multiplier_impl.AndGate
    shifter = multiplier_impl.Shifter

    self.multiplier_impl_table = [
        # weight mode 0: quantized_bits
        [qbits(multiplier_impl.FixedPointMultiplier), qbits(shifter),
         qbits(mux), qbits(mux), qbits(and_gate), fp()],
        # weight mode 1: po2
        [qbits(shifter), po2(multiplier_impl.Adder), po2(mux), po2(mux),
         po2(and_gate), fp()],
        # weight mode 2: ternary
        [qbits(mux), po2(mux), ternary(mux), ternary(mux),
         ternary(and_gate), fp()],
        # weight mode 3: binary (-1, +1)
        [qbits(mux), po2(mux), ternary(mux),
         (multiplier_impl.XorGate, quantizer_impl.Binary(use_01=False)),
         ternary(and_gate), fp()],
        # weight mode 4: binary (0, 1)
        [qbits(and_gate), po2(and_gate), ternary(and_gate),
         ternary(and_gate),
         (and_gate, quantizer_impl.Binary(use_01=True)), fp()],
        # weight mode 5: float
        [fp() for _ in range(6)],
    ]

  def make_multiplier(
      self,
      weight_quantizer: quantizer_impl.IQuantizer,
      input_quantizer: quantizer_impl.IQuantizer
  ) -> multiplier_impl.IMultiplier:
    """Create a multiplier instance.

    The type and bit width of the multiplier is determined from the
    quantizer type of both the kernel (weight) and input tensor, via
    multiplier_impl_table indexed by the two quantizer modes
    (qbits, po2, ternary, binary(-1,+1), binary(0,1), float).

    Args:
      weight_quantizer: weight quantizer type
      input_quantizer: input quantizer type

    Returns:
      An IMultiplier instance.
    """
    assert weight_quantizer is not None
    assert input_quantizer is not None

    multiplier_impl_class, output_quantizer = self.multiplier_impl_table[
        weight_quantizer.mode][input_quantizer.mode]

    # Work on local copies: multiplier instances created here may
    # mutate the quantizers they are given.
    local_weight_quantizer = copy.deepcopy(weight_quantizer)
    local_input_quantizer = copy.deepcopy(input_quantizer)
    local_output_quantizer = copy.deepcopy(output_quantizer)

    logging.debug(
        "multiplier implemented as class %s",
        multiplier_impl_class.implemented_as())
    assert issubclass(multiplier_impl_class, multiplier_impl.IMultiplier)

    return multiplier_impl_class(
        local_weight_quantizer,
        local_input_quantizer,
        local_output_quantizer
    )
""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): self.input = input_quantizer self.weights = weight_quantizer self.output = output_quantizer self.output.op_type = "multiplier" @staticmethod @abc.abstractmethod def implemented_as(): pass def name(self) -> str: return self.output.name def output_quantizer(self): return self.output def assert_neither_input_and_weights_is_floating_point( multiplier: IMultiplier): """assert non float type.""" assert not multiplier.input.is_floating_point assert not multiplier.weights.is_floating_point class Mux(IMultiplier): """Use mux for the hardware implementation of multiplier.""" # binary(1,-1)/ternary * other_datatype def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) self.output.is_signed = self.input.is_signed | self.weights.is_signed if any(s in weight_quantizer.name for s in ["binary", "ternary"]): self.output.bits = input_quantizer.bits self.output.int_bits = input_quantizer.int_bits if not input_quantizer.is_signed and weight_quantizer.is_signed: self.output.bits += 1 # multiplier factor for gate counts # gate_factor is the relative energy of given gate comparing # to an Add gate, giving that Add gate is 1 if "binary" in weight_quantizer.name: self.gate_factor = 0.3 else: self.gate_factor = 2 * 0.3 self.gate_bits = input_quantizer.bits else: self.output.bits = weight_quantizer.bits self.output.int_bits = weight_quantizer.int_bits if not weight_quantizer.is_signed and input_quantizer.is_signed: self.output.bits += 1 # multiplier factor for gate counts if input_quantizer.name == "binary": self.gate_factor = 0.3 else: self.gate_factor = 2 * 0.3 self.gate_bits = weight_quantizer.bits if "po2" in output_quantizer.name: if self.output.is_signed: 
output_quantizer.name = "quantized_po2" else: output_quantizer.name = "quantized_relu_po2" if "po2" in weight_quantizer.name: self.output.max_val_po2 = weight_quantizer.max_val_po2 else: self.output.max_val_po2 = input_quantizer.max_val_po2 self.output.int_bits = self.output.bits @staticmethod def implemented_as(): return "mux" class XorGate(IMultiplier): """Use XorGate for hardware implementation of a multiplier.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) if output_quantizer.name != "ternary": self.output.bits = max(self.input.bits, self.weights.bits) self.output.int_bits = max(self.input.int_bits, self.weights.int_bits) self.output.is_signed = self.input.is_signed | self.weights.is_signed assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False self.gate_factor = 0.3 self.gate_bits = 1 @staticmethod def implemented_as(): return "xor" class Shifter(IMultiplier): """shifter gate. po2*qbit is implemented as a shifter. output is qbits type. determin number of bits in the output qbits type: 1. min_exp in po2: number of bits to be expanded on the right (decimal bits) in qbits for example, min_exp = -2 -> po2 =2^min_exp = 2^(-2) : this means, po2*qbit -> qbit value right shifted for 2 bits 2. max_exp in po2: number of bits to be expanded on the left (int_bits) in qbits How to calculate min_exp and max_exp: 1.if po2 is_signed (quantized_po2) *one bit for sign for the entire po2 value; *exp has non_sign_bits = bits - 1 number of bits, *furthermore, 1 bit from non_sign_bits is used as sign bit in exp; *value range for exp is [-2 ** (non_sign_bits - 1), 2 ** (non_sign_bits - 1) - 1] 2.if not_signed (quantized_relu_po2) * 0 bit for the entire po2 value * exp has non_sign_bits = bits * rest is the same as above determine sign bit in the output qbits: 1. 
qbits no_sign and po2 is_sign: since max_exp and min_exp are computed without sign bit we need to add 1 sign bit to the final result; 2. qbits is_sign: since qbits already has a sign bit, no extra sign bit needed 3. qbits no_sign and po2 no_sign: no extra sign bit needed Attributes: input: input_quantizer weight: weight_quantizer output: output_quantizer gate_factor: relative energy comparing to an Adder gate_bits: number of bits for energy calculation. """ def __init__( self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer ): super().__init__(weight_quantizer, input_quantizer, output_quantizer) # locate the po2 quantizer mode_w = weight_quantizer.mode if mode_w == 1: po2_quantizer = weight_quantizer qbit_quantizer = input_quantizer else: po2_quantizer = input_quantizer qbit_quantizer = weight_quantizer # find min_exp and max_exp of po2 quantizer (min_exp, max_exp) = po2_quantizer.get_min_max_exp() qbits_bits = qbit_quantizer.bits qbits_int_bits = qbit_quantizer.int_bits self.output.bits = int(qbits_bits + max_exp + min_exp) if (not qbit_quantizer.is_signed) and po2_quantizer.is_signed: # if qbit is signed, qbits_bits already has the sign_bit, no need to +1 # if qbit is un_signed, po2 is unsigned, no need to +1 # if qbit is un_signed, po2 is signed, min_exp and max_exp # didnot include sign_bit, # therefore need to +1 self.output.bits += 1 self.output.int_bits = int(qbits_int_bits + max_exp) self.output.is_signed = qbit_quantizer.is_signed | po2_quantizer.is_signed assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False if po2_quantizer.inference_value_counts > 0: self.gate_factor = po2_quantizer.inference_value_counts * 0.3 self.gate_bits = qbits_bits else: self.gate_factor = 1 b = np.sqrt(2 ** po2_quantizer.bits * qbits_bits) self.gate_bits = b * np.log10(b) @staticmethod def implemented_as(): return "shifter" class AndGate(IMultiplier): 
"""and gate implementation.""" # binary(0,1) * any_datatype def __init__( self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer ): super().__init__(weight_quantizer, input_quantizer, output_quantizer) # if output is ternary, no need for further computation if self.output.name != "ternary": self.output.bits = max(self.input.bits, self.weights.bits) self.output.is_signed = self.input.is_signed | self.weights.is_signed self.output.is_floating_point = self.input.is_floating_point |\ self.weights.is_floating_point if weight_quantizer.name == "binary" and weight_quantizer.use_01: # binary(0,1) * datatype -> int_bits = datatype.int_bits self.output.int_bits = input_quantizer.int_bits else: self.output.int_bits = weight_quantizer.int_bits if "po2" in output_quantizer.name: # binary * po2 if self.output.is_signed: output_quantizer.name = "quantized_po2" else: output_quantizer.name = "quantized_relu_po2" if "po2" in weight_quantizer.name: self.output.max_val_po2 = weight_quantizer.max_val_po2 else: self.output.max_val_po2 = input_quantizer.max_val_po2 self.gate_bits = self.output.bits self.gate_factor = 0.1 @staticmethod def implemented_as(): return "and" class Adder(IMultiplier): """adder implementation.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) self.output.bits = max(self.input.bits, self.weights.bits) + 1 self.output.int_bits = max(self.input.int_bits, self.weights.int_bits) + 1 self.output.is_signed = self.input.is_signed | self.weights.is_signed assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False self.output.is_po2 = 1 if self.input.max_val_po2 == -1 or self.weights.max_val_po2 == -1: self.output.max_val_po2 = -1 else: # Adder is two po2_value multiply with each 
other self.output.max_val_po2 = self.input.max_val_po2 * self.weights.max_val_po2 if "po2" in output_quantizer.name: # po2 * po2 if self.output.is_signed: output_quantizer.name = "quantized_po2" else: output_quantizer.name = "quantized_relu_po2" self.gate_bits = self.output.bits self.gate_factor = 1 @staticmethod def implemented_as(): return "add" class FloatingPointMultiplier(IMultiplier): """multiplier for floating point.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) self.output.bits = max( self.input.bits * self.input.is_floating_point, self.weights.bits * self.weights.is_floating_point, ) self.output.int_bits = -1 self.output.is_signed = 1 assert self.input.is_floating_point | self.weights.is_floating_point self.output.is_floating_point = True self.gate_factor = 1 self.gate_bits = self.output.bits @staticmethod def implemented_as(): return "mul" class FixedPointMultiplier(IMultiplier): """multiplier for fixed point.""" def __init__(self, weight_quantizer: quantizer_impl.IQuantizer, input_quantizer: quantizer_impl.IQuantizer, output_quantizer: quantizer_impl.IQuantizer): super().__init__(weight_quantizer, input_quantizer, output_quantizer) # Total int bits is the sum of individual int bits. 
self.output.int_bits = self.input.int_bits + self.weights.int_bits # Total fractional bits is the sum of individual fractional bits fractional_bits1 = (self.input.bits - int(self.input.is_signed) - self.input.int_bits) fractional_bits2 = (self.weights.bits - int(self.weights.is_signed) - self.weights.int_bits) fractional_bits = fractional_bits1 + fractional_bits2 self.output.is_signed = self.input.is_signed | self.weights.is_signed # Total bits is the sum of int bits, fractional bits and sign bit self.output.bits = self.output.int_bits + fractional_bits + int( self.output.is_signed) assert_neither_input_and_weights_is_floating_point(self) self.output.is_floating_point = False self.gate_factor = 1 self.gate_bits = np.sqrt(self.input.bits * self.weights.bits) @staticmethod def implemented_as(): return "mul" ================================================ FILE: qkeras/qtools/quantized_operators/qbn_factory.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""quantized batch normalization quantizer implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import math
import numpy as np

from qkeras.qtools.quantized_operators import adder_factory
from qkeras.qtools.quantized_operators import divider_factory
from qkeras.qtools.quantized_operators import multiplier_factory
from qkeras.qtools.quantized_operators import quantizer_impl


class QBNFactory:
  """determine which quantizer implementation to use.

  Create an qbn instance. The type and bit width of the output_quantizer
  is determined from gamma, beta, mean and variance quantizer

  y = gamma * (x - mean)/stddev + beta
  """

  def make_quantizer(
      self,
      input_quantizer: quantizer_impl.IQuantizer,
      gamma_quantizer: quantizer_impl.IQuantizer,
      beta_quantizer: quantizer_impl.IQuantizer,
      mean_quantizer: quantizer_impl.IQuantizer,
      variance_quantizer: quantizer_impl.IQuantizer,
      use_scale,
      use_center
  ):
    """make a qbn quantizer.

    Builds the chain of internal operators (divider, optional multiplier,
    optional accumulator) and stores them on self; the final operator's
    output quantizer is exposed as self.internal_output.
    """

    self.input_quantizer = input_quantizer
    self.gamma_quantizer = gamma_quantizer
    self.beta_quantizer = beta_quantizer
    self.mean_quantizer = mean_quantizer
    self.variance_quantizer = variance_quantizer
    self.use_scale = use_scale
    self.use_center = use_center

    multiplier = None
    accumulator = None

    # convert variance po2 quantizer to stddev po2 quantizer:
    # stddev = sqrt(variance), so the max value is rounded sqrt and
    # one fewer exponent bit suffices.
    stddev_quantizer = copy.deepcopy(variance_quantizer)

    if stddev_quantizer.is_po2:
      if variance_quantizer.max_val_po2 >= 0:
        stddev_quantizer.max_val_po2 = np.round(math.sqrt(
            variance_quantizer.max_val_po2))
      else:
        # -1 sentinel (no max value) passes through unchanged.
        stddev_quantizer.max_val_po2 = variance_quantizer.max_val_po2
      stddev_quantizer.bits = variance_quantizer.bits - 1
      stddev_quantizer.int_bits = stddev_quantizer.bits

    divider_instance = divider_factory.IDivider()

    if use_scale:
      # gamma/var
      divider = divider_instance.make_quantizer(
          gamma_quantizer, stddev_quantizer)

      # update the actual number of values in divider quantizer
      # during inference
      count = -1
      if gamma_quantizer.is_po2 and gamma_quantizer.inference_value_counts > 0:
        count = gamma_quantizer.inference_value_counts
      # NOTE(review): when stddev is not a po2 quantizer (or has no count),
      # the count is reset to -1 even if gamma had a valid count — confirm
      # this all-or-nothing behavior is intended.
      if stddev_quantizer.is_po2 and stddev_quantizer.inference_value_counts > 0:
        count *= stddev_quantizer.inference_value_counts
      else:
        count = -1
      if count > 0:
        divider.output.inference_value_counts = count

      # gamma/var * x
      multiplier_instance = multiplier_factory.MultiplierFactory()
      multiplier = multiplier_instance.make_multiplier(
          divider.output, input_quantizer)
      accumulator_input = multiplier
    else:
      # x/var
      divider = divider_instance.make_quantizer(
          input_quantizer, stddev_quantizer)
      accumulator_input = divider

    if use_center:
      # y = gamma/var * x + beta
      accumulator_instance = adder_factory.IAdder()
      accumulator = accumulator_instance.make_quantizer(
          accumulator_input.output, beta_quantizer)
      output_q = accumulator
    else:
      output_q = accumulator_input

    self.internal_divide_quantizer = divider
    self.internal_multiplier = multiplier
    self.internal_accumulator = accumulator
    self.internal_output = output_q


================================================
FILE: qkeras/qtools/quantized_operators/quantizer_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """decides which quantizer implementation to use.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy from qkeras import quantizers # from qkeras.google_internals import experimental_quantizers # from qkeras.google_internals import experimental_quantizer_impl from qkeras.qtools.quantized_operators import quantizer_impl from qkeras.qtools.settings import cfg class QuantizerFactory: """Convert qkeras quantizer to qtools quantizer type.""" def __init__(self): self.quantizer_lookup = { quantizers.quantized_bits: quantizer_impl.QuantizedBits, quantizers.binary: quantizer_impl.Binary, quantizers.quantized_relu: quantizer_impl.QuantizedRelu, quantizers.ternary: quantizer_impl.Ternary, quantizers.quantized_relu_po2: quantizer_impl.ReluPowerOfTwo, quantizers.quantized_po2: quantizer_impl.PowerOfTwo, quantizers.stochastic_ternary: quantizer_impl.StochasticTernary, quantizers.stochastic_binary: quantizer_impl.StochasticBinary, quantizers.bernoulli: quantizer_impl.Bernoulli, quantizers.quantized_tanh: quantizer_impl.QuantizedTanh, quantizers.quantized_ulaw: quantizer_impl.QuantizedUlaw, # experimental_quantizers.quantized_bits_learnable_scale: # experimental_quantizer_impl.QuantizedBitsLearnableScale, # experimental_quantizers.parametric_quantizer_d_xmax: # experimental_quantizer_impl.ParametricQuantizer, # add following quantizer types for the use in GraphUpdateEdge quantizer_impl.QuantizedBits: quantizer_impl.QuantizedBits, quantizer_impl.Binary: quantizer_impl.Binary, quantizer_impl.QuantizedRelu: quantizer_impl.QuantizedRelu, quantizer_impl.Ternary: quantizer_impl.Ternary, quantizer_impl.ReluPowerOfTwo: quantizer_impl.ReluPowerOfTwo, quantizer_impl.PowerOfTwo: quantizer_impl.PowerOfTwo, quantizer_impl.FloatingPoint: quantizer_impl.FloatingPoint, quantizer_impl.StochasticTernary: quantizer_impl.StochasticTernary, 
quantizer_impl.StochasticBinary: quantizer_impl.StochasticTernary, quantizer_impl.Bernoulli: quantizer_impl.StochasticTernary, quantizer_impl.QuantizedTanh: quantizer_impl.StochasticTernary, quantizer_impl.QuantizedUlaw: quantizer_impl.StochasticTernary, # experimental_quantizer_impl.QuantizedBitsLearnableScale: # experimental_quantizer_impl.QuantizedBitsLearnableScale, #experimental_quantizer_impl.ParametricQuantizer: # experimental_quantizer_impl.ParametricQuantizer, } self._default_interm_quantizer = cfg.default_interm_quantizer def _make_quantizer_util(self, quantizer) -> quantizer_impl.IQuantizer: """make quantizer util function.""" if quantizer in ["int8", "int16", "int32", "fp16", "fp32"]: return self.make_default_quantizer(mode=quantizer) elif isinstance(quantizer, tuple(self.quantizer_lookup.keys())): quantizer_class = self.quantizer_lookup[type(quantizer)] if quantizer_class == type(quantizer): return self.clone_quantizer(quantizer) else: q = quantizer_class() q.convert_qkeras_quantizer(quantizer) return q return None def make_quantizer(self, quantizer) -> quantizer_impl.IQuantizer: """create quantizer according to input qkeras quantizer.""" q = None if quantizer is not None: q = self._make_quantizer_util(quantizer) if q is None: return self.make_default_quantizer( mode=self._default_interm_quantizer) return q def is_quantizer_supported(self, quantizer) -> bool: if quantizer is None: # if None, will use default quantizer defined in config.json return True return isinstance(quantizer, tuple(self.quantizer_lookup.keys())) def make_default_quantizer(self, mode) -> quantizer_impl.IQuantizer: """make quantizer given qkeras quantizer type.""" if mode == "fp32": return quantizer_impl.FloatingPoint( bits=32) elif mode == "fp16": return quantizer_impl.FloatingPoint( bits=16) elif mode == "int8": qbits = quantizer_impl.QuantizedBits() qbits.convert_qkeras_quantizer( quantizers.quantized_bits(8, 0, 1)) return qbits elif mode == "int16": qbits = 
quantizer_impl.QuantizedBits() qbits.convert_qkeras_quantizer( quantizers.quantized_bits(16, 7, 1)) return qbits elif mode == "int32": qbits = quantizer_impl.QuantizedBits() qbits.convert_qkeras_quantizer( quantizers.quantized_bits(32, 10, 1)) return qbits else: try: # string to quantizer object q_name = "quantizers." + mode qkeras_object = eval(q_name) # pylint: disable=eval-used return self._make_quantizer_util(qkeras_object) except: # pylint: disable=bare-except raise ValueError("unaccepted quantizer {}!".format(mode)) def clone_quantizer( self, quantizer: quantizer_impl.IQuantizer) -> quantizer_impl.IQuantizer: """clone the given quantizer.""" return copy.deepcopy(quantizer) ================================================ FILE: qkeras/qtools/quantized_operators/quantizer_impl.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""atomic quantizer implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import math
import numpy as np
from qkeras import quantizers

# Bit width assumed for floating-point values throughout qtools.
FLOATINGPOINT_BITS = 32


def get_np_value(val):
  # Unwrap a tf tensor (has .numpy()) and a 1-element ndarray to a scalar;
  # anything else is returned unchanged.
  if hasattr(val, "numpy"):
    val = val.numpy()
    if isinstance(val, np.ndarray) and len(val) == 1:
      return val[0]
    else:
      return val
  else:
    return val


def get_exp(quantizer):
  """get max/min exp value for relu_po2 or quantized_po2."""
  if quantizer.is_signed:
    non_sign_bits = quantizer.bits - 1
  else:
    non_sign_bits = quantizer.bits

  min_exp = -2 ** (non_sign_bits - 1)
  max_exp_orig = 2 ** (non_sign_bits - 1) - 1
  max_exp = max_exp_orig

  # max_value caps how many int_bits actually allowed
  # (max_val_po2 == -1 means no max value was configured).
  if quantizer.max_val_po2 != -1:
    if quantizer.max_val_po2 <= 0:
      max_exp = 0
    else:
      max_exp = math.ceil(np.log2(quantizer.max_val_po2))
    max_exp = min(max_exp, max_exp_orig)
    # if max_exp<0. no need to expand int_bits
    max_exp = max(0, max_exp)

  return (-min_exp, max_exp)


class IQuantizer(abc.ABC):
  """abstract class for quantizer."""

  def __init__(self):
    self.mode = -1                  # index into the multiplier impl table
    self.bits = -1                  # total bit width
    self.int_bits = -1              # integer bits
    self.is_signed = 0              # 1 if a sign bit is present
    self.is_floating_point = False
    self.max_val_po2 = -1           # -1 is the "no max value" sentinel
    self.is_po2 = 0                 # 1 for power-of-two quantizers
    self.name = None
    self.op_type = "quantizer"


class QuantizedBits(IQuantizer):
  """quantized bits.

  Attributes:
    mode: index of the current quantizer in
      MultiplierFactory.multiplier_impl_table
    bits: total bits
    int_bits: integer bits
    is_signed: if a signed number
    name: quantizer name
  """

  def __init__(self):
    super().__init__()
    self.mode = 0
    self.is_signed = 1
    self.name = "quantized_bits"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_bits):
    self.mode = 0
    self.bits = quantizer.bits
    self.int_bits = get_np_value(quantizer.integer)
    self.is_signed = quantizer.keep_negative

  def convert_to_qkeras_quantizer(
      self, symmetric=1, alpha=None, use_stochastic_rounding=False,
      scale_axis=None, qnoise_factor=1.0, elements_per_scale=None,
      min_po2_exponent=None, max_po2_exponent=None):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_bits(
        bits=self.bits, integer=self.int_bits, keep_negative=self.is_signed,
        symmetric=symmetric, alpha=alpha,
        use_stochastic_rounding=use_stochastic_rounding,
        scale_axis=scale_axis, qnoise_factor=qnoise_factor,
        elements_per_scale=elements_per_scale,
        min_po2_exponent=min_po2_exponent,
        max_po2_exponent=max_po2_exponent)


class QuantizedTanh(QuantizedBits):
  """same as quantized bits."""

  def __init__(self):
    super().__init__()
    self.name = "quantized_tanh"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_tanh):
    # tanh output is always signed; int_bits keeps the base-class default.
    self.mode = 0
    self.bits = quantizer.bits
    self.is_signed = 1

  def convert_to_qkeras_quantizer(
      self, symmetric=False, use_stochastic_rounding=False):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_tanh(
        bits=self.bits, use_stochastic_rounding=use_stochastic_rounding,
        symmetric=symmetric)


class QuantizedUlaw(QuantizedBits):
  """quantized ulaw type."""
  # same as quantized bits

  def __init__(self):
    super().__init__()
    self.name = "quantized_ulaw"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_ulaw):
    self.mode = 0
    self.bits = quantizer.bits
    self.int_bits = get_np_value(quantizer.integer)
    self.is_signed = 1

  def convert_to_qkeras_quantizer(self, symmetric=0, u=255.0):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_ulaw(
        bits=self.bits, integer=self.int_bits, symmetric=symmetric, u=u)


class Binary(IQuantizer):
  """binary quantizer.

  use_01=True quantizes to {0, 1} (unsigned, mode 4);
  use_01=False quantizes to {-1, 1} (signed, mode 3).
  """

  def __init__(self, use_01=False):
    super().__init__()
    if use_01:
      self.mode = 4
      self.is_signed = 0
    else:
      self.mode = 3
      self.is_signed = 1
    self.bits = 1
    self.int_bits = 1
    self.use_01 = use_01
    self.name = "binary"

  def convert_qkeras_quantizer(self, quantizer: quantizers.binary):
    if quantizer.use_01:
      self.mode = 4
      self.is_signed = 0
    else:
      self.mode = 3
      self.is_signed = 1
    self.use_01 = quantizer.use_01

  def convert_to_qkeras_quantizer(self, alpha=None,
                                  use_stochastic_rounding=False):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.binary(use_01=self.use_01, alpha=alpha,
                             use_stochastic_rounding=use_stochastic_rounding)


class StochasticBinary(Binary):
  """stochastic binary quantizer."""
  # same as binary(-1, 1)

  def __init__(self):
    super().__init__(use_01=False)
    self.name = "stochastic_binary"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.stochastic_binary):
    """convert qkeras quantizer to qtools quantizer."""
    # Nothing to copy: the qtools representation is fixed binary(-1, 1).
    pass

  def convert_to_qkeras_quantizer(self, alpha=None, temperature=6.0,
                                  use_real_sigmoid=True):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.stochastic_binary(alpha=alpha, temperature=temperature,
                                        use_real_sigmoid=use_real_sigmoid)


class Bernoulli(Binary):
  """bernoulli quantizer.

  same as binary(0, 1)."""

  def __init__(self):
    super().__init__(use_01=True)
    self.name = "bernoulli"

  def convert_qkeras_quantizer(self, quantizer: quantizers.bernoulli):
    # Nothing to copy: the qtools representation is fixed binary(0, 1).
    pass

  def convert_to_qkeras_quantizer(self, alpha=None, temperature=6.0,
                                  use_real_sigmoid=True):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.bernoulli(alpha=alpha, temperature=temperature,
                                use_real_sigmoid=use_real_sigmoid)


class QuantizedRelu(IQuantizer):
  """quantized relu quantizer."""

  def __init__(self):
    super().__init__()
    self.is_signed = 0
    self.name = "quantized_relu"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_relu):
    """convert from qkeras quantizer."""
    bits = quantizer.bits
    int_bits = get_np_value(quantizer.integer)
    if bits == 1 and int_bits == 1:
      # Degenerates to binary(0, 1).
      mode = 4
    else:
      mode = 0
    self.mode = mode
    self.bits = bits
    self.int_bits = int_bits
    # A leaky relu (nonzero negative_slope) produces negative values.
    if hasattr(quantizer, "negative_slope") and quantizer.negative_slope != 0:
      self.is_signed = 1

  def convert_to_qkeras_quantizer(
      self, use_sigmoid=0, negative_slope=0.0, use_stochastic_rounding=False,
      relu_upper_bound=None, is_quantized_clip=True, qnoise_factor=1.0):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_relu(
        bits=self.bits, integer=self.int_bits, use_sigmoid=use_sigmoid,
        negative_slope=negative_slope,
        use_stochastic_rounding=use_stochastic_rounding,
        relu_upper_bound=relu_upper_bound,
        is_quantized_clip=is_quantized_clip,
        qnoise_factor=qnoise_factor)


class Ternary(IQuantizer):
  """ternary(0, 1, -1)."""

  def __init__(self):
    super().__init__()
    self.mode = 2
    self.bits = 2
    self.int_bits = 2
    self.is_signed = 1
    self.name = "ternary"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.ternary):
    # Nothing to copy: the qtools representation is fixed.
    pass

  def convert_to_qkeras_quantizer(
      self, alpha=None, threshold=None, use_stochastic_rounding=False,
      number_of_unrolls=5):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.ternary(
        alpha=alpha, threshold=threshold,
        use_stochastic_rounding=use_stochastic_rounding,
        number_of_unrolls=number_of_unrolls)


class StochasticTernary(Ternary):
  """stochastic ternary."""

  def __init__(self):
    super().__init__()
    self.name = "stochastic_ternary"

  # same as ternary
  def convert_qkeras_quantizer(
      self, quantizer: quantizers.stochastic_ternary):
    pass

  def convert_to_qkeras_quantizer(
      self, alpha=None, threshold=None, temperature=8.0,
      use_real_sigmoid=True, number_of_unrolls=5):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.stochastic_ternary(
        alpha=alpha, threshold=threshold, temperature=temperature,
        use_real_sigmoid=use_real_sigmoid,
        number_of_unrolls=number_of_unrolls)


class FloatingPoint(IQuantizer):
  """float32."""

  def __init__(self, bits):
    super().__init__()
    self.mode = 5
    self.bits = bits
    self.int_bits = -1
    self.is_signed = 1
    self.is_floating_point = True
    self.name = "floating_point"

  def convert_qkeras_quantizer(self, bits):
    # No qkeras counterpart; floating point is a qtools-only concept.
    pass

  def convert_to_qkeras_quantizer(self, bits):
    pass


class PowerOfTwo(IQuantizer):
  """po2."""

  def __init__(self, is_signed=True):
    super().__init__()
    self.mode = 1
    self.is_po2 = 1
    self.is_signed = is_signed
    # Number of distinct values seen at inference; -1 means unknown.
    self.inference_value_counts = -1
    if is_signed:
      self.name = "quantized_po2"
    else:
      self.name = "quantized_relu_po2"

  def convert_qkeras_quantizer(self, quantizer):
    """convert qkeras quantizer to qtools quantizer."""
    assert "po2" in quantizer.__class__.__name__
    if quantizer.__class__.__name__ == "quantized_po2":
      self.is_signed = 1
      self.name = "quantized_po2"
    elif quantizer.__class__.__name__ == "quantized_relu_po2":
      # Re-initialize to defaults before switching to unsigned.
      super().__init__()
      self.is_signed = 0
      self.name = "quantized_relu_po2"

    bits = quantizer.bits
    max_val_po2 = quantizer.max_value
    if not max_val_po2:
      self.max_val_po2 = -1
    else:
      self.max_val_po2 = max_val_po2

    self.bits = bits
    self.int_bits = bits

  def convert_to_qkeras_quantizer(
      self, negative_slope=0, use_stochastic_rounding=False,
      quadratic_approximation=False):
    """convert qtools quantizer to qkeras quantizer."""
    if self.is_signed:
      # quantized_po2
      return quantizers.quantized_po2(
          bits=self.bits,
          max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,
          use_stochastic_rounding=use_stochastic_rounding,
          quadratic_approximation=quadratic_approximation)
    else:
      # quantized_relu_po2
      return quantizers.quantized_relu_po2(
          bits=self.bits,
          max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,
          negative_slope=negative_slope,
          use_stochastic_rounding=use_stochastic_rounding,
          quadratic_approximation=quadratic_approximation)

  def get_min_max_exp(self):
    return get_exp(self)

  def quantizer_bits_calculator(self, val):
    """calculate how many bits needed.

    calculate how many bits are required to represent a po2 value.
    val can be +/- values, can be integer or fractional number.
    needs to be dealt separately.

    Returns:
      (sign_bit, non_sign_bits) tuple.

    Raises:
      ValueError: if val is not a power of two.
    """
    sign_bit = val < 0
    # get rid of sign
    val = abs(val)

    if val == 0:
      # val of 0 is special case; qkeras uses minimum
      # number to represent 0
      non_sign_bits = self.bits - sign_bit
    else:
      exp_value = np.log2(val)
      # exp_value should be integer
      if abs(np.round(exp_value) - exp_value) > 0:
        raise ValueError("ERROR: {} is not a po2 value!".format(val))
      exp_value = int(exp_value)
      # for n bits, the range of values it can represent is:
      # min_val = -2 ** (n - 1)
      # max_val = 2 ** (n - 1) - 1
      if exp_value == 0:
        non_sign_bits = 1
      elif exp_value > 0:
        # e.g., 16 needs 5 bits + 1 exp sign bit,
        # 15 needs 4 bits + 1 exp sign bit
        non_sign_bits = math.floor(np.log2(exp_value)) + 1 + 1
      else:
        # e.g., -16 needs 4 bits + 1 exp sign bit
        non_sign_bits = math.ceil(np.log2(abs(exp_value))) + 1

    return (sign_bit, non_sign_bits)

  def update_quantizer(self, val, reset=False):
    """update quantizer bits according to the input value.

    Args:
      val: input value
      reset: True->disregard current quantizer bits and reset it
        according to the given value; False-> update the quantizer
        bits with given value.
        quantizer.bits = min(existing_bits, bits required by val)

    Returns:
      Update existing po2 quantizer bits by val.
      quantizer.bits = min(existing_bits, bits required by val)
    """
    (sign_bit, non_sign_bits) = self.quantizer_bits_calculator(val)

    if reset:
      self.bits = sign_bit + non_sign_bits
    else:
      # avoid input value exceeding quantizer limit
      self.bits = min(self.bits, sign_bit + non_sign_bits)

    self.int_bits = self.bits
    # NOTE(review): when max_val_po2 is still the -1 sentinel, min() keeps
    # -1 (or a smaller negative val) — confirm this interaction is intended.
    self.max_val_po2 = min(val, self.max_val_po2)
    self.is_signed = sign_bit
    if sign_bit:
      self.name = "quantized_po2"
    else:
      self.name = "quantized_relu_po2"

  def update_inference_values(self, weights):
    """find how many different values in weights in the po2 quantizer."""
    inference_value_counts = len(set(weights.flatten()))
    self.inference_value_counts = inference_value_counts


class ReluPowerOfTwo(PowerOfTwo):
  """relu po2."""

  def __init__(self):
    super().__init__()
    self.mode = 1
    self.is_po2 = 1
    self.is_signed = 0
    self.name = "quantized_relu_po2"

  def convert_qkeras_quantizer(
      self, quantizer: quantizers.quantized_relu_po2):
    self.bits = quantizer.bits
    self.int_bits = quantizer.bits
    if not quantizer.max_value:
      self.max_val_po2 = -1
    else:
      self.max_val_po2 = quantizer.max_value

  def convert_to_qkeras_quantizer(
      self, negative_slope=0, use_stochastic_rounding=False,
      quadratic_approximation=False):
    """convert qtools quantizer to qkeras quantizer."""
    # quantized_relu_po2
    return quantizers.quantized_relu_po2(
        bits=self.bits,
        max_value=self.max_val_po2 if self.max_val_po2 >= 0 else None,
        negative_slope=negative_slope,
        use_stochastic_rounding=use_stochastic_rounding,
        quadratic_approximation=quadratic_approximation)


================================================
FILE: qkeras/qtools/quantized_operators/subtractor_factory.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """"create subtractor quantizer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from qkeras.qtools.quantized_operators import adder_factory from qkeras.qtools.quantized_operators import adder_impl from qkeras.qtools.quantized_operators import quantizer_impl class ISubtractor(adder_factory.IAdder): """Create a subtractor instance. The methods in subtractor is mostly inherited from adder with a few exceptions. """ def make_quantizer(self, quantizer_1: quantizer_impl.IQuantizer, quantizer_2: quantizer_impl.IQuantizer): """make an ISubtractor instance. if quantizer1 and quantizer2 are both non-signed, result should change to signed; else since a sign bit is already present, no need to add extra sign bit Args: quantizer_1: first operand quantizer_2: second operand Returns: An ISubtractor instance """ quantizer = super().make_quantizer(quantizer_1, quantizer_2) if not isinstance(quantizer, adder_impl.FloatingPoint_Adder): if not quantizer_1.is_signed and not quantizer_2.is_signed: quantizer.output.is_signed = 1 quantizer.output.bits += 1 return quantizer ================================================ FILE: qkeras/qtools/run_qtools.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for running qtools and qenergy."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import numpy as np

from qkeras.qtools import generate_layer_data_type_map
from qkeras.qtools import interface
from qkeras.qtools import qgraph
from qkeras.qtools import qtools_util
from qkeras.qtools.config_public import config_settings
from qkeras.qtools.qenergy import qenergy
from qkeras.qtools.settings import cfg


class QTools:
  """integration of different qtools functions."""

  def __init__(self, model, process, source_quantizers=None,
               is_inference=False, weights_path=None,
               keras_quantizer=None, keras_accumulator=None,
               for_reference=False, model_weights_already_quantized=True,
               hw_weight_dict=None):
    if model is not None:
      self._model = model
      if weights_path is not None:
        self._model.load_weights(weights_path)

    # Apply process-specific (e.g. technology node) config overrides.
    cfg.update(process, config_settings)

    # if source_quantizers is None, CreateGraph will use
    # default_source_quantizers defined in cfg
    (graph, source_quantizer_list) = qgraph.CreateGraph(
        model, source_quantizers, cfg.default_source_quantizer)
    # qgraph.PrintGraph(graph)
    qgraph.GraphPropagateActivationsToEdges(graph)

    self._layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
        graph, source_quantizer_list, is_inference,
        keras_quantizer, keras_accumulator, for_reference,
        model_weights_already_quantized=model_weights_already_quantized,
        hw_weight_dict=hw_weight_dict)

    self._output_dict = interface.map_to_json(self._layer_map)
    self.source_quantizer_list = source_quantizer_list

  def qtools_stats_to_json(self, json_name):
    """dump the layer stats to a json file."""
    with open(json_name, "w") as outfile:
      json.dump(self._output_dict, outfile, indent=4)

  def qtools_stats_print(self):
    """print out the layer stats."""
    dict_to_json = json.dumps(self._output_dict, indent=4)
    print(dict_to_json)

  def pe(self, weights_on_memory="dram",
         activations_on_memory="dram",
         min_sram_size=0,
         rd_wr_on_io=True,
         verbose=False):
    """energy consumption calculation."""
    # NOTE(review): only weights_on_memory is validated here;
    # activations_on_memory is not — confirm whether that is intended.
    assert weights_on_memory in ["dram", "sram", "fixed"]
    energy_dict = qenergy.energy_estimate(
        self._model, self._layer_map, weights_on_memory,
        activations_on_memory, min_sram_size,
        rd_wr_on_io)

    if verbose:
      print("COST:")
      dict_to_json = json.dumps(energy_dict, indent=4)
      print(dict_to_json)

    return energy_dict

  def extract_energy_sum(self, cfg_setting, energy_dict):
    """extracted energy needed in calculating sum."""
    value = 0
    for layer in energy_dict.keys():
      if layer == "total_cost":
        continue
      class_name = energy_dict[layer]["class_name"]
      # Per-class key list, falling back to the "default" entry.
      keys = cfg_setting.get(class_name, cfg_setting.get("default", []))
      value += sum([energy_dict[layer]["energy"][key] for key in keys])
    return int(value)

  def extract_energy_profile(self, cfg_setting, energy_dict):
    """extract energy consumption in each layer."""
    energy = {}
    for layer in energy_dict.keys():
      if layer == "total_cost":
        continue
      class_name = energy_dict[layer]["class_name"]
      keys = cfg_setting.get(class_name, cfg_setting.get("default", []))
      energy[layer] = {}
      energy[layer]["energy"] = energy_dict[layer]["energy"]
      energy[layer]["total"] = sum(
          [energy_dict[layer]["energy"][key] for key in keys])
    return energy

  def calculate_ace(self, default_float_bits):
    """Computes ACE numbers from conv/dense layers.

    Returns a (quantized_ace, float_ace) pair summed over all layers
    that have a weight quantizer.
    """

    def _get_ace(layer):
      # Returns (ace, ace_float) for a single layer; (0, 0) for layers
      # without weight quantizers.
      ace = 0
      ace_float = 0
      if layer.name in self._output_dict:
        layer_item = self._output_dict[layer.name]
        # Here we only consider the number of multiplication as the
        # operation count. To include the number of
        # accumulators, we should multiply the value by 2, assuming
        # accumulation count ~= multiplication count.
        operation_count = layer_item["operation_count"]

        # Input bitwidth.
        input_quantizer_list = layer_item["input_quantizer_list"]
        input_bits = input_quantizer_list[0]["bits"]

        # Weight bitwidth
        weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
        if weight_quantizer:
          # Only layers such as Conv/Dense have weight_quantizers.
          w_bits = weight_quantizer["bits"]
          ace = operation_count * input_bits * w_bits
          ace_float = operation_count * default_float_bits * default_float_bits
      return (ace, ace_float)

    print("WARNING: ACE are computed from conv/dense layers only!")
    return (sum([_get_ace(l)[0] for l in self._model.layers]),
            sum([_get_ace(l)[1] for l in self._model.layers]))

  def calculate_output_bytes(self, include_model_input_size,
                             default_float_bits):
    """Computes activation layers' output size in bytes."""

    def _get_activation_size(layer):
      # Since in hardware previous conv/dense layers will be fused with
      # the following activation layers, we only consider the output of
      # Activation layers when calculating output sizes.
      if layer.__class__.__name__ in ["QActivation"]:
        layer_item = self._output_dict[layer.name]
        output_quantizer = layer_item["output_quantizer"]
        output_shape = output_quantizer["shape"]
        o_bits = output_quantizer["bits"]
        # Skip the batch dimension (output_shape[0]).
        return (int(np.prod(output_shape[1:]) * o_bits / 8.0),
                int(np.prod(output_shape[1:]) * default_float_bits / 8.0))
      else:
        return (0, 0)

    output_bytes = sum([_get_activation_size(l)[0]
                        for l in self._model.layers])
    output_bytes_float = sum([_get_activation_size(l)[1]
                              for l in self._model.layers])
    if include_model_input_size:
      # Include model input size.
      output_bytes += (np.prod(self._model.input_shape[1:]) *
                       self.source_quantizer_list[0].bits / 8.0)
      output_bytes_float += (np.prod(self._model.input_shape[1:]) *
                             default_float_bits / 8.0)
    return (output_bytes, output_bytes_float)

  def calculate_weight_bytes(self, default_float_bits):
    """Computes weight size in bytes from conv/dense layers."""

    def _get_weight_size(layer):
      # Returns (weight_bytes, weight_bytes_float) for one layer,
      # covering kernel and (when present) bias.
      weight_bytes = 0
      weight_bytes_float = 0
      if layer.name in self._output_dict:
        layer_item = self._output_dict[layer.name]
        weight_quantizer = qtools_util.get_val(layer_item, "weight_quantizer")
        if weight_quantizer:
          # Calculates kernel bytes.
          w_bits = weight_quantizer["bits"]
          weight_bytes += int(np.prod(layer.weights[0].shape) * w_bits / 8.0)
          weight_bytes_float += int(np.prod(layer.weights[0].shape) *
                                    default_float_bits / 8.0)

          # Calculates bias bytes.
          if hasattr(layer, "use_bias") and layer.use_bias:
            bias_quantizer = qtools_util.get_val(layer_item, "bias_quantizer")
            assert bias_quantizer is not None, (
                f"{layer.name} has no bias_quantizer!")
            b_bits = bias_quantizer["bits"]
            weight_bytes += int(np.prod(layer.weights[1].shape) * b_bits / 8.0)
            weight_bytes_float += int(np.prod(layer.weights[1].shape) *
                                      default_float_bits / 8.0)
      return (weight_bytes, weight_bytes_float)

    return (sum([_get_weight_size(l)[0] for l in self._model.layers]),
            sum([_get_weight_size(l)[1] for l in self._model.layers]))

  def get_roofline_numbers(self, include_model_input_size=True,
                           default_float_bits=32):
    """Extracts model numbers for roofline model analysis."""
    return {"ACE": self.calculate_ace(default_float_bits)[0],
            "weight_in_bytes": self.calculate_weight_bytes(
                default_float_bits)[0],
            "activation_in_bytes": self.calculate_output_bytes(
                include_model_input_size, default_float_bits)[0],
            "ACE_float": self.calculate_ace(
                default_float_bits)[1],
            "weight_in_bytes_float": self.calculate_weight_bytes(
                default_float_bits)[1],
            "activation_in_bytes_float": self.calculate_output_bytes(
                include_model_input_size, default_float_bits)[1]}
================================================ FILE: qkeras/qtools/settings.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """configurations.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np class ConfigClass: """configuration class.""" def __init__(self): self.default_source_quantizer = "quantized_bits(8, 0, 1)" self.default_interm_quantizer = "fp32" # Horowitz estimates from ISSCC 2014 self.fpm_add = np.poly1d([0.003125, 0]) self.fpm_mul = np.poly1d([0.002994791667, 0.001041666667, 0]) self.fp16_add = np.poly1d([0.4]) self.fp16_mul = np.poly1d([1.1]) self.fp32_add = np.poly1d([0.9]) self.fp32_mul = np.poly1d([3.7]) self.sram_rd = np.poly1d([0.02455, -0.2656, 0.8661]) self.dram_rd = np.poly1d([20.3125, 0]) self.sram_mul_factor = 1/64. 
self.dram_mul_factor = 1.0 self.include_energy = {} self.include_energy["default"] = ["inputs", "parameters", "op_cost"] self.include_energy["QActivation"] = ["outputs"] self.include_energy["QAdaptiveActivation"] = ["outputs"] self.include_energy["Activation"] = ["outputs"] self.include_energy["QBatchNormalization"] = ["parameters"] self.include_energy["BatchNormalization"] = ["parameters"] self.include_energy["Add"] = ["op_cost"] self.include_energy["Subtract"] = ["op_cost"] self.include_energy["MaxPooling2D"] = ["op_cost"] self.include_energy["default"] = ["inputs", "parameters", "op_cost"] def update(self, process, cfg_setting): """update config.""" # pylint: disable=bare-except try: self.default_source_quantizer = cfg_setting[ "default_source_quantizer"] except: pass try: self.default_interm_quantizer = cfg_setting[ "default_interm_quantizer"] except: pass try: self.fpm_add = np.poly1d(cfg_setting[process]["fpm_add"]) except: pass try: self.fpm_mul = np.poly1d(cfg_setting[process]["fpm_mul"]) except: pass try: self.fp16_add = np.poly1d(cfg_setting[process]["fp16_add"]) except: pass try: self.fp16_mul = np.poly1d(cfg_setting[process]["fp16_mul"]) except: pass try: self.fp32_add = np.poly1d(cfg_setting[process]["fp32_add"]) except: pass try: self.fp32_mul = np.poly1d(cfg_setting[process]["fp32_mul"]) except: pass try: self.sram_rd = np.poly1d(cfg_setting[process]["sram_rd"]) except: pass try: self.dram_rd = np.poly1d(cfg_setting[process]["dram_rd"]) except: # pylint: disable=broad-except pass try: for key in cfg_setting["include_energy"]: self.include_energy[key] = cfg_setting["include_energy"][key] if "Q" == key[0]: # use the same rule for keras layer and qkeras layer self.include_energy[key[1:]] = cfg_setting["include_energy"][key] except: pass cfg = ConfigClass() ================================================ FILE: qkeras/quantizer_imports.py ================================================ # Copyright 2025 Google LLC # # # Licensed under the Apache License, 
Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Imports for QKeras quantizers.""" from .quantizers import bernoulli from .quantizers import binary from .quantizers import quantized_bits from .quantizers import quantized_hswish from .quantizers import quantized_linear from .quantizers import quantized_po2 from .quantizers import quantized_relu from .quantizers import quantized_relu_po2 from .quantizers import quantized_sigmoid from .quantizers import quantized_tanh from .quantizers import quantized_ulaw from .quantizers import stochastic_binary from .quantizers import stochastic_ternary from .quantizers import ternary ================================================ FILE: qkeras/quantizer_registry.py ================================================ # Copyright 2024 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Registry for QKeras quantizers.""" from . 
import registry # Global registry for all QKeras quantizers. _QUANTIZERS_REGISTRY = registry.Registry() def register_quantizer(quantizer): """Decorator for registering a quantizer.""" _QUANTIZERS_REGISTRY.register(quantizer) # Return the quantizer after registering. This ensures any registered # quantizer class is properly defined. return quantizer def lookup_quantizer(name): """Retrieves a quantizer from the quantizers registry.""" return _QUANTIZERS_REGISTRY.lookup(name) ================================================ FILE: qkeras/quantizers.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import re from typing import Any, List, Tuple, cast import numpy as np import six from six.moves import range import tensorflow.compat.v2 as tf from tensorflow.keras import initializers import tensorflow.keras.backend as K from tensorflow.keras.utils import deserialize_keras_object from . import base_quantizer from . 
import quantizer_registry
# from .google_internals.experimental_quantizers import parametric_quantizer_d_xmax
# from .google_internals.experimental_quantizers import quantized_bits_learnable_scale
from .safe_eval import safe_eval
from tensorflow.python.framework import smart_cond as tf_utils

#
# Library of auxiliary functions
#


def get_weight_scale(quantizer, x=None):
  """Gets the scales of weights for (stochastic_)binary and ternary quantizers.

  Arguments:
    quantizer: A binary or ternary quantizer class.
    x: A weight tensor. We keep it here for now for backward compatibility.

  Returns:
    Weight scale per channel for binary and ternary quantizers with auto or
    auto_po2 alpha/threshold; 1.0 when the quantizer carries no scale.
  """
  if hasattr(quantizer, "scale") and quantizer.scale is not None:
    return K.eval(quantizer.scale)
  return 1.0


def _get_integer_bits(min_value, max_value, bits=8, symmetric=False,
                      keep_negative=False, is_clipping=True):
  """Estimates the integer bit(number of bits to the left of the binary point)
  satisfying the input argument constraints.

  Args:
    min_value: A tensor object. Its elements are in float representing the
      minimum values of ranges.
    max_value: A tensor object. Its elements are in float representing the
      maximum values of ranges.
    bits: number of bits to perform quantization.
    symmetric: boolean type. if true, it enforces negative and positive
      ranges to be symmetric.
    keep_negative: boolean type. if true, we do not clip negative numbers.
    is_clipping: boolean type. if true, the min_value and max_value are
      clipped to nearest powers-of-2.

  Returns:
    integer_bits : number of bits to the left of the binary point.
  """
  # Make the min and max values positive if only using positive values.
  if not keep_negative:
    min_value = K.maximum(min_value, 0)
    max_value = K.maximum(max_value, 0)
  # The number of bits excluding the sign bit
  # (bool keep_negative is implicitly cast to 0/1 here).
  unsigned_bits = bits - keep_negative
  # log2 of absolute min_value and max_value
  min_value_log2 = K.log(K.abs(min_value)) / np.log(2.0)
  max_value_log2 = K.log(K.abs(max_value)) / np.log(2.0)
  # Estimate integer_bits
  if is_clipping:
    min_int_bits = tf.math.round(
        tf.where(min_value_log2 > 0, min_value_log2, 0))
    max_int_bits = tf.math.round(
        tf.where(max_value_log2 > 0, max_value_log2, 0))
  else:
    # Ceil instead of round so that the range is never shrunk.
    min_int_bits = tf.math.ceil(tf.where(min_value_log2 > 0, min_value_log2,
                                         0))
    max_int_bits = tf.math.ceil(tf.where(max_value_log2 > 0, max_value_log2,
                                         0))
  # Checks max_value is bounded by the maximum positive value of
  # pow(2,integer_bits) - pow(2,-fractional_bits).
  max_value_po2 = pow(2.0, max_int_bits) - pow(
      2.0, K.minimum(max_int_bits - unsigned_bits, 0))
  max_int_bits = tf.where(max_value <= max_value_po2, max_int_bits,
                          max_int_bits + 1)
  if symmetric:
    # Checks min_value is bounded by the minimum negative value of
    # - pow(2,integer_bits) + pow(2,-fractional_bits).
    min_value_po2 = -pow(2.0, min_int_bits) + pow(
        2.0, K.minimum(min_int_bits - unsigned_bits, 0))
    min_int_bits = tf.where(min_value_po2 <= min_value, min_int_bits,
                            min_int_bits + 1)
  # To cover both negative and positive ranges with integer_bits.
  # (For keep_negative=False, min_int_bits is 0.)
  integer_bits = tf.cast(K.maximum(min_int_bits, max_int_bits),
                         dtype=tf.int32)
  # It assumes that integer_bits cannot be greater than unsigned_bits
  integer_bits = K.minimum(unsigned_bits, integer_bits)
  return integer_bits


def _get_scaling_axis(scale_axis: Any, len_axis: int) -> List[int]:
  """Get the axis/axes to perform auto scaling at.

  Args:
    scale_axis: int or List[int] representing which axis/axes to calculate
      scale at.
    len_axis: int representing the rank (number of axes) of the tensor on
      which scaling is performed.

  Returns:
    List[int] representing the scaling axes.
  """
  if scale_axis is not None:
    # if scale_axis is set, scale over all axis except the scale_axis.
    if isinstance(scale_axis, list):
      axis = [i for i in range(len_axis) if i not in scale_axis]
    else:
      # Concatenate axes [0, scale_axis) and (scale_axis, len_axis).
      axis = tf.range(scale_axis)
      axis = tf.concat([axis, tf.range(scale_axis + 1, len_axis)], axis=0)
  else:
    # if scale_axis is not set, scale over all axis except the channel axis.
    if K.image_data_format() == "channels_last":
      axis = tf.range(tf.math.maximum(len_axis - 1, 0))
    else:
      axis = tf.range(1, len_axis)
  return axis


def _get_unrolled_shape(input_shape: List[int], unroll_factor: Any,
                        unroll_axis: Any) -> Tuple[List[int], Any]:
  """Gets the shape of the unrolled tensor given unroll_factor and unroll_axis.

  Both unroll_factor and unroll_axis can either be ints or List[int]. If they
  are List[int], their lengths must match, and their values represent every
  unroll axis and its corresponding unroll factor.

  Examples:
  1. If input_shape = [16, 32], the unroll_factor = 4, and unroll_axis = 1.
     This means that axis 1 of the input should be unrolled by a factor of 4.
     This function would return a tuple; the first element represents the
     unrolled shape [16, 8, 4], and the second element represents the updated
     unroll axis in the unrolled shape which, in this case, is still 1.
  2. If input_shape = [16, 32], the unroll_factor = [2, 4], and
     unroll_axis = [0, 1]. This means that axis 0 of the input should be
     unrolled by a factor of 2, while axis 1 of the input should be unrolled
     by a factor of 4. This function would return a tuple; the first element
     represents the unrolled shape [4, 2, 8, 4], and the second element
     represents the updated unroll axis in the unrolled shape which, in this
     case, will be [0, 2].

  Args:
    input_shape: List[int]. The shape of the input tensor to be unrolled.
    unroll_factor: int or List[int] representing the unrolling factor(s)
      across various dimensions of the input tensors. If a list is used, its
      length has to match unroll_axis.
    unroll_axis: int or List[int] representing which axis/axes to unroll. If
      a list is used, its length has to match unroll_factor.

  Returns:
    Tuple of (List of ints representing the shape of the unrolled tensor,
    Int or List[int] representing updated scale_axis after unrolling.
  """

  def _unroll_one_axis(shape, factor, axis):
    # Splits shape[axis] into (shape[axis] // factor, factor), in place.
    shape[axis] = shape[axis] // factor
    shape.insert(axis + 1, factor)

  unrolled_shape = input_shape.copy()
  if isinstance(unroll_factor, int) and isinstance(unroll_axis, int):
    unrolled_scale_axis = unroll_axis
    _unroll_one_axis(unrolled_shape, unroll_factor, unroll_axis)
  elif isinstance(unroll_factor, list) and isinstance(unroll_axis, list):
    # axis_shift shifts the pre-defined axis every time we add a new
    # unrolled axis
    assert len(unroll_axis) == len(unroll_factor), (
        "unroll_axis and unroll_factor must have the same length")
    unrolled_scale_axis = unroll_axis.copy()
    axis_shift = 0
    for idx, (axis, factor) in enumerate(zip(unroll_axis, unroll_factor)):
      unrolled_scale_axis[idx] += axis_shift
      _unroll_one_axis(unrolled_shape, factor, axis+axis_shift)
      axis_shift += 1
  else:
    raise ValueError(
        "Both unroll_factor and unroll_axis has to be either ints or lists"
    )
  return unrolled_shape, unrolled_scale_axis


def _get_rolled_back_shape(input_shape: List[int], roll_axis: Any) -> List[int]:
  """Gets the shape of the rolled back tensor given roll_axis.

  If roll_axis is an int, the input shape will be rolled back once along the
  roll_axis. If roll_axis is List[int], the input shape will be rolled back
  len(roll_axis) times.

  Examples:
  1. If input_shape = [4, 2, 8, 4] and roll_axis = 1. This means that the
     axis following axis 1 will be rolled back to axis 1. This function would
     return a the rolled back shape which is [4, 16, 4] in this case.
  2. If input_shape = [4, 2, 8, 4] and roll_axis = [0, 2].
     This means that the axis following axis 0 will be rolled back to axis 0,
     and the axis following axis 2 will be rolled back to axis 2. This
     function would return the rolled back shape which is [16, 32] in this
     case.

  Args:
    input_shape: List[int]. The shape of the input tensor to be rolled back.
    roll_axis: int or List[int] representing which axis/axes of the tensor to
      roll back.

  Returns:
    List of ints representing the shape of the rolled back tensor.
  """

  def _roll_back_one_axis(shape, axis):
    # Merges shape[axis] and shape[axis+1] into one axis, in place.
    shape[axis] *= shape[axis+1]
    shape.pop(axis + 1)

  rolled_shape = input_shape.copy()
  if isinstance(roll_axis, int):
    _roll_back_one_axis(rolled_shape, roll_axis)
  elif isinstance(roll_axis, list):
    # axis_shift shifts the pre-defined axis every time we roll back an axis.
    axis_shift = 0
    for axis in roll_axis:
      _roll_back_one_axis(rolled_shape, axis+axis_shift)
      axis_shift -= 1
  return rolled_shape


def _validate_axis_and_eps(x_shape: List[int], scale_axis: Any,
                           elements_per_scale: Any) -> Tuple[Any, Any]:
  """Validates scale_axis and elements_per_scale.

  This function verifies that the values for scale_axis and
  elements_per_scale are valid and perform any required transformations
  returning a Tuple of verified (scale_axis, elements_per_scale)

  This function accepts scale_axis and elements_per_scale to be either ints
  or list of ints, so it verifies 4 different scenarios:

  1. If both scale_axis and elements_per_scale are ints. The function
     verifies that the x_shape is divisible by elements_per_scale at the
     scale_axis.
  2. If scale_axis is an int while elements_per_scale is a list. The
     function raises an error since this is an ambiguous state.
  3. If scale_axis is a list and elements_per_scale is an int. The function
     modifies elements_per_scale to a list of length scale_axis, and it
     verifies that the x_shape is divisible by the elements_per_scale at the
     corresponding scale_axis.
  4. If scale_axis is a list and elements_per_scale is a list.
The function verifies that the length of the two lists match, and that the x_shape is divisible by the corresponding elements_per_scale at the corresponding scale_axis. Examples: - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=4 --> Valid - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=3 --> Invalid - Input_shape=[16, 32, 4], scale_axis=0, and elements_per_scale=[2, 4] --> Invalid - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=2 --> Valid - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=[2, 4] --> Valid - Input_shape=[16, 32, 4], scale_axis=[0, 1], and elements_per_scale=[1, 2, 4] --> Invalid Args: x_shape: List[int] representing the shape of the input tensor. scale_axis: Int or List[int] representing the axis/axes to perform auto scaling at. elements_per_scale: Int or List[int] representing the number of elements/values associated with every scale along the corresponding scale_axis. Returns: A Tuple of verified (scale_axis, elements_per_scale). """ assert ( scale_axis is not None ), "scale_axis must be set if elements_per_scale is used." # if both are ints if isinstance(scale_axis, int) and isinstance(elements_per_scale, int): assert x_shape[scale_axis] % elements_per_scale == 0, ( f"scaling axis of dimension {x_shape[scale_axis]} has to be divisible " f"by thenumber of elements per scale, given {elements_per_scale}." ) # if scale_axis is int and elements_per_scale is a list of ints elif isinstance(scale_axis, int) and isinstance(elements_per_scale, list): raise ValueError( f"scale_axis is an integer {scale_axis}, " f"while {elements_per_scale} is a list of values which is ambigious." 
) # if scale_axis is list of ints and elements_per_scale is an int elif isinstance(scale_axis, list) and isinstance(elements_per_scale, int): for axis in scale_axis: assert x_shape[axis] % elements_per_scale == 0, ( f"scaling axis of dimension {x_shape[axis]} has to be divisible by " f"number of elements per scale, given {elements_per_scale}." ) # duplicate the elements_per_scale to match length of scale_axis elements_per_scale = [elements_per_scale] * len(scale_axis) # if both scale_axis and elements_per_scale are lists else: assert len(scale_axis) == len( elements_per_scale ), (f"both scale_axis and elements_per_scale lists must match in length; " f"Got {len(scale_axis)} and {len(elements_per_scale)}") for axis, eps in zip(scale_axis, elements_per_scale): assert x_shape[axis] % eps == 0, ( f"scaling axis of dimension {x_shape[axis]} has to be divisible by" f" the corresponding number of elements per scale, given {eps}." ) assert ( isinstance(scale_axis, int) and isinstance(elements_per_scale, int) ) or (isinstance(scale_axis, list) and isinstance(elements_per_scale, list)) return scale_axis, elements_per_scale def _repeat_along_axis(x: tf.Tensor, axis: int, repeats: int) -> tf.Tensor: """Repeats the elements in a tensor along the specified axis.""" return tf.repeat(x, repeats=repeats, axis=axis) def _repeat_along_axes(x: tf.Tensor, axis: Any, repeats: Any) -> tf.Tensor: """Repeats the elements in a tensor along the specified axes.""" if isinstance(axis, int) and isinstance(repeats, int): x = _repeat_along_axis(x, axis, repeats) elif isinstance(axis, list) and isinstance(repeats, list): for a, r in zip(axis, repeats): x = _repeat_along_axis(x, axis=a, repeats=r) return x def _get_scale_mean( scale_axis: Any, x: tf.Tensor, q: tf.Tensor, elements_per_scale: Any ): """Gets the mean of the tensor along the specified scaling axis/axes. Args: scale_axis: int or List[int] representing which axis/axes to calculate scale at. x: A tensor object. Its elements are in float. 
q: A tensor object. Its elements are in quantized format of x. elements_per_scale: if set to an int or List[int], we create multiple scales per axis across scale_axis, where 'elements_per_scale' represents the number of elements/values associated with every separate scale value. Returns: A tuple of two tensors representing the mean of x and its quantized format along the specified scaling axis/axes. """ if elements_per_scale is not None: # Get the input shape x_shape = x.shape.as_list() scale_axis, elements_per_scale = _validate_axis_and_eps( x_shape, scale_axis, elements_per_scale) # get the shape of unrolled tensors x and q unrolled_shape, unrolled_scale_axis = _get_unrolled_shape( x_shape, elements_per_scale, scale_axis) # Unroll x and q x1 = tf.reshape(x, unrolled_shape) q1 = tf.reshape(q, unrolled_shape) # Get the mean along the unroll axis/axes axes_of_mean = _get_scaling_axis(unrolled_scale_axis, len(unrolled_shape)) qx = K.mean(tf.math.multiply(x1, q1), axis=axes_of_mean, keepdims=True) qq = K.mean(tf.math.multiply(q1, q1), axis=axes_of_mean, keepdims=True) # Reshape qx and qq to be divisible by the input shape. # To achieve this, qx and qq are first rolled back along unroll axis. # Then, the values along the scale_axis are repeated "elements_per_scale" # times to match the original shape. 
rolled_back_shape = _get_rolled_back_shape(qx.shape.as_list(), roll_axis=unrolled_scale_axis) qx = tf.reshape(qx, rolled_back_shape) qx = _repeat_along_axes(qx, repeats=elements_per_scale, axis=scale_axis) qq = tf.reshape(qq, rolled_back_shape) qq = _repeat_along_axes(qq, repeats=elements_per_scale, axis=scale_axis) else: len_axis = len(x.shape) axis = _get_scaling_axis(scale_axis, len_axis) qx = K.mean(tf.math.multiply(x, q), axis=axis, keepdims=True) qq = K.mean(tf.math.multiply(q, q), axis=axis, keepdims=True) return qx, qq def _clip_po2_scale(scale: tf.Tensor, min_po2_exponent: Any, max_po2_exponent: Any): """Clip power-of-two scales given minimum and maximum po2 exponenets.""" min_po2 = None if min_po2_exponent is None else 2**min_po2_exponent max_po2 = None if max_po2_exponent is None else 2**max_po2_exponent scale = K.clip(scale, min_value=min_po2, max_value=max_po2) return scale def _get_least_squares_scale( alpha: Any, x: tf.Tensor, q: tf.Tensor, scale_axis: Any = None, per_channel_scale: bool = True, elements_per_scale: Any = None, min_po2_exponent: Any = None, max_po2_exponent: Any = None): """Gets scaling factor for scaling the tensor per channel. It uses the least squares method to find the scaling factor. (https://en.wikipedia.org/wiki/Linear_least_squares) Arguments: alpha: A float or string. When it is string, it should be either "auto" or "auto_po2", and scale = sum(x * q, axis=all but last) / sum(q * q, axis=all but last) x: A tensor object. Its elements are in float. q: A tensor object. Its elements are in quantized format of x. scale_axis: int or List[int] representing which axis/axes to calculate scale from. per_channel_scale: A bool. Whether to perform per-channel scaling or not. elements_per_scale: if set to an int or List[int], we create multiple scales per axis across scale_axis, where 'elements_per_scale' represents the number of elements/values associated with every separate scale value. 
    min_po2_exponent: if set while using "auto_po2", it represents the
      minimum allowed power of two exponent.
    max_po2_exponent: if set while using "auto_po2", it represents the
      maximum allowed power of two exponent.

  Returns:
    A scaling factor tensor or scalar for scaling tensor per channel.
  """
  if isinstance(alpha, six.string_types) and "auto" in alpha:
    assert alpha in ["auto", "auto_po2"]
    # in different tensorflow version (e.g., 2.4)
    # x.shape is a tuple which doesn't have as_list() method
    try:
      x_shape = x.shape.as_list()
    except AttributeError:
      x_shape = list(x.shape)
    len_axis = len(x_shape)
    if not per_channel_scale:
      # Single scalar scale over the whole tensor.
      qx = K.mean(x * q, keepdims=True)
      qq = K.mean(q * q, keepdims=True)
    else:
      if len_axis > 1:
        qx, qq = _get_scale_mean(scale_axis, x, q, elements_per_scale)
      else:
        # No summing (averaging) along the channel axis to get per-channel
        # scales.
        qx = x * q
        qq = q * q
    # Least-squares solution; epsilon avoids division by zero when q == 0.
    scale = qx / (qq + K.epsilon())
    if alpha == "auto_po2":
      # Round the scale to the nearest power of two.
      scale = K.pow(2.0,
                    tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0)))
      if min_po2_exponent is not None or max_po2_exponent is not None:
        scale = _clip_po2_scale(scale, min_po2_exponent, max_po2_exponent)
  elif alpha is None:
    scale = 1.0
  elif isinstance(alpha, np.ndarray):
    scale = alpha
  else:
    scale = float(alpha)
  return scale


def _get_scale(*args, **kwargs):
  """Old name for _get_least_squares_scale. Kept for backwards
  compatibility."""
  return _get_least_squares_scale(*args, **kwargs)


def smooth_sigmoid(x):
  """Implements a linear approximation of a sigmoid function."""
  # if we use 2.65 as the clipping point, MSE w.r.t. original sigmoid is
  # smaller than hard_sigmoid but the arithmetic for it is (x >> 3) +
  # (x >> 4) + 0.5, which is also not bad.
  return tf.keras.backend.clip(0.1875 * x + 0.5, 0.0, 1.0)


def hard_sigmoid(x):
  """Computes hard_sigmoid function that saturates between 0 and 1."""
  return tf.keras.backend.clip(0.5 * x + 0.5, 0.0, 1.0)


def binary_sigmoid(x):
  """Computes binary_sigmoid."""
  return _round_through(hard_sigmoid(x))


# we use a version of approximated sigmoid everywhere in this code.
# we can set it to hard_sigmoid(x) or smooth_sigmoid(x).
_default_sigmoid_type = "hard"
_sigmoid = None


def set_internal_sigmoid(mode):
  """Sets _sigmoid to either real, hard or smooth.

  Args:
    mode: one of "real", "hard" or "smooth".

  Raises:
    ValueError: if mode is not one of the supported strings.
  """
  global _sigmoid

  if mode not in ["real", "hard", "smooth"]:
    raise ValueError("mode has to be 'real', 'hard' or 'smooth'.")

  if mode == "hard":
    _sigmoid = hard_sigmoid
  elif mode == "smooth":
    _sigmoid = smooth_sigmoid
  elif mode == "real":
    _sigmoid = tf.keras.backend.sigmoid


# Initialize the module-level sigmoid to the default approximation.
set_internal_sigmoid(_default_sigmoid_type)


def binary_tanh(x):
  """Computes binary_tanh function that outputs -1 and 1."""
  return 2.0 * binary_sigmoid(x) - 1.0


def hard_tanh(x):
  """Computes hard_tanh function that saturates between -1 and 1."""
  return 2.0 * hard_sigmoid(x) - 1.0


def smooth_tanh(x):
  """Computes smooth_tanh function that saturates between -1 and 1."""
  return 2.0 * smooth_sigmoid(x) - 1.0


def stochastic_round(x, precision=0.5):
  """Performs stochastic rounding to the first decimal point."""
  scale = 1.0 / precision
  scale_x = x * scale
  fraction = scale_x - tf.floor(scale_x)

  # Round up with probability equal to the fractional part, so the
  # expected value of the result equals scale_x.
  result = tf.where(fraction < tf.random.uniform(tf.shape(x)),
                    tf.math.floor(scale_x), tf.math.ceil(scale_x))
  return result / scale


def stochastic_round_po2(x):
  """Performs stochastic rounding for the power of two."""
  # TODO(b/237832905): test stochastic_round_po2 and constraint.
  # because quantizer is applied after constraint.
  y = tf.abs(x)
  eps = tf.keras.backend.epsilon()
  log2 = tf.keras.backend.log(2.0)

  x_log2 = tf.round(tf.keras.backend.log(y + eps) / log2)
  po2 = tf.cast(pow(2.0, tf.cast(x_log2, dtype="float32")), dtype="float32")
  left_val = tf.where(po2 > y, x_log2 - 1, x_log2)
  right_val = tf.where(po2 > y, x_log2, x_log2 + 1)
  # sampling in [2**left_val, 2**right_val].
  minval = 2 ** left_val
  maxval = 2 ** right_val
  val = tf.random.uniform(tf.shape(y), minval=minval, maxval=maxval)
  # use y as a threshold to keep the probability [2**left_val, y,
  # 2**right_val] so that the mean value of the sample should be y
  x_po2 = tf.where(y < val, left_val, right_val)
  """
  x_log2 = stochastic_round(tf.keras.backend.log(y + eps) / log2)
  sign = tf.sign(x)
  po2 = (
      tf.sign(x) *
      tf.cast(pow(2.0, tf.cast(x_log2, dtype="float32")), dtype="float32")
  )
  """
  return x_po2


def _round_through(x, use_stochastic_rounding=False, precision=0.5):
  """Rounds x but using straight through estimator.

  We use the trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182).

  Straight through estimator is a biased estimator for the rounding
  operation defined by Hinton"s Coursera Lecture 9c where dL/dx is made
  equal to dL/dy for y = f(x) during gradient computation, where f(x) is
  a non-derivable function. In that case, we assume df/dx = 1 in:

  dL   dL df   dL
  -- = -- -- = --
  dx   df dx   dy

  (https://www.youtube.com/watch?v=LN0xtUuJsEI&list=PLoRl3Ht4JOcdU872GhiYWf6jwrk_SNhz9&index=41)

  Arguments:
    x: tensor to perform round operation with straight through gradient.
    use_stochastic_rounding: if true, we perform stochastic rounding.
    precision: by default we will use 0.5 as precision, but that can be
      overriden by the user.

  Returns:
    Rounded tensor.
  """
  if use_stochastic_rounding:
    # Stochastic rounding only during training; deterministic at inference.
    output = tf_utils.smart_cond(
        K.learning_phase(),
        lambda: x + tf.stop_gradient(-x + stochastic_round(x, precision)),
        lambda: x + tf.stop_gradient(-x + tf.round(x)))
  else:
    output = x + tf.stop_gradient(-x + tf.round(x))
  return output


def _sign_through(x):
  """Computes the sign operation using the straight through estimator."""

  # tf.sign generates -1, 0 or +1, so it should not be used when we attempt
  # to generate -1 and +1.

  k_sign = tf.sign(x)

  return x + tf.stop_gradient(-x + k_sign)


def _ceil_through(x):
  """Computes the ceiling operation using straight through estimator."""

  return x + tf.stop_gradient(-x + tf.ceil(x))


def _floor_through(x):
  """Computes the floor operation using straight through estimator."""

  return x + tf.stop_gradient(-x + tf.floor(x))


#
# Activation functions for quantized networks.
#
# Please note some of these functions can be used as well
# as quantizer functions for weights of dense and convolutional
# layers.
#


@quantizer_registry.register_quantizer
class quantized_linear(base_quantizer.BaseQuantizer):
  """Linear quantization with fixed number of bits.

  This quantizer maps inputs to the nearest value of a fixed number of
  outputs that are evenly spaced, with possible scaling and stochastic
  rounding. This is an updated version of the legacy quantized_bits.

  The core computation is:
    1. Divide the tensor by a quantization scale
    2. Clip the tensor to a specified range
    3. Round to the nearest integer
    4. Multiply the rounded result by the quantization scale

  This clip range is determined by
    - The number of bits we have to represent the number
    - Whether we want to have a symmetric range or not
    - Whether we want to keep negative numbers or not

  The quantization scale is defined by either the quantizer parameters or
  the data passed to the __call__ method. See documentation for the `alpha`
  parameter to find out more.
@quantizer_registry.register_quantizer
class quantized_linear(base_quantizer.BaseQuantizer):
  """Linear quantization with fixed number of bits.

  This quantizer maps inputs to the nearest value of a fixed number of
  outputs that are evenly spaced, with possible scaling and stochastic
  rounding. This is an updated version of the legacy quantized_bits.

  The core computation is:
    1. Divide the tensor by a quantization scale
    2. Clip the tensor to a specified range
    3. Round to the nearest integer
    4. Multiply the rounded result by the quantization scale

  This clip range is determined by
    - The number of bits we have to represent the number
    - Whether we want to have a symmetric range or not
    - Whether we want to keep negative numbers or not

  The quantization scale is defined by either the quantizer parameters or the
  data passed to the __call__ method. See documentation for the `alpha`
  parameter to find out more.

  For backprop purposes, the quantizer uses the straight-through estimator
  for the rounding step (https://arxiv.org/pdf/1903.05662.pdf). Thus the
  gradient of the __call__ method is 1 on the interval
  [quantization_scale * clip_min, quantization_scale * clip_max] and 0
  elsewhere.

  The quantizer also supports a number of other optional features:
  - Stochastic rounding (see the `stochastic_rounding` parameter)
  - Quantization noise (see the `qnoise_factor` parameter)

  Notes on the various "scales" in quantized_linear:

  - The quantization scale is the scale used in the core computation (see
    above). You can access it via the `quantization_scale` attribute.
  - The data type scale is the scale is determined by the type of data stored
    on hardware on a small device running a true quantized model. It is the
    quantization scale needed to represent `bits` bits, `integer` of which
    are integer bits, and one bit is reserved for the sign if `keep_negative`
    is True. It can be calculated as 2 ** (integer - bits + keep_negative).
    You can access it via the `data_type_scale` attribute.
  - The `scale` attribute stores the quotient of the quantization scale and
    the data type scale. This is also the scale that can be directly
    specified by the user, via the `alpha` parameter.

  These three quantities are related by the equation
  scale = quantization_scale / data_type_scale.

  See the diagram below of scale usage in a quantized conv layer.

  +---------------------------------------------------------------------+
  | data_type_scale ----------------> stored_weights                    |
  | (determines decimal point)              |                           |
  |                                         V                           |
  |                                      conv op                        |
  |                                         |                           |
  |                                         V                           |
  |                                    accumulator                      |
  |                                         |                           |
  | determines quantization                 V                           |
  | range and precision ------------> quantization_scale               |
  | (per channel)                           |                           |
  |                                         V                           |
  |                                     activation                      |
  +---------------------------------------------------------------------+

  # TODO: The only fundamentally necessary scale is the quantization scale.
  # We should consider removing the data type scale and scale attributes,
  # but know that this will require rewriting much of how qtools and HLS4ML
  # use these scale attributes.

  Note on binary quantization (bits=1):
    The core computation is modified here when `keep_negative` is True to
    perform a scaled sign function. This is needed because the core
    computation as defined above requires that 0 be mapped to 0, which does
    not allow us to keep both positive and negative outputs for binary
    quantization. Special shifting operations are used to achieve this.

  Example usage:

  # 8-bit quantization with 3 integer bits
  >>> q = quantized_linear(8, 3)
  >>> x = tf.constant([0.0, 0.5, 1.0, 1.5, 2.0])
  >>> q(x).numpy()
  array([0., 0., 1., 2., 2.], dtype=float32)

  # 2-bit quantization with "auto" and tensor alphas
  >>> q_auto = quantized_linear(2, alpha="auto")
  >>> x = tf.constant([0.0, 0.5, 1.0, 1.5, 2.0])
  >>> q_auto(x).numpy()
  array([0., 0., 0., 2., 2.], dtype=float32)
  >>> q_auto.scale.numpy()
  array([4.], dtype=float32)
  >>> q_auto.quantization_scale.numpy()
  array([2.], dtype=float32)
  >>> q_fixed = quantized_linear(2, alpha=q_auto.scale)
  >>> q_fixed(x).numpy()
  array([0., 0., 0., 2., 2.], dtype=float32)

  Args:
    bits (int): Number of bits to represent the number. Defaults to 8.
    integer (int): Number of bits to the left of the decimal point, used for
      data_type_scale. Defaults to 0.
    symmetric (bool): If true, we will have the same number of values for
      positive and negative numbers. Defaults to True.
    alpha (str, Tensor, None): Instructions for determining the quantization
      scale. Defaults to None.
      - If None: the quantization scale is the data type scale, determined
        by `integer`, `bits`, and `keep_negative`.
      - If "auto", the quantization scale is calculated as the minimum
        floating point scale per-channel that does not clip the max of x.
      - If "auto_po2", the quantization scale is chosen as the power of two
        per-channel that minimizes squared error between the quantized x
        and the original x.
      - If Tensor: The quantization scale is the Tensor passed in
        multiplied by the data type scale.
    keep_negative (bool): If false, we clip negative numbers. Defaults to
      True.
    use_stochastic_rounding (bool): If true, we perform stochastic rounding
      (https://arxiv.org/pdf/1502.02551.pdf).
    scale_axis (int, None): Which axis to calculate scale from. If None, we
      perform per-channel scaling based off of the image data format. Note
      that each entry of a rank-1 tensor is considered its own channel by
      default. See `_get_scaling_axis` for more details. Defaults to None.
    qnoise_factor (float): A scalar from 0 to 1 that represents the level of
      quantization noise to add. This controls the amount of the
      quantization noise to add to the outputs by changing the weighted sum
      of (1 - qnoise_factor) * unquantized_x + qnoise_factor * quantized_x.
      Defaults to 1.0, which means that the result is fully quantized.
    use_variables (bool): If true, we use tf.Variables to store certain
      parameters. See the BaseQuantizer implementation for more details.
      Defaults to False. If set to True, be sure to use the special attribute
      update methods detailed in the BaseQuantizer.
    var_name (str or None): A variable name shared between the tf.Variables
      created in on initialization, if use_variables is true. If None, the
      variable names are generated automatically based on the parameter names
      along with a uid. Defaults to None.

  Returns:
    function: Function that computes linear quantization.

  Raises:
    ValueError:
      - If `bits` is not positive, or is too small to represent `integer`.
      - If `integer` is negative.
      - If `alpha` is a string but not one of ("auto", "auto_po2").
  """

  # string options for alpha parameter
  ALPHA_STRING_OPTIONS = ("auto", "auto_po2")

  def __init__(
      self,
      bits=8,
      integer=0,
      symmetric=1,
      keep_negative=True,
      alpha=None,
      use_stochastic_rounding=False,
      scale_axis=None,
      qnoise_factor=1.0,
      var_name=None,
      use_variables=False,
  ):
    super().__init__()
    self.var_name = var_name

    # Error checking
    self._check_bits(bits)
    self._check_alpha(alpha)

    # Set non-modifyable attributes
    self._bits = bits
    self._integer = integer
    self._keep_negative = keep_negative
    self._use_stochastic_rounding = use_stochastic_rounding
    self._scale_axis = scale_axis
    self._use_variables = use_variables

    # Set modifyable attributes
    self.alpha = alpha
    self.qnoise_factor = qnoise_factor
    self.symmetric = symmetric

    # Set default quantization scale
    self.quantization_scale = self.default_quantization_scale

  def _check_bits(self, bits):
    """Error checking for bits parameter"""
    err_msg = f"Bit count {bits} must be positive"
    if bits <= 0:
      raise ValueError(err_msg)

  def _check_alpha(self, alpha):
    """Error checking for alpha parameter"""

    if isinstance(alpha, six.string_types):
      # Check the quantizer has been given a valid alpha string
      if not alpha in self.ALPHA_STRING_OPTIONS:
        raise ValueError(
            f"Invalid alpha '{alpha}' for auto alpha computation. "
            f"Must be one of {self.ALPHA_STRING_OPTIONS}")
    elif alpha is not None:  # alpha is a tensor
      try:
        # any allowable array type can be cast as a numpy array
        np.array(alpha)
      except TypeError:
        raise TypeError(
            f"alpha must be, a string, an array, or None, not {type(alpha)}")

  @property
  def bits(self):
    return self._bits

  @property
  def integer(self):
    return self._integer

  @property
  def keep_negative(self):
    return self._keep_negative

  @property
  def use_stochastic_rounding(self):
    return self._use_stochastic_rounding

  @property
  def scale_axis(self):
    return self._scale_axis

  @property
  def use_variables(self):
    return self._use_variables

  @property
  def scale(self):
    return self.quantization_scale / self.data_type_scale

  @property
  def data_type_scale(self):
    """Quantization scale for the data type"""
    # integer is sometimes cast as int32, so cast to float32 to avoid errors
    integer = tf.cast(self.integer, tf.float32)
    return K.pow(2.0, integer - self.bits + self.keep_negative)

  @property
  def auto_alpha(self):
    """Returns true if using a data-dependent alpha"""
    return isinstance(self.alpha, six.string_types)

  @property
  def use_sign_function(self):
    """Return true if using sign function for quantization"""
    return (self.bits == 1.0) and self.keep_negative

  @property
  def default_quantization_scale(self):
    """Calculate and set quantization_scale default"""

    # Set default quantization scale
    quantization_scale = self.data_type_scale

    # Quantization scale given by alpha
    if self.alpha is not None and not self.auto_alpha:
      quantization_scale = self.alpha * self.data_type_scale

    return quantization_scale

  def get_clip_bounds(self):
    """Get bounds of clip range"""
    if self.use_sign_function:
      clip_min = K.cast_to_floatx(-0.5)
      clip_max = K.cast_to_floatx(0.5)
    else:
      unsigned_bits_po2 = K.pow(2.0, self.bits - self.keep_negative)
      # if symmetric, clip_min is negative of clip_max. Otherwise clip_min is
      # lowered by 1, giving us one more representable number
      clip_min = self.keep_negative * (-unsigned_bits_po2 + self.symmetric)
      clip_max = unsigned_bits_po2 - K.cast_to_floatx(1.0)

    return clip_min, clip_max

  def __call__(self, x):
    """Core quantization function"""

    # Build if not already built
    self._build()

    # Data type conversion
    x = K.cast_to_floatx(x)
    shape = x.shape

    if self.auto_alpha:
      # get data-dependent quantization scale
      quantization_scale = self._get_auto_quantization_scale(x)
    else:
      # quantization scale determined by quantizer params, not data
      # see default_quantization_scale property for more info
      quantization_scale = self.quantization_scale

    scaled_xq = self._scale_clip_and_round(x, quantization_scale)
    xq = scaled_xq * quantization_scale

    res = x + self.qnoise_factor * (xq - x)
    res.set_shape(shape)
    return res

  def _scale_clip_and_round(self, x, quantization_scale):
    """Scale, clip, and round x to an integer value in a limited range

    Note that the internal shift is needed for 1-bit quantization to ensure
    that a sign function is used. Otherwise, the binary quantizer would have
    three output values"""

    # special shifting needed to compute a sign function.
    shift = self.use_sign_function * 0.5

    clip_min, clip_max = self.get_clip_bounds()

    scaled_x = x / quantization_scale
    clipped_scaled_x = K.clip(scaled_x, clip_min, clip_max)
    # Round through to nearest integer, using straight-through estimator
    # for gradient computations.
    scaled_xq = _round_through(
        clipped_scaled_x - shift,
        use_stochastic_rounding=self.use_stochastic_rounding,
        precision=1.0,  # using 1.0 precision so that we round to an integer
    )

    return scaled_xq + shift

  def _get_auto_quantization_scale(self, x):
    """Get quantization_scale, either from self or from input x"""

    # Get the minimum floating point scale that does not clip the max of x
    # This is the quantization scale for alpha="auto"
    quantization_scale = self._get_quantization_scale_from_max_data(x)

    if self.alpha == "auto_po2":
      quantization_scale = self._po2_autoscale(x, quantization_scale)

    # update quantization_scale variable
    # stop_gradient on quantization_scale to ignore dependence on x
    self.quantization_scale = tf.stop_gradient(quantization_scale)

    # very important that return value is a tf.Variable with shape None
    return self.quantization_scale

  def _get_quantization_scale_from_max_data(self, x):
    """Get the minimum floating point scale that does not clip the max
    of x"""

    axis = _get_scaling_axis(self.scale_axis, tf.rank(x))
    clip_min, clip_max = self.get_clip_bounds()
    clip_range = clip_max - clip_min

    # get quantization scale- depends on whether we are keeping negative
    # divide by clip range to ensure that we clip right at the max of x
    if self.keep_negative:
      data_max = K.max(tf.math.abs(x), axis=axis, keepdims=True)
      quantization_scale = (data_max * 2) / clip_range
    else:
      data_max = K.max(x, axis=axis, keepdims=True)
      quantization_scale = data_max / clip_range

    return tf.math.maximum(quantization_scale, K.epsilon())

  def _po2_autoscale(self, x, quantization_scale):
    """Get an approximation of the "best" po2 scale using least squares"""

    # set alpha scale to a near power of two
    quantization_scale = K.pow(
        2.0,
        tf.math.round(K.log(quantization_scale + K.epsilon()) / K.log(2.0)))

    def loop_body(_, quantization_scale):
      """Loop body for least squares autoscaling"""

      scaled_xq = self._scale_clip_and_round(x, quantization_scale)
      new_quantization_scale = _get_least_squares_scale(
          alpha="auto_po2",
          x=x,
          q=scaled_xq,
          scale_axis=self.scale_axis,
      )
      return quantization_scale, new_quantization_scale

    def loop_cond(last_quantization_scale, quantization_scale):
      """Loop condition for least squares autoscaling- stop when the
      scale converges"""

      tensors_not_equal = tf.math.reduce_any(
          tf.not_equal(last_quantization_scale, quantization_scale))
      return tensors_not_equal

    # Need a tensor of the same shape as quantization_scale that
    # does not equal quantization_scale
    dummy_quantization_scale = -tf.ones_like(quantization_scale)

    # For 1-bit quantization, po2 autoscale loop is guaranteed to converge
    # after 1 iteration
    max_iterations = 1 if self.use_sign_function else 5

    _, quantization_scale = tf.while_loop(
        loop_cond,
        loop_body,
        (dummy_quantization_scale, quantization_scale),
        maximum_iterations=max_iterations,
    )

    return quantization_scale

  def _build(self):
    """Build if not done so already"""
    if not self.built:
      self.build(var_name=self.var_name, use_variables=self.use_variables)

  def max(self):
    """Get maximum value that quantized_linear class can represent."""
    _, clip_max = self.get_clip_bounds()
    return clip_max * self.quantization_scale

  def min(self):
    """Get minimum value that quantized_linear class can represent."""
    clip_min, _ = self.get_clip_bounds()
    return clip_min * self.quantization_scale

  def range(self):
    """Returns a list of all values that quantized_linear can represent."""

    if self.use_sign_function:
      return K.cast_to_floatx([self.max(), self.min()])
    else:
      clip_min, clip_max = self.get_clip_bounds()
      clip_max = tf.cast(clip_max, tf.int32)
      clip_min = tf.cast(clip_min, tf.int32)
      pos_array = K.cast_to_floatx(tf.range(clip_max + 1))
      neg_array = K.cast_to_floatx(tf.range(clip_min, 0))

      return self.quantization_scale * tf.concat([pos_array, neg_array],
                                                 axis=0)

  def __str__(self):
    # Main parameters always printed in string
    flags = [
        str(int(self.bits)),
        str(int(self.integer)),
        str(int(self.symmetric))]
    # Optional parameters only printed if not default
    if not self.keep_negative:
      flags.append("keep_negative=False")
    if self.auto_alpha:
      alpha = "'" + self.alpha + "'"
      flags.append("alpha=" + alpha)
    elif self.alpha is not None:
      # BUG FIX: previously read the unbound local `alpha` here
      # (np.array(alpha)), raising UnboundLocalError for tensor alphas.
      alpha = np.array(self.alpha)
      flags.append("alpha=" + str(alpha))
    if self.use_stochastic_rounding:
      flags.append("use_stochastic_rounding=" +
                   str(int(self.use_stochastic_rounding)))
    return "quantized_linear(" + ",".join(flags) + ")"

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"
      self.symmetric = True

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "integer": self.integer,
        "symmetric": self.symmetric,
        "alpha": self.alpha,
        "keep_negative": self.keep_negative,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "qnoise_factor": self.qnoise_factor,
    }
    return config
@quantizer_registry.register_quantizer
class quantized_bits(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Legacy quantizer: Quantizes the number to a number of bits.

  In general, we want to use a quantization function like:

  a = (pow(2,bits) - 1 - 0) / (max(x) - min(x))
  b = -min(x) * a

  in the equation:

  xq = a x + b

  This requires multiplication, which is undesirable. So, we
  enforce weights to be between -1 and 1 (max(x) = 1 and min(x) = -1),
  and separating the sign from the rest of the number as we make this
  function symmetric, thus resulting in the following approximation.

  1) max(x) = +1, min(x) = -1
  2) max(x) = -min(x)

  a = pow(2,bits-1)
  b = 0

  Finally, just remember that to represent the number with sign, the
  largest representation is -pow(2,bits) to pow(2, bits-1)

  Symmetric and keep_negative allow us to generate numbers that are
  symmetric (same number of negative and positive representations),
  and numbers that are positive.

  Note:
    the behavior of quantized_bits is different than Catapult HLS ac_fixed
    or Vivado HLS ap_fixed. For ac_fixed, when signed = true, it is
    equivalent to
    quantized_bits(word_length, integer_length-1, keep_negative=True)

  Attributes:
    bits: number of bits to perform quantization.
    integer: number of bits to the left of the decimal point.
    symmetric: if true, we will have the same number of values for positive
      and negative numbers.
    alpha: a tensor or None, the scaling factor per channel. If None, the
      scaling factor is 1 for all channels.
    keep_negative: if true, we do not clip negative numbers.
    use_stochastic_rounding: if true, we perform stochastic rounding.
    scale_axis: int or List[int] which axis/axes to calculate scale from.
    qnoise_factor: float. a scalar from 0 to 1 that represents the level of
      quantization noise to add. This controls the amount of the quantization
      noise to add to the outputs by changing the weighted sum of
      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.
    var_name: String or None. A variable name shared between the tf.Variables
      created in the build function. If None, it is generated automatically.
    use_ste: Bool. Whether to use "straight-through estimator" (STE) method
      or not.
    use_variables: Bool. Whether to make the quantizer variables to be
      dynamic tf.Variables or not.
    elements_per_scale: if set to an int or List[int], we create multiple
      scales per axis across scale_axis, where 'elements_per_scale'
      represents the number of elements/values associated with every separate
      scale value. It is only supported when using "auto_po2".
    min_po2_exponent: if set while using "auto_po2", it represents the
      minimum allowed power of two exponent.
    max_po2_exponent: if set while using "auto_po2", it represents the
      maximum allowed power of two exponent.
    post_training_scale: if set, it represents the scale value to be used for
      quantization.

  Returns:
    Function that computes fixed-point quantization with bits.
  """

  def __init__(self,
               bits=8,
               integer=0,
               symmetric=0,
               keep_negative=True,
               alpha=None,
               use_stochastic_rounding=False,
               scale_axis=None,
               qnoise_factor=1.0,
               var_name=None,
               use_ste=True,
               use_variables=False,
               elements_per_scale=None,
               min_po2_exponent=None,
               max_po2_exponent=None,
               post_training_scale=None):
    super().__init__()
    self.bits = bits
    self.integer = integer
    self.symmetric = symmetric
    self.keep_negative = keep_negative
    self.alpha = alpha
    self.use_stochastic_rounding = use_stochastic_rounding
    self.post_training_scale = post_training_scale
    # "auto*" |-> symmetric: auto-scaling modes always quantize symmetrically.
    if isinstance(self.alpha, six.string_types):
      self.freeze_scale = False
      self.symmetric = True
      # A fixed post-training scale freezes the otherwise trainable scale.
      if post_training_scale is not None:
        self.scale = np.array(post_training_scale)
        self.freeze_scale = True
    else:
      if post_training_scale is not None:
        raise ValueError(f"alpha={alpha} doesn't support post_training_scale: "
                         f"{post_training_scale}")
      self.scale = None
      # If alpha is not "auto*", then scale is fixed and not trainable.
      self.freeze_scale = True
    self.scale_axis = scale_axis
    self.qnoise_factor = qnoise_factor
    self.use_ste = use_ste
    self.var_name = var_name
    self.use_variables = use_variables
    self.elements_per_scale = elements_per_scale
    self.min_po2_exponent = min_po2_exponent
    self.max_po2_exponent = max_po2_exponent

  def __str__(self):
    # Convert Tensors to printable strings by converting to a numpy array and
    # then using regex to remove brackets when there is only one integer bit
    integer_bits = re.sub(
        r"\[(\d)\]", r"\g<1>",
        str(self.integer.numpy() if isinstance(self.integer, tf.Variable
                                              ) else self.integer))

    flags = [str(self.bits), integer_bits, str(int(self.symmetric))]
    if not self.keep_negative:
      flags.append("keep_negative=False")
    if self.alpha:
      alpha = str(self.alpha)
      if isinstance(self.alpha, six.string_types):
        alpha = "'" + alpha + "'"
      flags.append("alpha=" + alpha)
    if self.use_stochastic_rounding:
      flags.append("use_stochastic_rounding=" +
                   str(int(self.use_stochastic_rounding)))
    return "quantized_bits(" + ",".join(flags) + ")"

  def __call__(self, x):
    """Computes fixedpoint quantization of x."""
    if not self.built:
      self.build(var_name=self.var_name, use_variables=self.use_variables)

    x = K.cast_to_floatx(x)
    # quantized_bits with "1" bit becomes a binary implementation.
    unsigned_bits = self.bits - self.keep_negative

    # In pow function, use float datatype instead of integer, so that
    # K.pow() results will use float32 instead of int32 as the default
    # datatype. float32 has a much larger value range (2^128) than int32
    # (2^32), this is particularly important when quantizing very large
    # values, and when integer bits are set much larger than total bits.
    m = K.pow(2.0, K.cast_to_floatx(unsigned_bits))
    m_i = K.pow(2.0, K.cast_to_floatx(self.integer))

    # Verify that "elements_per_scale", "min_po2_exponent",
    # and "max_po2_exponent" are only set when alpha is "auto_po2"
    if self.alpha != "auto_po2":
      assert (
          self.elements_per_scale is None
      ), "elements_per_scale is only supported when using auto_po2"
      assert (
          self.min_po2_exponent is None
      ), "min_po2_exponent is only supported when using auto_po2"
      assert (
          self.max_po2_exponent is None
      ), "max_po2_exponent is only supported when using auto_po2"

    if self.alpha is None:
      scale = 1.0
    elif isinstance(self.alpha, six.string_types):
      # Data-dependent ("auto"/"auto_po2") scaling path; returns early below.
      # We only deal with the symmetric case right now.
      assert self.symmetric, "Only symmetric quantizers are implemented"
      len_axis = len(x.shape)
      if len_axis > 1:
        axis = _get_scaling_axis(self.scale_axis, len_axis)
      else:
        axis = [0]

      # Normalize x by the integer scale before estimating the data scale.
      x = x / m_i

      # Using 2's complement, we can represent 2**(bits-1)-1 positive values
      # If we wish to maintain symmetry, we can double 2**(bits-1)-1 to get
      # the total number of possible values we can represent.
      # If symmetry is not enforced, then we can represent (2**bits)-1 values
      # using 2's complement.
      levels = (2**(self.bits-1)-1) * 2 if self.symmetric else (2**self.bits)-1

      if self.freeze_scale:
        # Scale is fixed value. In this case, scale is extracted from the
        # post-training quantizer scale. In order to retrain models with
        # this scale value, we need to divide it by m to make it in the same
        # value scale as x.
        scale = self.scale / m
      else:
        # Calculate the scale.
        scale = (K.max(abs(x), axis=axis, keepdims=True) * 2) / levels

        # If alpha is "auto_po2", then get the "best" po2 scale
        if "po2" in self.alpha:
          scale = K.pow(2.0,
                        tf.math.round(K.log(scale + K.epsilon()) /
                                      np.log(2.0)))
          # Fixed number of refinement iterations; quantize with the current
          # scale, then re-fit the scale to the quantized values.
          for idx in range(5):
            v = tf.floor(tf.abs(x) / scale + 0.5)
            mask = v < levels / 2
            z = tf.sign(x) * tf.where(mask, v, tf.ones_like(v) * levels / 2)
            scale = _get_least_squares_scale(
                alpha="auto_po2",
                x=x,
                q=z,
                scale_axis=self.scale_axis,
                elements_per_scale=self.elements_per_scale,
                min_po2_exponent=self.min_po2_exponent,
                max_po2_exponent=self.max_po2_exponent)
        elif self.alpha != "auto":
          # If alpha is "auto", then directly use the "best"
          # floating point scale.
          raise ValueError(f"Invalid alpha '{self.alpha}'")

      # Even for trainable scale, we still need to quantize x with the best
      # scale. This extra step is needed to ensure that with the same input
      # and scale, the quantized output is identical between training and
      # inference.
      v = tf.floor(tf.abs(x) / scale + 0.5)
      mask = v < levels / 2
      z = tf.sign(x) * tf.where(mask, v, tf.ones_like(v) * levels / 2)

      # z is an integer number, so we must make the scale * m and z / m
      scale = scale * m

      # we will not use "z" right now because of stochastic_rounding
      # this is still under test.

      # if "new" in self.alpha:
      #  z = z / m
      #  self.scale = scale
      #  return x + tf.stop_gradient(-x + scale * z)

      # Undo the earlier x / m_i normalization before returning.
      x = m_i * x
      xq = m_i * z / m

      if not self.freeze_scale:
        self.scale = scale
      xq = scale * xq

      if self.use_ste:
        return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))
      else:
        return (1 - self.qnoise_factor) * x + tf.stop_gradient(
            self.qnoise_factor * xq)
    else:
      scale = self.alpha

    # quantized_bits with "1" bit becomes a binary implementation.
    if unsigned_bits > 0:
      p = x * m / m_i
      xq = m_i * tf.keras.backend.clip(
          _round_through(p, self.use_stochastic_rounding, precision=1.0),
          self.keep_negative * (-m + self.symmetric), m - 1) / m
    else:
      xq = tf.sign(x)
      xq += (1.0 - tf.abs(xq))
      if not self.keep_negative:
        xq = (xq + 1.0) / 2.0

    self.scale = scale
    xq = scale * xq
    if self.use_ste:
      return x + tf.stop_gradient(self.qnoise_factor * (-x + xq))
    else:
      return (1 - self.qnoise_factor) * x + tf.stop_gradient(
          self.qnoise_factor * xq)

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"
      self.freeze_scale = False
      self.symmetric = True

  def max(self):
    """Get maximum value that quantized_bits class can represent."""
    unsigned_bits = self.bits - self.keep_negative
    if unsigned_bits > 0:
      return max(
          1.0,
          np.array(
              K.pow(2., K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return 1.0

  def min(self):
    """Get minimum value that quantized_bits class can represent."""
    if not self.keep_negative:
      return 0.0

    unsigned_bits = self.bits - self.keep_negative
    if unsigned_bits > 0:
      return -max(
          1.0,
          np.array(
              K.pow(2, K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return -1.0

  def range(self):
    """Returns a list of all values that quantized_bits can represent
    ordered by their binary representation ascending."""
    # Only implemented for the plain asymmetric signed unscaled case.
    assert self.symmetric == 0
    assert self.keep_negative
    assert self.alpha is None or self.alpha == 1.0

    x = np.asarray(range(2**self.bits), dtype=np.float32)
    p_and_n = np.where(x >= 2**(self.bits - 1),
                       (x - 2**(self.bits - 1)) - 2**(self.bits - 1),
                       x)
    return p_and_n * np.array(
        K.pow(2.0, -self.bits + K.cast(self.integer, dtype="float32") + 1),
        dtype="float32")

  @classmethod
  def from_config(cls, config):
    # Convert JSON-serializable lists back to NumPy arrays.
    if config.get("post_training_scale") is not None:
      config["post_training_scale"] = np.array(config["post_training_scale"])
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "integer": self.integer.numpy()
                   if isinstance(self.integer, tf.Variable) else self.integer,
        "symmetric": self.symmetric,
        "alpha": self.alpha,
        "keep_negative": self.keep_negative,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "qnoise_factor": self.qnoise_factor.numpy() if isinstance(
            self.qnoise_factor, tf.Variable) else self.qnoise_factor,
        "post_training_scale":
            # Since NumPy arrays are not directly JSON-serializable,
            # we convert them to lists.
            (self.post_training_scale.tolist()
             if self.post_training_scale is not None else None)
    }
    return config
@quantizer_registry.register_quantizer
class bernoulli(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Computes a Bernoulli sample with probability sigmoid(x).

  This computation uses ST approximation.

  To do that, we compute sigmoid(x) and a random sample z ~ U[0,1]. As
  p in [0,1] and z in [0,1], p - z in [-1,1]. However, -1 will never appear
  because to get -1 we would need sigmoid(-inf) - z == 1. As a result, the
  range will be in practical terms [0,1].

  The noise introduced by z can be seen as a regularizer to the weights W of
  y = Wx as y = Wx + Wz for some noise z with mean mu(z) and var(z). As a
  result, W**2 var(z) to the variance of y, which has the same effect as a
  regularizer on L2 with lambda = var(z), as presented in Hinton"s Coursera
  Lecture 9c.

  Remember that E[dL/dy] = E[dL/dx] once we add stochastic sampling.

  Attributes:
    alpha: allows one to specify multiplicative factor for number generation
      of "auto" or "auto_po2".
    temperature: amplifier factor for sigmoid function, making stochastic
      less stochastic as it moves away from 0.
    use_real_sigmoid: use real sigmoid for probability.

  Returns:
    Computation of round with stochastic sampling with straight through
    gradient.
  """

  def __init__(self, alpha=None, temperature=6.0, use_real_sigmoid=True):
    super().__init__()
    self.alpha = alpha
    self.bits = 1
    self.temperature = temperature
    self.use_real_sigmoid = use_real_sigmoid
    self.default_alpha = 1.0
    self.scale = None

  def __str__(self):
    options = []
    if self.alpha is not None:
      if isinstance(self.alpha, six.string_types):
        options.append("alpha='" + str(self.alpha) + "'")
      else:
        options.append("alpha=" + str(self.alpha))
    if self.temperature != 6.0:
      options.append("temperature=" + str(self.temperature))
    if not self.use_real_sigmoid:
      options.append("use_real_sigmoid=" + str(int(self.use_real_sigmoid)))
    return "bernoulli(" + ",".join(options) + ")"

  def __call__(self, x):
    if isinstance(self.alpha, six.string_types):
      assert self.alpha in ["auto", "auto_po2"]
      # For auto-scaling modes, normalize by the per-channel standard
      # deviation so the sigmoid temperature is comparable across channels.
      rank = len(x.shape)
      if rank > 1:
        if K.image_data_format() == "channels_last":
          reduce_axes = list(range(rank - 1))
        else:
          reduce_axes = list(range(1, rank))
      else:
        reduce_axes = [0]
      std = K.std(x, axis=reduce_axes, keepdims=True) + K.epsilon()
    else:
      std = 1.0

    logits = self.temperature * x / std
    if self.use_real_sigmoid:
      prob = tf.keras.backend.sigmoid(logits)
    else:
      prob = _sigmoid(logits)

    # Stochastic sample: 1 with probability `prob`, otherwise 0.
    noise = tf.random.uniform(tf.shape(x))
    sample = tf.sign(prob - noise)
    sample += 1.0 - tf.abs(sample)   # map sign's 0 output to +1
    sample = (sample + 1.0) / 2.0    # {-1, +1} -> {0, 1}

    # Deterministic binarization of x, used only to fit the scale.
    # if we use non stochastic binary to compute alpha,
    # this function seems to behave better
    hard = tf.sign(x)
    hard += 1.0 - tf.abs(hard)
    hard = (hard + 1.0) / 2.0

    scale = _get_least_squares_scale(self.alpha, x, hard)
    self.scale = scale
    return x + tf.stop_gradient(-x + scale * sample)

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"

  def max(self):
    """Get the maximum value bernoulli class can represent."""
    if self.alpha is None or isinstance(self.alpha, six.string_types):
      return 1.0
    return max(1.0, self.alpha)

  def min(self):
    """Get the minimum value bernoulli class can represent."""
    return 0.0

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    return {"alpha": self.alpha}
from_config(cls, config): return cls(**config) def get_config(self): config = {"alpha": self.alpha} return config @quantizer_registry.register_quantizer class ternary(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes an activation function returning -alpha, 0 or +alpha. Right now we assume two type of behavior. For parameters, we should have alpha, threshold and stochastic rounding on. For activations, alpha and threshold should be floating point numbers, and stochastic rounding should be off. Attributes: x: tensor to perform sign opertion with stochastic sampling. bits: number of bits to perform quantization. alpha: ternary is -alpha or +alpha. Alpha can be "auto" or "auto_po2". threshold: threshold to apply "dropout" or dead band (0 value). If "auto" is specified, we will compute it per output layer. use_stochastic_rounding: if true, we perform stochastic rounding. Returns: Computation of sign within the threshold. """ def __init__(self, alpha=None, threshold=None, use_stochastic_rounding=False, number_of_unrolls=5): super().__init__() self.bits = 2 self.alpha = alpha self.threshold = threshold self.use_stochastic_rounding = use_stochastic_rounding self.default_alpha = 1.0 self.default_threshold = 0.33 self.number_of_unrolls = number_of_unrolls self.scale = None def __str__(self): flags = [] if self.alpha is not None: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.threshold is not None: flags.append("threshold=" + str(self.threshold)) if self.use_stochastic_rounding: flags.append( "use_stochastic_rounding=" + str(int(self.use_stochastic_rounding))) if self.number_of_unrolls != 5: flags.append( "number_of_unrolls=" + str(int(self.number_of_unrolls))) return "ternary(" + ",".join(flags) + ")" def __call__(self, x): if isinstance(self.alpha, six.string_types): # parameters assert self.alpha in ["auto", "auto_po2"] assert self.threshold is None else: # 
activations assert not self.use_stochastic_rounding assert not isinstance(self.threshold, six.string_types) if self.alpha is None or isinstance(self.alpha, six.string_types): scale = 1.0 elif isinstance(self.alpha, np.ndarray): scale = self.alpha else: scale = float(self.alpha) # This is an approximiation from https://arxiv.org/abs/1605.04711 # We consider channels_last only for now. if isinstance(self.alpha, six.string_types): # It is for parameters # first, compute which asix corresponds to the channels. # TODO(b/237833510): support channels_first try: len_axis = len(x.shape.as_list()) except AttributeError: len_axis = len(list(x.shape)) if len_axis == 1: axis = None elif K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) # This approximation is exact if x ~ U[-m, m]. For x ~ N(0, m) # we need to iterate a few times before we can coverge m = K.max(tf.abs(x), axis=axis, keepdims=True) scale = 2 * m / 3.0 if "po2" in self.alpha: scale = K.pow(2.0, tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0))) for _ in range(self.number_of_unrolls): thres = scale / 2.0 # once we scale the number precision == 0.33 works # well for Uniform and Normal distribution of input v = scale * _round_through( x / scale, use_stochastic_rounding=self.use_stochastic_rounding, precision=1. / 3.) q = K.cast(tf.abs(v) >= thres, K.floatx()) * tf.sign(x) scale = _get_least_squares_scale(self.alpha, x, q) else: if self.threshold is None: thres = self.default_threshold else: thres = self.threshold q = K.cast(tf.abs(x) >= thres, K.floatx()) * tf.sign(x) # ternary ranges from -1 to +1, so we use tanh(x) to be a differentiable # version of that. 
    if self.alpha is None:
      x = K.tanh(x)
    self.scale = scale
    # Straight-through estimator: forward pass emits scale * q, backward pass
    # passes the gradient of x (the stop_gradient hides the quantization).
    return x + tf.stop_gradient(-x + scale * q)

  def _set_trainable_parameter(self):
    if self.alpha is None:
      self.alpha = "auto_po2"

  def max(self):
    """Get the maximum value that ternary can represent."""
    if self.alpha is None or isinstance(self.alpha, six.string_types):
      return 1.0
    else:
      return max(1.0, self.alpha)

  def min(self):
    """Get the minimum value that ternary can represent."""
    if self.alpha is None or isinstance(self.alpha, six.string_types):
      return -1.0
    else:
      return -max(1.0, self.alpha)

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "alpha": self.alpha,
        "threshold": self.threshold,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "number_of_unrolls": self.number_of_unrolls
    }
    return config


@quantizer_registry.register_quantizer
class stochastic_ternary(ternary):  # pylint: disable=invalid-name
  """Computes a stochastic activation function returning -alpha, 0 or +alpha.

  Computes straight-through approximation using random sampling to make
  E[dL/dy] = E[dL/dx], and computing the sign function. See explanation above.

  Attributes:
    x: tensor to perform sign operation with stochastic sampling.
    bits: number of bits to perform quantization.
    alpha: ternary is -alpha or +alpha, or "auto" or "auto_po2".
    threshold: (1-threshold) specifies the spread of the +1 and -1 values.
    temperature: amplifier factor for sigmoid function, making stochastic
      less stochastic as it moves away from 0.
    use_real_sigmoid: use real sigmoid for probability.
    number_of_unrolls: number of times we iterate between scale and threshold.

  Returns:
    Computation of sign with stochastic sampling with straight through
    gradient.
  """

  def __init__(
      self,
      alpha=None,
      threshold=None,
      temperature=8.0,
      use_real_sigmoid=True,
      number_of_unrolls=5,
  ):
    super().__init__(
        alpha=alpha, threshold=threshold, number_of_unrolls=number_of_unrolls
    )
    self.bits = 2
    self.alpha = alpha
    self.threshold = threshold
    # threshold == 1.0 would collapse the spread of the +/-1 values.
    assert threshold != 1.0
    self.default_alpha = 1.0
    self.default_threshold = 0.33
    self.temperature = temperature
    self.use_real_sigmoid = use_real_sigmoid
    self.number_of_unrolls = number_of_unrolls
    self.scale = None

  def __str__(self):
    # Serializes only non-default arguments, mirroring the constructor call.
    flags = []
    if self.alpha is not None:
      alpha = str(self.alpha)
      if isinstance(self.alpha, six.string_types):
        alpha = "'" + alpha + "'"
      flags.append("alpha=" + alpha)
    if self.threshold is not None:
      flags.append("threshold=" + str(self.threshold))
    if self.temperature != 8.0:
      flags.append("temperature=" + str(self.temperature))
    if not self.use_real_sigmoid:
      flags.append("use_real_sigmoid=0")
    if self.number_of_unrolls != 5:
      flags.append("number_of_unrolls=" + str(self.number_of_unrolls))
    return "stochastic_ternary(" + ",".join(flags) + ")"

  def __call__(self, x):
    def stochastic_output():
      # right now we only accept alpha = "auto" or "auto_po2"
      assert isinstance(self.alpha, six.string_types)
      assert self.alpha in ["auto", "auto_po2"]
      # NOTE(review): given the asserts above, only the elif branch below is
      # reachable; the None/float branches are dead code kept as-is.
      if self.alpha is None:
        scale = self.default_alpha
      elif isinstance(self.alpha, six.string_types):
        scale = 1.0
        assert self.alpha in ["auto", "auto_po2"]
      else:
        assert self.alpha >= 0.0
        scale = float(self.alpha)

      len_axis = len(x.shape)
      if len_axis > 1:
        if K.image_data_format() == "channels_last":
          axis = list(range(len_axis - 1))
        else:
          axis = list(range(1, len_axis))
      else:
        axis = [0]

      x_std = K.std(x, axis=axis, keepdims=True)

      # Initial scale estimate, exact for x ~ U[-m, m]; refined below by
      # least squares over a few unrolled iterations.
      m = K.max(tf.abs(x), axis=axis, keepdims=True)
      scale = 2.0 * m / 3.0
      if self.alpha == "auto_po2":
        scale = K.pow(
            2.0, tf.math.round(K.log(scale + K.epsilon()) / np.log(2.0))
        )
      for _ in range(self.number_of_unrolls):
        T = scale / 2.0
        q_ns = K.cast(tf.abs(x) >= T, K.floatx()) * K.sign(x)
        scale = _get_least_squares_scale(self.alpha, x, q_ns)
"auto" allows arbitrary scale while "auto_po2" allows power-of-two scales only. It can also be set to a fixed value or None (i.e., no scaling). - scale_axis: It determines the axis/axes to calculate the auto-scale at. - elements_per_scale: It enables fine-grained scaling where it determines the number of elements across scale axis/axes that should be grouped into one scale. Examples: 1. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=None, elements_per_scale=None --> Number of separate scales = 16 2. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=1, elements_per_scale=None --> Number of separate scales = 8 3. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=1, elements_per_scale=2 --> Number of separate scales = 4 4. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=[2, 3], elements_per_scale=2 --> Number of separate scales = 4*8 = 32 5. Input shape = [1, 8, 8, 16] alpha="auto", scale_axis=[2, 3], elements_per_scale=[2, 4] --> Number of separate scales = 4*4 = 16 Attributes: x: tensor to perform sign_through. bits: number of bits to perform quantization. use_01: if True, return {0,1} instead of {-1,+1}. alpha: binary is -alpha or +alpha, or "auto", "auto_po2" to compute automatically. use_stochastic_rounding: if true, we perform stochastic rounding. elements_per_scale: if set to an int or List[int], we create multiple scales per axis across scale_axis, where 'elements_per_scale' represents the number of elements/values associated with every separate scale value. scale_axis: int or List[int] which axis/axes to calculate scale from. min_po2_exponent: if set while using "auto_po2", it represents the minimum allowed power of two exponent. max_po2_exponent: if set while using "auto_po2", it represents the maximum allowed power of two exponent. Returns: Computation of sign operation with straight through gradient. 
""" def __init__(self, use_01=False, alpha=None, use_stochastic_rounding=False, scale_axis=None, elements_per_scale=None, min_po2_exponent=None, max_po2_exponent=None): super().__init__() self.use_01 = use_01 self.bits = 1 self.alpha = alpha self.use_stochastic_rounding = use_stochastic_rounding self.default_alpha = 1.0 self.scale = None self.scale_axis = scale_axis self.elements_per_scale = elements_per_scale self.min_po2_exponent = min_po2_exponent self.max_po2_exponent = max_po2_exponent def __str__(self): def list_to_str(l): return ",".join([str(x) for x in l]) flags = [] if self.use_01: flags.append("use_01=" + str(int(self.use_01))) if self.alpha is not None: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.elements_per_scale is not None: if isinstance(self.elements_per_scale, list): flags.append("elements_per_scale=[" + list_to_str(self.elements_per_scale) + "]") else: flags.append("elements_per_scale=" + str(self.elements_per_scale)) if self.scale_axis is not None: if isinstance(self.scale_axis, list): flags.append("scale_axis=[" + list_to_str(self.scale_axis) + "]") else: flags.append("scale_axis=" + str(self.scale_axis)) if self.min_po2_exponent is not None: flags.append("min_po2_exponent=" + str(self.min_po2_exponent)) if self.max_po2_exponent is not None: flags.append("max_po2_exponent=" + str(self.max_po2_exponent)) if self.use_stochastic_rounding: flags.append( "use_stochastic_rounding=" + str(self.use_stochastic_rounding)) return "binary(" + ",".join(flags) + ")" def __call__(self, x): if isinstance(self.alpha, six.string_types): assert self.alpha in ["auto", "auto_po2"] if self.alpha is None: scale = self.default_alpha elif isinstance(self.alpha, six.string_types): scale = 1.0 elif isinstance(self.alpha, np.ndarray): scale = self.alpha else: scale = float(self.alpha) if self.use_stochastic_rounding: try: len_axis = len(x.shape.as_list()) except AttributeError: 
len_axis = len(list(x.shape)) if len_axis == 1: axis = None elif K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) # if stochastic_round is through, we need to scale # number so that the precision is small enough. # This is especially important if range of x is very # small, which occurs during initialization of weights. m = K.max(tf.abs(x), axis=axis, keepdims=True) m = tf.where(m > 1.0, tf.ones_like(m), m) f = 2 * m x = tf_utils.smart_cond( K.learning_phase(), lambda: f * _round_through( x / f, use_stochastic_rounding=True, precision=0.125), lambda: x) k_sign = tf.sign(x) if self.use_stochastic_rounding: # in inference, we use a biased "1" for stochastic rounding right now k_sign += (1.0 - tf.abs(k_sign)) * tf_utils.smart_cond( K.learning_phase(), lambda: 2.0 * tf.round(tf.random.uniform(tf.shape(x))) - 1.0, lambda: tf.ones_like(tf.shape(x), dtype=K.floatx())) # if something still remains, just make it positive for now. k_sign += (1.0 - tf.abs(k_sign)) if self.use_01: k_sign = (k_sign + 1.0) / 2.0 # approximate binary by tanh(x) as it has limited range between -1 and +1. 
if self.alpha is None: x = K.tanh(x) self.scale = _get_least_squares_scale( self.alpha, x, k_sign, elements_per_scale=self.elements_per_scale, scale_axis=self.scale_axis, min_po2_exponent=self.min_po2_exponent, max_po2_exponent=self.max_po2_exponent, ) return x + tf.stop_gradient(-x + self.scale * k_sign) def _set_trainable_parameter(self): if self.alpha is None: self.alpha = "auto_po2" def max(self): """Get maximum value that binary class can respresent.""" if self.alpha is None or isinstance(self.alpha, six.string_types): return 1.0 else: return max(1.0, self.alpha) def min(self): """Get minimum value that binary class can respresent.""" if self.use_01: return 0.0 elif self.alpha is None or isinstance(self.alpha, six.string_types): return -1.0 else: return -max(1.0, self.alpha) @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "use_01": self.use_01, "alpha": self.alpha, "use_stochastic_rounding": self.use_stochastic_rounding } return config @quantizer_registry.register_quantizer class stochastic_binary(binary): # pylint: disable=invalid-name """Computes a stochastic activation function returning -alpha or +alpha. Computes straight-through approximation using random sampling to make E[dL/dy] = E[dL/dx], and computing the sign function. See explanation above. Attributes: x: tensor to perform sign opertion with stochastic sampling. alpha: binary is -alpha or +alpha, or "auto" or "auto_po2". bits: number of bits to perform quantization. temperature: amplifier factor for sigmoid function, making stochastic behavior less stochastic as it moves away from 0. use_real_sigmoid: use real sigmoid from tensorflow for probablity. Returns: Computation of sign with stochastic sampling with straight through gradient. 
""" def __init__(self, alpha=None, temperature=6.0, use_real_sigmoid=True): super().__init__(alpha=alpha) self.alpha = alpha self.bits = 1 self.temperature = temperature self.use_real_sigmoid = use_real_sigmoid self.default_alpha = 1.0 self.scale = None def __str__(self): flags = [] if self.alpha is not None: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.temperature != 6.0: flags.append("temperature=" + str(self.temperature)) if not self.use_real_sigmoid: flags.append("use_real_sigmoid=" + str(int(self.use_real_sigmoid))) return "stochastic_binary(" + ",".join(flags) + ")" def __call__(self, x): def stochastic_output(): if isinstance(self.alpha, six.string_types): assert self.alpha in ["auto", "auto_po2"] len_axis = len(x.shape) if len_axis > 1: if K.image_data_format() == "channels_last": axis = list(range(len_axis - 1)) else: axis = list(range(1, len_axis)) else: axis = [0] std = K.std(x, axis=axis, keepdims=True) + K.epsilon() else: std = 1.0 if self.use_real_sigmoid: p = tf.keras.backend.sigmoid(self.temperature * x / std) else: p = _sigmoid(self.temperature * x / std) r = tf.random.uniform(tf.shape(x)) q = tf.sign(p - r) q += 1.0 - tf.abs(q) q_non_stochastic = tf.sign(x) q_non_stochastic += 1.0 - tf.abs(q_non_stochastic) scale = _get_least_squares_scale(self.alpha, x, q_non_stochastic) self.scale = scale return x + tf.stop_gradient(-x + scale * q) output = tf_utils.smart_cond( K.learning_phase(), stochastic_output, lambda: binary.__call__(self, x) ) return output def _set_trainable_parameter(self): if self.alpha is None: self.alpha = "auto_po2" def max(self): """Get the maximum value that stochastic_binary can respresent.""" if self.alpha is None or isinstance(self.alpha, six.string_types): return 1.0 else: return max(1.0, self.alpha) def min(self): """Get the minimum value that stochastic_binary can respresent.""" if self.alpha is None or isinstance(self.alpha, 
six.string_types): return -1.0 else: return -max(1.0, self.alpha) @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "alpha": self.alpha, "temperature": self.temperature, "use_real_sigmoid": self.use_real_sigmoid, } return config @tf.function(jit_compile=True) def fast_relu_quantize(p, m_i, factor): return m_i * tf.clip_by_value(tf.round(p) * factor, 0.0, 1.0 - factor) @quantizer_registry.register_quantizer class quantized_relu(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes a quantized relu to a number of bits. Modified from: [https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow] Assume h(x) = +1 with p = sigmoid(x), -1 otherwise, the expected value of h(x) is: E[h(x)] = +1 P(p <= sigmoid(x)) - 1 P(p > sigmoid(x)) = +1 P(p <= sigmoid(x)) - 1 ( 1 - P(p <= sigmoid(x)) ) = 2 P(p <= sigmoid(x)) - 1 = 2 sigmoid(x) - 1, if p is sampled from a uniform distribution U[0,1] If use_sigmoid is 0, we just keep the positive numbers up to 2**integer * (1 - 2**(-bits)) instead of normalizing them, which is easier to implement in hardware. Attributes: bits: number of bits to perform quantization. integer: number of bits to the left of the decimal point. use_sigmoid: if true, we apply sigmoid to input to normalize it. negative_slope: slope when activation < 0, needs to be power of 2. use_stochastic_rounding: if true, we perform stochastic rounding. relu_upper_bound: A float representing an upper bound of the unquantized relu. If None, we apply relu without the upper bound when "is_quantized_clip" is set to false (true by default). Note: The quantized relu uses the quantization parameters (bits and integer) to upper bound. So it is important to set relu_upper_bound appropriately to the quantization parameters. "is_quantized_clip" has precedence over "relu_upper_bound" for backward compatibility. 
    is_quantized_clip: A boolean representing whether the inputs are clipped
      to the maximum value represented by the quantization parameters. This
      parameter is deprecated, and the default is set to True for backwards
      compatibility. Users are encouraged to use "relu_upper_bound" instead.
    qnoise_factor: float. a scalar from 0 to 1 that represents the level of
      quantization noise to add. This controls the amount of the quantization
      noise to add to the outputs by changing the weighted sum of
      (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x.
    var_name: String or None. A variable name shared between the tf.Variables
      created in the build function. If None, it is generated automatically.
    use_ste: Bool. Whether to use "straight-through estimator" (STE) method
      or not.
    use_variables: Bool. Whether to make the quantizer variables to be dynamic
      tf.Variables or not.

  Returns:
    Function that performs relu + quantization to bits >= 0.
  """

  def __init__(self, bits=8, integer=0, use_sigmoid=0, negative_slope=0.0,
               use_stochastic_rounding=False, relu_upper_bound=None,
               is_quantized_clip=True, qnoise_factor=1.0, var_name=None,
               use_ste=True, use_variables=False, enable_fast_inference=False):
    super().__init__()
    self.bits = bits
    self.integer = integer
    self.use_sigmoid = use_sigmoid
    self.negative_slope = negative_slope
    self.use_stochastic_rounding = use_stochastic_rounding
    self.relu_upper_bound = relu_upper_bound
    self.is_quantized_clip = is_quantized_clip
    self.qnoise_factor = qnoise_factor
    self.use_ste = use_ste
    # negative_slope must be a non-negative power of two so the negative
    # branch can be implemented as a shift in hardware.
    assert negative_slope >= 0.0
    if negative_slope != 0.0:
      assert np.mod(np.log2(negative_slope), 1) == 0
    self.var_name = var_name
    self.use_variables = use_variables
    self.enable_fast_inference = enable_fast_inference

  def __str__(self):
    # Converts Tensors to printable strings by converting to a numpy array and
    # then using regex to remove brackets when there is only one integer bit
    integer_bits = re.sub(
        r"\[(\d)\]", r"\g<1>",
        str(self.integer.numpy() if isinstance(self.integer, tf.Variable)
            else self.integer))

    flags = [str(self.bits), integer_bits]
    if self.use_sigmoid or self.use_stochastic_rounding:
      flags.append(str(int(self.use_sigmoid)))
    if self.negative_slope:
      flags.append(str(self.negative_slope))
    if self.use_stochastic_rounding:
      flags.append(str(int(self.use_stochastic_rounding)))
    return "quantized_relu(" + ",".join(flags) + ")"

  def __call__(self, x):
    if self.enable_fast_inference:
      # This is the fast inference version of the quantizer.
      m_i = 1 << self.integer
      p = x * (2 ** (self.bits - self.integer))
      factor = 2 ** -self.bits
      return fast_relu_quantize(p, m_i, factor)

    if not self.built:
      self.build(var_name=self.var_name, use_variables=self.use_variables)

    # One bit is consumed by the sign when a negative slope is used.
    non_sign_bits = self.bits - (self.negative_slope != 0.0)
    x = K.cast(x, dtype="float32")
    m = K.cast(K.pow(2, non_sign_bits), dtype="float32")
    m_i = K.cast(K.pow(2, self.integer), dtype="float32")

    # is_quantized_clip has precedence over relu_upper_bound for backward
    # compatibility.
    # m_f is the smallest representable step (2**(integer - non_sign_bits)).
    m_f = K.cast(
        K.pow(
            tf.constant(2., tf.float32),
            K.cast(self.integer, dtype="float32") - non_sign_bits),
        dtype="float32")
    if self.is_quantized_clip:
      x_u = tf.where(x <= m_i - m_f, K.relu(x, alpha=self.negative_slope),
                     tf.ones_like(x) * (m_i - m_f))
    elif self.relu_upper_bound is not None:
      x_u = tf.where(x <= self.relu_upper_bound,
                     K.relu(x, alpha=self.negative_slope),
                     tf.ones_like(x) * self.relu_upper_bound)
    else:
      x_u = K.relu(x, alpha=self.negative_slope)

    if self.use_sigmoid:
      # Normalize via sigmoid before rounding; negative branch is scaled by
      # negative_slope and clipped to [-1, 0].
      p = _sigmoid(x / m_i) * m
      xq = m_i * tf.keras.backend.clip(
          2.0 * (_round_through(p, self.use_stochastic_rounding) / m) - 1.0,
          0.0, 1.0 - 1.0 / m)
      if self.negative_slope > 0:
        neg_factor = 1 / (self.negative_slope * m)
        xq = xq + m_i * self.negative_slope * tf.keras.backend.clip(
            2.0 * (_round_through(p * self.negative_slope,
                                  self.use_stochastic_rounding) * neg_factor)
            - 1.0, -1.0, 0.0)
    else:
      p = x * m / m_i
      xq = m_i * tf.keras.backend.clip(
          _round_through(p, self.use_stochastic_rounding) / m,
          0.0, 1.0 - 1.0 / m)
      if self.negative_slope > 0:
        neg_factor = 1 / (self.negative_slope * m)
        xq = xq + m_i * self.negative_slope * (
            tf.keras.backend.clip(
                _round_through(p * self.negative_slope,
                               self.use_stochastic_rounding) * neg_factor,
                -1.0, 0.0))

    if self.relu_upper_bound and not self.is_quantized_clip:
      xq = tf.where(xq <= self.relu_upper_bound,
                    xq, tf.ones_like(xq) * self.relu_upper_bound)

    if self.use_ste:
      # Straight-through estimator: gradient of the clipped input x_u.
      return x_u + tf.stop_gradient(self.qnoise_factor * (-x_u + xq))
    else:
      return (1 - self.qnoise_factor) * x_u + tf.stop_gradient(
          self.qnoise_factor * xq)

  def max(self):
    """Get the maximum value that quantized_relu can represent."""
    unsigned_bits = self.bits - (self.negative_slope != 0.0)

    if unsigned_bits > 0:
      return max(
          1.0,
          np.array(
              K.pow(2.0, K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return 1.0

  def min(self):
    """Get the minimum value that quantized_relu can represent."""
    if self.negative_slope == 0.0:
      return 0.0

    unsigned_bits = self.bits - 1
    if unsigned_bits > 0:
      return min(
          -0.0,
          -self.negative_slope * np.array(
              K.pow(2.0, K.cast(self.integer, dtype="float32")),
              dtype="float32"))
    else:
      return -1.0

  def range(self):
    """Returns a list of all values that quantized_relu can represent
      ordered by their binary representation ascending.
""" assert self.use_sigmoid == 0 # current unsupported assert self.negative_slope == 0 # # unsupported unsupported x = np.asarray(range(2**self.bits)) return x * np.array( K.pow(2.0, -self.bits + K.cast(self.integer, dtype="float32")), dtype="float32") @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "bits": self.bits, "integer": self.integer.numpy() if isinstance(self.integer, tf.Variable) else self.integer, "use_sigmoid": self.use_sigmoid, "negative_slope": self.negative_slope, "use_stochastic_rounding": self.use_stochastic_rounding, "relu_upper_bound": self.relu_upper_bound, "qnoise_factor": self.qnoise_factor.numpy() if isinstance( self.qnoise_factor, tf.Variable) else self.qnoise_factor } return config @quantizer_registry.register_quantizer class quantized_ulaw(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes a u-law quantization. Attributes: bits: number of bits to perform quantization. integer: number of bits to the left of the decimal point. symmetric: if true, we will have the same number of values for positive and negative numbers. u: parameter of u-law Returns: Function that performs ulaw + quantization to bits in the range -1.0 to 1.0. 
""" def __init__(self, bits=8, integer=0, symmetric=0, u=255.0): super().__init__() self.bits = bits self.integer = integer self.symmetric = symmetric self.u = u def __str__(self): flags = [str(self.bits), str(self.integer)] if self.symmetric or self.u != 255.0: flags.append(str(int(self.symmetric))) if self.u != 255.0: flags.append(str(self.u)) return "quantized_ulaw(" + ",".join(flags) + ")" def __call__(self, x): non_sign_bits = self.bits - 1 m = pow(2, non_sign_bits) m_i = pow(2, self.integer) p = _sigmoid(x / m_i) * m rp = 2.0 * (_round_through(p) / m) - 1.0 u_law_p = tf.sign(rp) * tf.keras.backend.log( 1 + self.u * tf.abs(rp)) / tf.keras.backend.log(1 + self.u) xq = m_i * tf.keras.backend.clip(u_law_p, -1.0 + (1.0 * self.symmetric) / m, 1.0 - 1.0 / m) return xq def max(self): """Get the maximum value that quantized_ulaw can represent.""" unsigned_bits = self.bits - 1 if unsigned_bits > 0: return max(1.0, np.power(2.0, self.integer)) else: return 1.0 def min(self): """Get the minimum value that quantized_ulaw can represent.""" unsigned_bits = self.bits - 1 if unsigned_bits > 0: return -max(1.0, np.power(2.0, self.integer)) else: return -1.0 @classmethod def from_config(cls, config): return cls(**config) def get_config(self): config = { "bits": self.bits, "integer": self.integer, "symmetric": self.symmetric, "u": self.u } return config @quantizer_registry.register_quantizer class quantized_tanh(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Computes a quantized tanh to a number of bits. Modified from: [https://github.com/BertMoons/QuantizedNeuralNetworks-Keras-Tensorflow] Attributes: bits: number of bits to perform quantization. use_stochastic_rounding: if true, we perform stochastic rounding. symmetric: if true, we will have the same number of values for positive and negative numbers. 
    use_real_tanh: if true, use the tanh function from Keras backend, if
      false, use tanh that is defined as 2 * sigmoid(x) - 1

  Returns:
    Function that performs tanh + quantization to bits in the range -1.0 to
    1.0.
  """

  def __init__(self, bits=8, use_stochastic_rounding=False, symmetric=False,
               use_real_tanh=False):
    super().__init__()
    self.bits = bits
    self.symmetric = symmetric
    self.use_stochastic_rounding = use_stochastic_rounding
    self.use_real_tanh = use_real_tanh

  def __str__(self):
    # Serializes only non-default positional flags.
    flags = [str(self.bits)]
    if self.use_stochastic_rounding:
      flags.append(str(int(self.use_stochastic_rounding)))
    if self.symmetric:
      flags.append(str(int(self.symmetric)))
    if self.use_real_tanh:
      flags.append(str(int(self.use_real_tanh)))
    return "quantized_tanh(" + ",".join(flags) + ")"

  def __call__(self, x):
    non_sign_bits = self.bits - 1
    x = K.cast_to_floatx(x)
    m = K.cast_to_floatx(K.pow(2, non_sign_bits))
    # Either the exact tanh or the piecewise-sigmoid approximation.
    p = K.tanh(x) if self.use_real_tanh else 2.0 * _sigmoid(x) - 1.0
    # When symmetric, the lower clip excludes -1.0 so the positive and
    # negative ranges contain the same number of levels.
    return tf.keras.backend.clip(
        (_round_through(p * m, self.use_stochastic_rounding) / m),
        -1.0 + (1.0 * self.symmetric) / m,
        1.0 - 1.0 / m)

  def max(self):
    """Get the maximum value that quantized_tanh can represent."""
    return 1.0 - 1.0 / pow(2, self.bits - 1)

  def min(self):
    """Get the minimum value that quantized_tanh can represent."""
    return -1.0 + (1.0 * self.symmetric) / pow(2, self.bits - 1)

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "symmetric": self.symmetric,
        "use_stochastic_rounding": self.use_stochastic_rounding,
        "use_real_tanh": self.use_real_tanh
    }
    return config


@quantizer_registry.register_quantizer
class quantized_sigmoid(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Computes a quantized sigmoid to a number of bits.

  Attributes:
    bits: number of bits to perform quantization.
    symmetric: if true, we will have the same number of values for positive
      and negative numbers.
    use_real_sigmoid: if true, will use the sigmoid from Keras backend
    use_stochastic_rounding: if true, we perform stochastic rounding.

  Returns:
    Function that performs sigmoid + quantization to bits in the range 0.0
    to 1.0.
  """

  def __init__(self, bits=8, symmetric=False, use_real_sigmoid=False,
               use_stochastic_rounding=False):
    super().__init__()
    self.bits = bits
    self.symmetric = symmetric
    self.use_real_sigmoid = use_real_sigmoid
    self.use_stochastic_rounding = use_stochastic_rounding

  def __str__(self):
    # Serializes only non-default positional flags.
    flags = [str(self.bits)]
    if self.symmetric:
      flags.append(str(int(self.symmetric)))
    if self.use_real_sigmoid:
      flags.append(str(int(self.use_real_sigmoid)))
    if self.use_stochastic_rounding:
      flags.append(str(int(self.use_stochastic_rounding)))
    return "quantized_sigmoid(" + ",".join(flags) + ")"

  def __call__(self, x):
    x = K.cast_to_floatx(x)
    m = K.cast_to_floatx(K.pow(2, self.bits))
    p = K.sigmoid(x) if self.use_real_sigmoid else _sigmoid(x)
    # When symmetric, the lower clip excludes 0.0 so the range is symmetric
    # around 0.5.
    return tf.keras.backend.clip(
        (_round_through(p * m, self.use_stochastic_rounding) / m),
        (1.0 * self.symmetric) / m,
        1.0 - 1.0 / m)

  def max(self):
    """Get the maximum value that quantized_sigmoid can represent."""
    return 1.0 - 1.0 / pow(2, self.bits)

  def min(self):
    """Get the minimum value that quantized_sigmoid can represent."""
    return (1.0 * self.symmetric) / pow(2, self.bits)

  @classmethod
  def from_config(cls, config):
    return cls(**config)

  def get_config(self):
    config = {
        "bits": self.bits,
        "symmetric": self.symmetric,
        "use_real_sigmoid": self.use_real_sigmoid,
        "use_stochastic_rounding": self.use_stochastic_rounding
    }
    return config


def _clip_power_of_two(x_abs,
                       min_exp,
                       max_exp,
                       max_value,
                       quadratic_approximation=False,
                       use_stochastic_rounding=False,
                       log2_rounding="rnd"):
  """Clips a tensor using power-of-two quantizer.

  Args:
    x_abs: A tensor object. Its elements should be non-negative.
    min_exp: An integer representing the smallest exponent.
    max_exp: An integer representing the largest exponent.
    max_value: A float or None.
      If it is not None, the values of x_abs are clipped to max_value.
    quadratic_approximation: An boolean representing whether the quadratic
      approximation is applied.
    use_stochastic_rounding: An boolean representing whether the stochastic
      rounding method is applied.
    log2_rounding: log2 rounding mode. "rnd" and "floor" currently supported,
      corresponding to tf.round and tf.floor respectively.

  Returns:
    A tensor object, the values are clipped by min_exp and max_exp.
  """

  # if quadratic_approximation is True, round to the exponent for sqrt(x),
  # so that the return value can be divided by two without remainder.
  log2 = np.log(2.0)

  # When the elements of x_abs are smaller than the keras epsilon,
  # we just overwrite x_abs with eps
  eps = tf.keras.backend.epsilon()
  x_filter = tf.where(x_abs < eps, eps, x_abs)
  if max_value is not None:
    # If the elements of x_filter have values larger than max_value, clip it.
    x_filter = tf.where(x_filter >= max_value,
                        tf.ones_like(x_filter) * max_value, x_filter)

  def power_of_two_clip(x_abs, min_exp, max_exp, quadratic_approximation,
                        use_stochastic_rounding, log2_rounding):
    # Rounds log2(x) with the requested mode and clips the exponent into
    # [min_exp, max_exp]; the rounding is straight-through for gradients.
    assert log2_rounding in ["rnd", "floor"]
    if quadratic_approximation:
      q_factor = 2.0
      x_input = tf.sqrt(x_abs)
    else:
      q_factor = 1.0
      x_input = x_abs

    if log2_rounding == "floor":
      x_log2 = _floor_through(tf.keras.backend.log(x_input) / log2)
    elif use_stochastic_rounding:
      # Stochastic rounding only in the training phase.
      x_log2 = tf_utils.smart_cond(
          K.learning_phase(),
          lambda: stochastic_round_po2(x_input),
          lambda: _round_through(tf.keras.backend.log(x_input) / log2))
    else:
      x_log2 = _round_through(tf.keras.backend.log(x_input) / log2)
    x_clipped = q_factor * tf.keras.backend.clip(x_log2, min_exp, max_exp)
    return x_clipped

  # Values below eps are mapped directly to the smallest exponent.
  x_clipped = tf.where(
      x_abs < eps,
      tf.ones_like(x_abs) * min_exp,
      power_of_two_clip(x_filter, min_exp, max_exp, quadratic_approximation,
                        use_stochastic_rounding, log2_rounding))

  return x_clipped


def _need_exponent_sign_bit_check(max_value):
  """Checks whether the sign bit of exponent is needed.

  This is used by quantized_po2 and quantized_relu_po2.

  Args:
    max_value: the maximum value allowed.

  Returns:
    An integer. 1: sign_bit is needed. 0: sign_bit is not needed.
  """

  if max_value is not None:
    if max_value < 0:
      raise ValueError("po2 max_value should be non-negative.")
    if max_value > 1:
      # if max_value is larger than 1,
      # the exponent could be positive and negative.
      # e.g., log(max_value) > 0 when max_value > 1
      need_exponent_sign_bit = 1
    else:
      need_exponent_sign_bit = 0
  else:
    # max_value is not specified, so we cannot decide the range.
    # Then we need to put sign_bit for exponent to be safe
    need_exponent_sign_bit = 1
  return need_exponent_sign_bit


def _get_min_max_exponents(non_sign_bits, need_exponent_sign_bit,
                           quadratic_approximation):
  """Given a bitwidth, gets min and max exponents that it can represent.

  Args:
    non_sign_bits: An integer representing the bitwidth of the exponent.
    need_exponent_sign_bit: An integer representing whether it needs sign bit
      in exponent. (1: need sign bit. 0: sign bit is not needed.)
    quadratic_approximation: A boolean representing whether the quadratic
      approximation method is enforced.

  Returns:
    A tuple of integers: min_exp, max_exp
  """
  effect_bits = non_sign_bits - need_exponent_sign_bit
  min_exp = -2**(effect_bits)
  max_exp = 2**(effect_bits) - 1
  if quadratic_approximation:
    # Force max_exp to be even so exponents stay divisible by two.
    max_exp = 2 * (max_exp // 2)
  return min_exp, max_exp


@quantizer_registry.register_quantizer
class quantized_po2(base_quantizer.BaseQuantizer):  # pylint: disable=invalid-name
  """Quantizes to the closest power of 2.

  Attributes:
    bits: An integer, the bits allocated for the exponent, its sign and the
      sign of x.
    max_value: An float or None. If None, no max_value is specified.
      Otherwise, the maximum value of quantized_po2 <= max_value
    use_stochastic_rounding: A boolean, default is False, if True, it uses
      stochastic rounding and forces the mean of x to be x statstically.
quadratic_approximation: A boolean, default is False if True, it forces the exponent to be even number that closted to x. log2_rounding: A string, log2 rounding mode. "rnd" and "floor" currently supported, corresponding to tf.round and tf.floor respectively. qnoise_factor: float. a scalar from 0 to 1 that represents the level of quantization noise to add. This controls the amount of the quantization noise to add to the outputs by changing the weighted sum of (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x. var_name: String or None. A variable name shared between the tf.Variables created in the build function. If None, it is generated automatically. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. use_variables: Bool. Whether to make the quantizer variables to be dynamic tf.Variables or not. """ def __init__(self, bits=8, max_value=None, use_stochastic_rounding=False, quadratic_approximation=False, log2_rounding="rnd", qnoise_factor=1.0, var_name=None, use_ste=True, use_variables=False): super().__init__() self.bits = bits self.max_value = max_value self.use_stochastic_rounding = use_stochastic_rounding self.log2_rounding = log2_rounding # if True, round to the exponent for sqrt(x), # so that the return value can be divided by two without remainder. 
self.quadratic_approximation = quadratic_approximation need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value) non_sign_bits = self.bits - 1 self._min_exp, self._max_exp = _get_min_max_exponents( non_sign_bits, need_exponent_sign_bit, self.quadratic_approximation) # qnoise_factor related attributes self.qnoise_factor = qnoise_factor self.use_ste = use_ste self.var_name = var_name self.use_variables = use_variables def __str__(self): flags = [str(self.bits)] if self.max_value is not None or self.use_stochastic_rounding: flags.append(str(int(self.max_value))) if self.use_stochastic_rounding: flags.append(str(int(self.use_stochastic_rounding))) if self.quadratic_approximation: flags.append( "quadratic_approximation=" + str(int(self.quadratic_approximation))) return "quantized_po2(" + ",".join(flags) + ")" def __call__(self, x): if not self.built: self.build(var_name=self.var_name, use_variables=self.use_variables) x_sign = tf.sign(x) x_sign += (1.0 - tf.abs(x_sign)) x_abs = tf.abs(x) x_clipped = _clip_power_of_two(x_abs, self._min_exp, self._max_exp, self.max_value, self.quadratic_approximation, self.use_stochastic_rounding, self.log2_rounding) xq = x_sign * pow(2.0, x_clipped) if self.use_ste: return x + tf.stop_gradient(self.qnoise_factor * (-x + xq)) else: return (1 - self.qnoise_factor) * x + tf.stop_gradient( self.qnoise_factor * xq) def max(self): """Get the maximum value that quantized_po2 can represent.""" if self.max_value: return max(1.0, self.max_value) else: return max(1.0, 2**self._max_exp) def min(self): """Get the minimum value that quantized_po2 can represent.""" if self.max_value: return -max(1.0, self.max_value) else: return -max(1.0, 2**self._max_exp) @classmethod def from_config(cls, config): return cls(**config) def get_config(self): """Gets configugration of the quantizer. Returns: A dict mapping quantization configuration, including bits: bitwidth for exponents. max_value: the maximum value of this quantized_po2 can represent. 
use_stochastic_rounding: if True, stochastic rounding is used. quadratic_approximation: if True, the exponent is enforced to be even number, which is the closest one to x. log2_rounding: A string, Log2 rounding mode """ config = { "bits": self.bits, "max_value": self.max_value, "use_stochastic_rounding": self.use_stochastic_rounding, "quadratic_approximation": self.quadratic_approximation, "qnoise_factor": self.qnoise_factor.numpy() if isinstance( self.qnoise_factor, tf.Variable) else self.qnoise_factor, "log2_rounding": self.log2_rounding } return config @quantizer_registry.register_quantizer class quantized_relu_po2(base_quantizer.BaseQuantizer): # pylint: disable=invalid-name """Quantizes x to the closest power of 2 when x > 0 Attributes: bits: An integer, the bits allocated for the exponent and its sign. max_value: default is None, or a non-negative value to put a constraint for the max value. negative_slope: slope when activation < 0, needs to be power of 2. use_stochastic_rounding: A boolean, default is False, if True, it uses stochastic rounding and forces the mean of x to be x statstically. quadratic_approximation: A boolean, default is False if True, it forces the exponent to be even number that is closest to x. log2_rounding: A string, log2 rounding mode. "rnd" and "floor" currently supported, corresponding to tf.round and tf.floor respectively. qnoise_factor: float. a scalar from 0 to 1 that represents the level of quantization noise to add. This controls the amount of the quantization noise to add to the outputs by changing the weighted sum of (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x. var_name: String or None. A variable name shared between the tf.Variables created in the build function. If None, it is generated automatically. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. use_variables: Bool. Whether to make the quantizer variables to be dynamic tf.Variables or not. 
""" def __init__(self, bits=8, max_value=None, negative_slope=0, use_stochastic_rounding=False, quadratic_approximation=False, log2_rounding="rnd", qnoise_factor=1.0, var_name=None, use_ste=True, use_variables=False): super().__init__() self.bits = bits self.max_value = max_value self.negative_slope = negative_slope self.use_stochastic_rounding = use_stochastic_rounding self.log2_rounding = log2_rounding # if True, round to the exponent for sqrt(x), # so that the return value can be divided by two without remainder. self.quadratic_approximation = quadratic_approximation need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value) self._min_exp = -2**(self.bits - need_exponent_sign_bit) self._max_exp = 2**(self.bits - need_exponent_sign_bit) - 1 if self.quadratic_approximation: self._max_exp = 2 * (self._max_exp // 2) assert negative_slope >= 0.0 if negative_slope != 0: assert np.mod(np.log2(negative_slope), 1) == 0 # qnoise_factor related attributes self.qnoise_factor = qnoise_factor self.use_ste = use_ste self.var_name = var_name self.use_variables = use_variables def __str__(self): flags = [str(self.bits)] if self.max_value is not None or self.use_stochastic_rounding: flags.append(str(int(self.max_value))) if self.negative_slope: flags.append(str(self.negative_slope)) if self.use_stochastic_rounding: flags.append(str(int(self.use_stochastic_rounding))) if self.quadratic_approximation: flags.append( "quadratic_approximation=" + str(int(self.quadratic_approximation))) return "quantized_relu_po2(" + ",".join(flags) + ")" def __call__(self, x): if not self.built: self.build(var_name=self.var_name, use_variables=self.use_variables) x_original = x if self.max_value is None: x = K.relu(x, self.negative_slope) else: x = tf.where( x <= self.max_value, K.relu(x, self.negative_slope), tf.ones_like(x) * self.max_value) x_pos_clipped = _clip_power_of_two( K.relu(x_original), self._min_exp, self._max_exp, self.max_value, self.quadratic_approximation, 
self.use_stochastic_rounding, self.log2_rounding) x_neg_clipped = _clip_power_of_two( K.relu(-x_original) * self.negative_slope, self._min_exp, self._max_exp, self.max_value, self.quadratic_approximation, self.use_stochastic_rounding, self.log2_rounding) xq = tf.where( tf.logical_or(x_original >= 0.0, self.negative_slope == 0.0), pow(2.0, x_pos_clipped), -pow(2.0, x_neg_clipped)) if self.use_ste: return x + tf.stop_gradient(self.qnoise_factor * (-x + xq)) else: return (1 - self.qnoise_factor) * x + tf.stop_gradient( self.qnoise_factor * xq) def max(self): """Get the maximum value that quantized_relu_po2 can represent.""" if self.max_value: return max(1.0, self.max_value) else: return max(1.0, 2**self._max_exp) def min(self): """Get the minimum value that quantized_relu_po2 can represent.""" if self.negative_slope == 0.0: return 2**self._min_exp unsigned_bits = self.bits - 1 if unsigned_bits > 0: return min(2**self._min_exp, - self.negative_slope * np.power(2.0, unsigned_bits)) else: return 2**self._min_exp @classmethod def from_config(cls, config): return cls(**config) def get_config(self): """Gets configugration of the quantizer. Returns: A dict mapping quantization configuration, including bits: bitwidth for exponents. max_value: the maximum value of this quantized_relu_po2 can represent. use_stochastic_rounding: if True, stochastic rounding is used. quadratic_approximation: if True, the exponent is enforced to be even number, which is the closest one to x. 
log2_rounding: A string, Log2 rounding mode """ config = { "bits": self.bits, "max_value": self.max_value, "negative_slope": self.negative_slope, "use_stochastic_rounding": self.use_stochastic_rounding, "quadratic_approximation": self.quadratic_approximation, "qnoise_factor": self.qnoise_factor.numpy() if isinstance( self.qnoise_factor, tf.Variable) else self.qnoise_factor, "log2_rounding": self.log2_rounding } return config @quantizer_registry.register_quantizer class quantized_hswish(quantized_bits): # pylint: disable=invalid-name """Computes a quantized hard swish to a number of bits. # TODO(mschoenb97): Update to inherit from quantized_linear. Equation of h-swisth function in mobilenet v3: hswish(x) = x * ReluY(x + relu_shift) / Y Y is relu_upper_bound Attributes: bits: number of bits to perform quantization, also known as word length. integer: number of integer bits. symmetric: if True, the quantization is in symmetric mode, which puts restricted range for the quantizer. Otherwise, it is in asymmetric mode, which uses the full range. alpha: a tensor or None, the scaling factor per channel. If None, the scaling factor is 1 for all channels. use_stochastic_rounding: if true, we perform stochastic rounding. This parameter is passed on to the underlying quantizer quantized_bits which is used to quantize h_swish. scale_axis: which axis to calculate scale from qnoise_factor: float. a scalar from 0 to 1 that represents the level of quantization noise to add. This controls the amount of the quantization noise to add to the outputs by changing the weighted sum of (1 - qnoise_factor)*unquantized_x + qnoise_factor*quantized_x. var_name: String or None. A variable name shared between the tf.Variables created in the build function. If None, it is generated automatically. use_ste: Bool. Whether to use "straight-through estimator" (STE) method or not. use_variables: Bool. Whether to make the quantizer variables to be dynamic tf.Variables or not. 
relu_shift: integer type, representing the shift amount of the unquantized relu. relu_upper_bound: integer type, representing an upper bound of the unquantized relu. If None, we apply relu without the upper bound when "is_quantized_clip" is set to false (true by default). Note: The quantized relu uses the quantization parameters (bits and integer) to upper bound. So it is important to set relu_upper_bound appropriately to the quantization parameters. "is_quantized_clip" has precedence over "relu_upper_bound" for backward compatibility. """ def __init__( self, bits=8, integer=0, symmetric=0, alpha=None, use_stochastic_rounding=False, scale_axis=None, qnoise_factor=1.0, var_name=None, use_variables=False, relu_shift: int = 3, relu_upper_bound: int = 6, ): super().__init__( bits=bits, integer=integer, symmetric=symmetric, keep_negative=True, alpha=alpha, use_stochastic_rounding=use_stochastic_rounding, scale_axis=scale_axis, qnoise_factor=qnoise_factor, var_name=var_name, use_variables=use_variables, ) self.relu_shift = relu_shift self.relu_upper_bound = relu_upper_bound def __str__(self): """Converts Tensors to printable strings.""" integer_bits = re.sub( r"\[(\d)\]", r"\g<1>", str( self.integer.numpy() if isinstance(self.integer, tf.Variable) else self.integer ), ) assert isinstance(integer_bits, int) flags = [ str(self.bits), integer_bits, str(int(self.symmetric)), "relu_shift=" + str(self.relu_shift), "relu_upper_bound=" + str(self.relu_upper_bound), ] if not self.keep_negative: flags.append("keep_negative=False") if self.alpha: alpha = str(self.alpha) if isinstance(self.alpha, six.string_types): alpha = "'" + alpha + "'" flags.append("alpha=" + alpha) if self.use_stochastic_rounding: flags.append( "use_stochastic_rounding=" + str(int(self.use_stochastic_rounding)) ) return "quantized_hswish(" + ",".join(flags) + ")" def __call__(self, x): assert self.relu_upper_bound > 0, ( "relu_upper_bound must be a positive value, " f"found {self.relu_upper_bound} instead" ) 
assert ( self.relu_shift > 0 ), f"relu_shift must be a positive value, found {self.relu_shift} instead" x = K.cast_to_floatx(x) shift_x = x + self.relu_shift relu_x = tf.where( shift_x <= self.relu_upper_bound, K.relu(shift_x, alpha=False), tf.ones_like(shift_x) * self.relu_upper_bound, ) hswish_x = tf.math.multiply(x, relu_x) / self.relu_upper_bound return super(quantized_hswish, self).__call__(hswish_x) def min(self): """Gets the minimum value that quantized_hswish can represent.""" # get the minimum value that the number of bits can represent min_quant = super(quantized_hswish, self).min() # In the negative end, the hswish function becomes # x * (x + relu_shift) / relu_upper_bound # the min value of this parabolic function is # - relu_shift^2 / (4 * relu_upper_bound) denom = 4 * self.relu_upper_bound min_parabolic = -self.relu_shift * self.relu_shift / denom if min_quant >= min_parabolic: return min_quant # get the quantized value of min_parabolic return super(quantized_hswish, self).call(min_parabolic) def get_config(self): """Add relu_shift and relu_upper_bound to the config file.""" base_config = super(quantized_hswish, self).get_config() config = { "relu_shift": self.relu_shift, "relu_upper_bound": self.relu_upper_bound, } out_config = dict(list(base_config.items()) + list(config.items())) return out_config # TODO(akshayap): Update to use registry for quantizers instead of globals(). def get_quantizer(identifier): """Gets the quantizer. Args: identifier: An quantizer, which could be dict, string, or callable function. Returns: A quantizer class or quantization function from this file. For example, Quantizer classes: quantized_bits, quantized_po2, quantized_relu_po2, binary, stochastic_binary, ternary, stochastic_ternary, etc. Quantization functions: binary_sigmoid, hard_sigmoid, soft_sigmoid, etc. Raises: ValueError: An error occurred when quantizer cannot be interpreted. 
""" if identifier is None: return None if isinstance(identifier, dict): return deserialize_keras_object( identifier, module_objects=globals(), printable_module_name="quantizer") elif isinstance(identifier, six.string_types): return safe_eval(identifier, globals()) elif callable(identifier): return identifier else: raise ValueError("Could not interpret quantizer identifier: " + str(identifier)) def get_quantized_initializer(w_initializer, w_range): """Gets the initializer and scales it by the range.""" if isinstance(w_initializer, six.string_types): if w_initializer == "he_normal": return initializers.VarianceScaling( scale=2 * w_range, mode="fan_in", distribution="normal", seed=None) if w_initializer == "he_uniform": return initializers.VarianceScaling( scale=2 * w_range, mode="fan_in", distribution="uniform", seed=None) elif w_initializer == "glorot_normal": return initializers.VarianceScaling( scale=w_range, mode="fan_avg", distribution="normal", seed=None) elif w_initializer == "glorot_uniform": return initializers.VarianceScaling( scale=w_range, mode="fan_avg", distribution="uniform", seed=None) elif w_initializer == "random_uniform": return initializers.RandomUniform(-w_range, w_range) return w_initializer ================================================ FILE: qkeras/registry.py ================================================ # Copyright 2024 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """General purpose registy for registering classes or functions. The registry can be used along with decorators to record any class/function. Sample usage: # Setup registry with decorator. _REGISTRY = registry.Registry() def register(cls): _REGISTRY.register(cls) def lookup(name): return _REGISTRY.lookup(name) # Register instances. @register def foo_task(): ... @register def bar_task(): ... # Retrieve instances. def my_executor(): ... my_task = lookup("foo_task") ... """ class Registry(object): """A registry class to record class representations or function objects.""" def __init__(self): """Initializes the registry.""" self._container = {} def register(self, item, name=None): """Register an item. Args: item: Python item to be recorded. name: Optional name to be used for recording item. If not provided, item.__name__ is used. """ if not name: name = item.__name__ self._container[name] = item def lookup(self, name): """Retrieves an item from the registry. Args: name: Name of the item to lookup. Returns: Registered item from the registry. """ return self._container[name] ================================================ FILE: qkeras/safe_eval.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Implements a safe evaluation using globals()."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pyparsing import delimitedList
from pyparsing import Group
from pyparsing import Optional
from pyparsing import Regex
from pyparsing import Suppress

import logging
from tensorflow import keras


def Num(s):
  """Tries to convert string to either int or float."""
  try:
    try:
      return int(s)
    except ValueError:
      return float(s)
  except ValueError:
    # this should be always true. if it isn't int or float, it should be str
    assert (
        (s[0] == '"' and s[-1] == '"') or (s[0] == "'" and s[-1] == "'")
    )
    s = s[1:-1]
    return s


def Str(s):
  """Strips the first and last character (assumed to be quote marks)."""
  return s[1:-1]


def IsNum(s):
  """Returns True if s parses as an int or a float."""
  try:
    try:
      int(s)
      return True
    except ValueError:
      float(s)
      return True
  except ValueError:
    return False


def IsBool(s):
  """Returns True if s is exactly the literal "True" or "False"."""
  if s in ["True", "False"]:
    return True
  else:
    return False


def IsNone(s):
  """Returns True if s is exactly the literal "None"."""
  return s == "None"


def Bool(s):
  """Converts a boolean literal (as vetted by IsBool) to a Python bool."""
  return True if "True" in s else False


def ListofNums(s):
  """Parses a space-separated bracketed list such as "[1 2 3]" into numbers."""
  # remove list brackets
  s = s.replace("[", "").replace("]", "")
  list_s = s.split(" ")
  return [Num(e) for e in list_s]


def IsListofNums(s):
  """Returns True if s looks like a space-separated list of numbers.

  NOTE(review): a single-element list like "[3]" returns False here (the
  len > 1 check), so GetArg falls through to Str and returns "3]"-style
  text instead of [3] — confirm whether single-element lists are expected.
  """
  # remove list brackets
  s = s.replace("[", "").replace("]", "")
  list_s = s.split(" ")
  if len(list_s) > 1:
    for e in list_s:
      # if any of the elements is not a number return false
      if not IsNum(e):
        return False
    return True
  else:
    return False


def GetArg(s):
  """Converts one argument token to bool/int/float/None/list/str.

  The check order matters: bool and numeric literals are tried before the
  None and list forms, and anything unrecognized is treated as a quoted
  string (outer characters stripped).
  """
  if IsBool(s):
    return Bool(s)
  elif IsNum(s):
    return Num(s)
  elif IsNone(s):
    return None
  elif IsListofNums(s):
    return ListofNums(s)
  else:
    return Str(s)


def GetParams(s):
  """Extracts args and kwargs from string."""
  # modified from https://stackoverflow.com/questions/38799223/parse-string-to-identify-kwargs-and-args  # pylint: disable=line-too-long

  _lparen = Suppress("(")  # pylint: disable=invalid-name
  _rparen = Suppress(")")  # pylint: disable=invalid-name
  _eq = Suppress("=")  # pylint: disable=invalid-name

  # Grammar: "(" [token ["=" value] {"," token ["=" value]}] ")".
  # Each match is a Group of length 1 (positional) or 2 (keyword).
  data = (_lparen + Optional(
      delimitedList(
          Group(Regex(r"[^=,)\s]+") + Optional(_eq + Regex(u"[^,)]*")))
      )
  ) + _rparen)

  items = data.parseString(s).asList()

  # need to make sure that kwargs only happen after args are processed
  args = [GetArg(i[0]) for i in items if len(i) == 1]
  kwargs = {i[0]: GetArg(i[1]) for i in items if len(i) == 2}

  # check for syntax error: a positional arg following a keyword arg.
  for i in range(1, len(items)):
    if (len(items[i]) == 1) and (len(items[i-1]) == 2):
      raise SyntaxError(("Error with item " + str(i) + " \n" +
                         " parsing string " + s + "\n" +
                         " Items: " + str(items) + "\n" +
                         " Item[" + str(i-1) +"] :" + str(items[i-1]) + "\n" +
                         " Item[" + str(i) +"] :" + str(items[i])
                        ))

  return args, kwargs


def safe_eval(eval_str, op_dict, *params, **kwparams):  # pylint: disable=invalid-name
  """Replaces eval by a safe eval mechanism.

  Looks up the function name (the part before "(") in op_dict, parses any
  parenthesized arguments with GetParams, merges in *params/**kwparams, and
  either calls the resolved object or returns it unchanged (plain function
  referenced without parentheses). Unknown names fall back to Keras
  activations.
  """

  function_split = eval_str.split("(")
  quantizer = op_dict.get(function_split[0], None)

  if len(function_split) == 2:
    args, kwargs = GetParams("(" + function_split[1])
  else:
    args = []
    kwargs = {}

  # Extra caller-provided parameters are appended after the parsed ones.
  args = args + list(params)
  for k in kwparams:
    kwargs[k] = kwparams[k]

  # must be Keras activation object if None
  if quantizer is None:
    logging.info("keras dict %s", function_split[0])
    quantizer = keras.activations.get(function_split[0])

  if len(function_split) == 2 or args or kwargs:
    # Parentheses or arguments present: instantiate/call the object.
    return quantizer(*args, **kwargs)
  else:
    if isinstance(quantizer, type):  # Check if quantizer is a class
      return quantizer()
    else:
      # Otherwise it is a function, so just return it
      return quantizer


================================================ FILE: qkeras/utils.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import copy
import json
import tempfile
import types

import numpy as np
import os
import six
import re

import networkx as nx
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.models import model_from_json
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper
from tensorflow_model_optimization.python.core.sparsity.keras import prune_registry
from tensorflow_model_optimization.python.core.sparsity.keras import prunable_layer

from .qlayers import Clip
from .qconv2d_batchnorm import QConv2DBatchnorm
from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm
from .qlayers import QActivation
from .qlayers import QAdaptiveActivation
from .qpooling import QAveragePooling2D
from .qlayers import QDense
from .qlayers import QInitializer
from .qconvolutional import QConv1D
from .qconvolutional import QConv2D
from .qconvolutional import QConv2DTranspose
from .qrecurrent import QSimpleRNN
from .qrecurrent import QSimpleRNNCell
from .qrecurrent import QLSTM
from .qrecurrent import QLSTMCell
from .qrecurrent import QGRU
from .qrecurrent import QGRUCell
from .qrecurrent import QBidirectional
from .qconvolutional import QSeparableConv1D
from .qconvolutional import QSeparableConv2D
from .qconvolutional import QDepthwiseConv2D
from .qnormalization import QBatchNormalization
from .qpooling import QGlobalAveragePooling2D
from .qtools import qgraph
from .quantizers import binary
from .quantizers import bernoulli
from .quantizers import get_weight_scale
from .quantizers import quantized_bits
from .quantizers import quantized_relu
from .quantizers import quantized_ulaw
from .quantizers import quantized_tanh
from .quantizers import quantized_sigmoid
from .quantizers import quantized_po2
from .quantizers import quantized_relu_po2
from .quantizers import stochastic_binary
from .quantizers import stochastic_ternary
from .quantizers import ternary
# from .google_internals.experimental_quantizers import quantized_bits_learnable_scale
# from .google_internals.experimental_quantizers import parametric_quantizer_d_xmax
from .safe_eval import safe_eval
from tensorflow.python.ops import math_ops
from .qmac import QScaleShift

# Layer class names that QKeras-aware tooling (conversion, printing, pruning)
# recognizes as quantized layers.
REGISTERED_LAYERS = [
    "QActivation",
    "QAdaptiveActivation",
    "QDense",
    "QConv1D",
    "QConv2D",
    "QSeparableConv1D",
    "QSeparableConv2D",
    "QDepthwiseConv2D",
    "QConv2DTranspose",
    "QSimpleRNN",
    "QLSTM",
    "QGRU",
    "QBidirectional",
    "QBatchNormalization",
    "QConv2DBatchnorm",
    "QDepthwiseConv2DBatchnorm",
    "QAveragePooling2D",
    "QGlobalAveragePooling2D",
]


def find_bn_fusing_layer_pair(model, custom_objects={}):
  """Finds layers that can be fused with the following batchnorm layers.

  Args:
    model: input model
    custom_objects: Dict of model specific objects needed for cloning.
      NOTE(review): mutable default argument; safe only as long as callees
      never mutate it.

  Returns:
    Dict that marks all the layer pairs that need to be fused.

  Note: supports sequential and non-sequential model
  """
  # Clone so that graph construction does not disturb the caller's model.
  fold_model = clone_model(model, custom_objects)
  (graph, _) = qgraph.GenerateGraphFromModel(
      fold_model, "quantized_bits(8, 0, 1)", "quantized_bits(8, 0, 1)")
  qgraph.GraphAddSingleSourceSingleSink(graph)
  qgraph.GraphRemoveNodeWithNodeType(graph, "InputLayer")
  qgraph.GraphPropagateActivationsToEdges(graph)

  # Finds the Batchnorm nodes and mark them.
  layers_followed_by_bn = {}
  bn_layers_to_skip = set()
  for node_id in nx.topological_sort(graph):
    node = graph.nodes[node_id]
    layer = node["layer"][0]
    if layer:
      successor_ids = list(graph.successors(node_id))
      is_single = len(successor_ids) == 1
      # NOTE(review): assumes every real layer node has at least one
      # successor (the synthetic sink added above) — confirm against qgraph.
      successor_layer = graph.nodes[successor_ids[0]]["layer"][0]
      followed_by_bn = (successor_layer.__class__.__name__ ==
                        "QBatchNormalization")
      # TODO(lishanok): extend to QDense types
      # Only fuse when the conv output feeds exactly one consumer and that
      # consumer is a QBatchNormalization layer.
      enable_bn_fusing = layer.__class__.__name__ in [
          "QConv2D", "QDepthwiseConv2D"
      ] and is_single and followed_by_bn

      if enable_bn_fusing:
        layers_followed_by_bn[layer.name] = successor_layer.name
        bn_layers_to_skip.add(successor_layer.name)

  return (layers_followed_by_bn, bn_layers_to_skip)


def add_bn_fusing_weights(prev_layer, bn_layer, saved_weights):
  """Adds additional fusing weights to saved_weights.

  In hardware inference, we need to combined fuse previous layer's output
  with the following batchnorm op.
  z[i] = bn(y[i]) = inv[i] * y'[i] * scale[i] - bias'[i] is the final
  output of the previous layer and bn layer, with:
  inv[i] = gamma[i]* rsqrt(variance[i]^2+epsilon) is computed from
  the bn layer weights
  y'[i] is the i-th channel output from the previous layer (before scale)
  scale[i] is the i-th channel kernel quantizer scale
  fused_bias[i] = inv[i] * bias[i] + beta[i] - inv[i]*mean[i]
  where bias is the bias term from the previous layer, beta and mean
  are the bn layer weights.

  Args:
    prev_layer: QKeras layer, could be QConv2D/QDepthwiseConv2D/QDense.
    bn_layer: The following QBatchNormalization layer that needs to be fused
      with the previous layer.
    saved_weights: Dict. The centralized weights dictionary that exports
      relevant weights and parameters for hardware inference.
""" bn_qs = bn_layer.quantizers bn_ws = bn_layer.get_weights() if bn_qs[4] is not None: assert bn_qs[0] is None and bn_qs[3] is None, ( "If using the inverse quantizer, the gamma and variance quantizers " "should not be used in order to avoid quantizing a value twice.") def apply_quantizer(quantizer, input_weight): if quantizer: weight = tf.constant(input_weight) weight = tf.keras.backend.eval(quantizer(weight)) else: weight = input_weight return weight # Quantize respective bn layer weights gamma = 1.0 beta = 0 idx = 0 if bn_layer.scale: gamma = apply_quantizer(bn_layer.gamma_quantizer_internal, bn_ws[idx]) idx += 1 if bn_layer.center: beta = apply_quantizer(bn_layer.beta_quantizer_internal, bn_ws[idx]) idx += 1 mean = apply_quantizer(bn_layer.mean_quantizer_internal, bn_ws[idx]) idx += 1 variance = apply_quantizer(bn_layer.variance_quantizer_internal, bn_ws[idx]) # Compute inv[i] inv = gamma * math_ops.rsqrt(variance + bn_layer.epsilon) inv = inv.numpy() if bn_layer.inverse_quantizer_internal is not None: quantizer = bn_layer.inverse_quantizer_internal inv = tf.keras.backend.eval(quantizer(inv)) # Compute fused_bias[i] if prev_layer.use_bias: cur_weights = prev_layer.get_weights() assert len(cur_weights) == 2, ("Weights should have length of 2. Found" f"{len(cur_weights)} instead.") prev_bias = cur_weights[-1] else: prev_bias = 0 b_prime = inv * prev_bias + beta - inv * mean saved_weights[prev_layer.name]["enable_bn_fusing"] = True saved_weights[prev_layer.name]["fused_bn_layer_name"] = bn_layer.name saved_weights[prev_layer.name]["bn_inv"] = inv saved_weights[prev_layer.name]["fused_bias"] = b_prime # Model utilities: before saving the weights, we want to apply the quantizers def model_save_quantized_weights(model, filename=None, custom_objects={}): """Quantizes model for inference and save it. Takes a model with weights, apply quantization function to weights and returns a dictionary with quantized weights. 
User should be aware that "po2" quantization functions cannot really be quantized in meaningful way in Keras. So, in order to preserve compatibility with inference flow in Keras, we do not covert "po2" weights and biases to exponents + signs (in case of quantize_po2), but return instead (-1)**sign*(2**round(log2(x))). In the returned dictionary, we will return the pair (sign, round(log2(x))). Special care needs to be given to quantized_bits(alpha="auto_po2") as well. Since in this quantizer, hardware needs the integer weights and scale for hardware inference, this function will return the pair (scale, integer_weights) in the returned dictionary. Arguments: model: model with weights to be quantized. filename: if specified, we will save the hdf5 containing the quantized weights so that we can use them for inference later on. custom_objects: Dict of model specific objects needed to load/store. Returns: dictionary containing layer name and quantized weights that can be used by a hardware generator. """ saved_weights = {} # Find the conv/dense layers followed by Batchnorm layers (fusing_layer_pair_dict, bn_layers_to_skip) = find_bn_fusing_layer_pair( model, custom_objects ) print("... quantizing model") for layer in model.layers: if hasattr(layer, "get_quantizers"): # weights for software inference weights = [] signs = [] scales = [] # weights for hardware inference hw_weights = [] if any(isinstance(layer, t) for t in [ QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]): qs = layer.get_quantizers() ws = layer.get_folded_weights() elif any(isinstance(layer, t) for t in [QSimpleRNN, QLSTM, QGRU]): qs = layer.get_quantizers()[:-1] ws = layer.get_weights() else: qs = layer.get_quantizers() ws = layer.get_weights() has_sign = False has_scale = False enable_bn_fusing = False # isinstance() might fail due to inconsistent module import path. # Use __class__.__name__ instead. 
layer_class = layer.__class__.__name__ if (layer_class == "QBatchNormalization" and layer.name in bn_layers_to_skip): # Mark current bn layer to be fused with the previous layer enable_bn_fusing = True for quantizer, weight in zip(qs, ws): if quantizer: weight = tf.constant(weight) weight = tf.keras.backend.eval(quantizer(weight)) # If quantizer is power-of-2 (quantized_po2 or quantized_relu_po2), # we would like to process it here. # # However, we cannot, because we will lose sign information as # quanized_po2 will be represented by the tuple (sign, log2(abs(w))). # # In addition, we will not be able to use the weights on the model # any longer. # # So, instead of "saving" the weights in the model, we will return # a dictionary so that the proper values can be propagated. # Weights store the weight in the format that software inference uses. weights.append(weight) q_name = "" if quantizer: if isinstance(quantizer, six.string_types): q_name = quantizer elif hasattr(quantizer, "__name__"): q_name = quantizer.__name__ elif hasattr(quantizer, "name"): q_name = quantizer.name elif hasattr(quantizer, "__class__"): q_name = quantizer.__class__.__name__ if quantizer and ("_po2" in q_name): # Quantized_relu_po2 does not have a sign. if q_name == "quantized_po2": has_sign = True sign = np.sign(weight) # Makes sure values are -1 or +1 only sign += (1.0 - np.abs(sign)) # hw_weight store the weight in the format that hardware inference # uses. 
hw_weight = np.round(np.log2(np.abs(weight))) signs.append(sign) scales.append([]) elif (q_name == "quantized_bits" and quantizer.alpha == "auto_po2"): unsigned_bits = quantizer.bits - quantizer.keep_negative m = K.cast_to_floatx(pow(2, unsigned_bits)) m_i = K.cast_to_floatx(K.pow(2, quantizer.integer)) assert hasattr(quantizer.scale, "numpy") or isinstance( quantizer.scale, np.ndarray), ( "The auto_po2 quantizer has to be called first in order " "to know the values of scale.") scale = quantizer.scale if isinstance( quantizer.scale, np.ndarray) else quantizer.scale.numpy() scale = K.cast_to_floatx(scale) # Make sure scale is power of 2 values log2val = np.log2(scale) diff = np.round(log2val) - log2val assert np.all(diff == 0), "scale must be power of 2 values!" # Convert fixed point weight to integer weight, just hw_weight = weight * m / m_i # Because hw_weight is integer weights, set scale = scale * m_i / m # so that when we can multiply scale with the integer weight # during hardware inference to get the fixed point weights scale = scale * m_i / m has_scale = True scales.append(scale) else: hw_weight = weight signs.append([]) scales.append([]) hw_weights.append(hw_weight) # Save the weights in the format that hardware inference uses saved_weights[layer.name] = {"weights": hw_weights, "enable_bn_fusing": enable_bn_fusing} if (isinstance(layer, QAveragePooling2D) or isinstance(layer, QGlobalAveragePooling2D)): if isinstance(layer, QAveragePooling2D): pool_area = layer.pool_size if isinstance(layer.pool_size, int): pool_area = layer.pool_size * layer.pool_size else: pool_area = np.prod(layer.pool_size) else: pool_area = layer.compute_pooling_area(input_shape=layer.input_shape) saved_weights[ layer.name]["q_mult_factor"] = layer.average_quantizer_internal( 1.0 / pool_area).numpy() saved_weights[layer.name]["mult_factor"] = 1.0 / pool_area saved_weights[layer.name]["pool_area"] = pool_area if has_sign: saved_weights[layer.name]["signs"] = signs if has_scale: 
        saved_weights[layer.name]["scales"] = scales

      # Decide which weights the Keras model itself keeps. Folded layers
      # cannot take separately-quantized weights because folding happens
      # before quantization.
      if not any(isinstance(layer, t) for t in [
          QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]):
        # Set layer weights in the format that software inference uses
        layer.set_weights(weights)
      else:
        print(layer.name, " conv and batchnorm weights cannot be seperately"
              " quantized because they will be folded before quantization.")

      # adjust weights for bn fusing if necessary
      if layer.name in fusing_layer_pair_dict.keys():
        print(f"Fuse {layer.name} output with "
              f"{fusing_layer_pair_dict[layer.name]} for hardware inference.")
        add_bn_fusing_weights(
            prev_layer=layer,
            bn_layer=model.get_layer(fusing_layer_pair_dict[layer.name]),
            saved_weights=saved_weights)
    else:
      # Layer exposes no quantizers; leave its weights untouched.
      if layer.get_weights():
        print(" ", layer.name, "has not been quantized")

  if filename:
    model.save_weights(filename)

  return saved_weights


def quantize_activation(layer_config, activation_bits):
  """Replaces activation by quantized activation functions.

  Mutates ``layer_config`` in place, mapping relu/tanh/sigmoid activations
  to their quantized counterparts with ``activation_bits`` bits.
  """

  str_act_bits = str(activation_bits)

  # relu -> quantized_relu(bits)
  # tanh -> quantized_tanh(bits)
  # sigmoid -> quantized_sigmoid(bits)
  # more to come later

  if layer_config.get("activation", None) is None:
    return

  # The activation may be stored as a string, a function, or a layer/object.
  if isinstance(layer_config["activation"], six.string_types):
    a_name = layer_config["activation"]
  elif isinstance(layer_config["activation"], types.FunctionType):
    a_name = layer_config["activation"].__name__
  else:
    a_name = layer_config["activation"].__class__.__name__

  if a_name == "linear":
    return

  if a_name == "relu":
    layer_config["activation"] = "quantized_relu(" + str_act_bits + ")"
  elif a_name == "tanh":
    layer_config["activation"] = "quantized_tanh(" + str_act_bits + ")"
  elif a_name == "sigmoid":
    layer_config["activation"] = "quantized_sigmoid(" + str_act_bits + ")"


def get_config(quantizer_config, layer, layer_class, parameter=None):
  """Returns search of quantizer on quantizer_config.

  Looks up by the layer's instance name first, falling back to the layer
  class name; if ``parameter`` is given, descends one more level into the
  returned dictionary.
  """

  quantizer = quantizer_config.get(layer["config"]["name"],
                                   quantizer_config.get(layer_class, None))

  if quantizer is not None and parameter is not None:
    quantizer = quantizer.get(parameter, None)

  return quantizer


def is_TFOpLambda_layer(layer):
  """Returns True if the layer is a serialized TFOpLambda layer."""
  return layer.__class__.__name__ == "TFOpLambda"


def get_y_from_TFOpLambda(model_cfg, layer):
  """Get the value of "y" from the TFOpLambda layer's configuration.

  Args:
    model_cfg: dictionary type, model.get_config() output
    layer: a given layer instance

  Returns:
    value of "y" for a TFOpLambda layer. 'y' here corresponds to how
    tensorflow stores TFOpLambda layer parameter in serialization. For
    example, TFOpLambda(func), where func is tf.multiply(input_tensor, 3).
    "y" would be the value 3.
  """
  for layer_config in model_cfg["layers"]:
    op_name = layer_config["config"]["name"]
    class_name = layer_config["class_name"]
    # TODO(lishanok): Extend support for other TFOpLambda types when needed
    if op_name == layer.name and class_name == "TFOpLambda":
      assert ("tf.__operators__.add" in op_name or
              "tf.math.multiply" in op_name), (
                  "TFOpLambda layer {} not supported!".format(op_name))
      return layer_config["inbound_nodes"][-1][-1]["y"]
  return None


def convert_to_folded_model(model):
  """Find conv/dense layers followed by bn layers and fold them.

  Args:
    model: input model

  Returns:
    new model without bn layers
    list of layers being folded

  Note: supports sequential and non-sequential model
  """
  fold_model = clone_model(model)
  model_cfg = model.get_config()

  (graph, _) = qgraph.GenerateGraphFromModel(
      fold_model, "quantized_bits(8, 0, 1)", "quantized_bits(8, 0, 1)")
  qgraph.GraphAddSingleSourceSingleSink(graph)
  qgraph.GraphRemoveNodeWithNodeType(graph, "InputLayer")
  qgraph.GraphPropagateActivationsToEdges(graph)

  # Finds the Batchnorm nodes to be deleted and mark them.
  bn_nodes_to_delete = []
  layers_to_fold = []
  for node_id in nx.topological_sort(graph):
    layer_input_tensors = []
    node = graph.nodes[node_id]
    layer = node["layer"][0]
    if layer:
      successor_ids = list(graph.successors(node_id))
      is_single = len(successor_ids) == 1
      successor_layer = graph.nodes[successor_ids[0]]["layer"][0]
      followed_by_bn = (successor_layer.__class__.__name__ ==
                        "BatchNormalization")
      # TODO(lishanok): extend to QDense types
      # Foldable = conv layer with exactly one successor which is a BN layer.
      is_foldable = layer.__class__.__name__ in [
          "Conv2D", "DepthwiseConv2D"
      ] and is_single and followed_by_bn
      if is_foldable:
        # Removes the batchnorm node from the graph.
        bn_nodes_to_delete.append(successor_ids[0])
        layers_to_fold.append(layer.name)

  # Deletes the marked nodes.
  for node_id in bn_nodes_to_delete:
    qgraph.GraphRemoveNode(graph, node_id)

  # Modifies model according to the graph.
  model_outputs = []
  x = model_inputs = fold_model.inputs
  for node_id in nx.topological_sort(graph):
    layer_input_tensors = []
    node = graph.nodes[node_id]
    layer = node["layer"][0]
    if layer:
      # Gets layer input tensors from graph edge.
      for parent_node_id in graph.predecessors(node_id):
        edge = graph.edges[(parent_node_id, node_id)]
        input_tensor = edge["tensor"]
        layer_input_tensors.append(input_tensor)

      # We call the layer to get output tensor.
      # Edge tensors are stored as weak refs; deref before calling the layer.
      if len(layer_input_tensors) == 1:
        layer_input_tensors = layer_input_tensors[0].deref()
      else:
        layer_input_tensors = [t.deref() for t in layer_input_tensors]
      if is_TFOpLambda_layer(layer):
        # TFOpLambda layer requires one extra input: "y"
        y = get_y_from_TFOpLambda(model_cfg, layer)
        x = layer(layer_input_tensors, y)
      else:
        x = layer(layer_input_tensors)

      # Replaces edge tensors between the predecessor and successor
      for u, v in graph.edges(node_id):
        # u is current layer node, v is successor layer node
        # graph[u][v] is the edge between the two nodes
        # Replace the tensor on this edge so that the input tensor for the
        # successor layer can be updated accordingly.
        graph[u][v]["tensor"] = x.ref()
        if v == -2 and x not in model_outputs:
          # When it is output layer, add the output tensor of this layer
          # into model outputs.
          model_outputs.append(x)

  new_model = Model(inputs=model_inputs, outputs=model_outputs)
  return new_model, layers_to_fold


def model_quantize(model,
                   quantizer_config,
                   activation_bits,
                   custom_objects=None,
                   transfer_weights=False,
                   prefer_qadaptiveactivation=False,
                   enable_bn_folding=False):
  """Creates a quantized model from non-quantized model.

  The quantized model translation is based on json interface of Keras,
  which requires a custom_objects dictionary for "string" types.

  Because of the way json works, we pass "string" objects for the
  quantization mechanisms and we perform an eval("string") which
  technically is not safe, but it will do the job.

  The quantizer_config is a dictionary with the following form.
  {
    Dense_layer_name: {
        "kernel_quantizer": "quantizer string",
        "bias_quantizer": "quantizer_string"
    },

    Conv2D_layer_name: {
        "kernel_quantizer": "quantizer string",
        "bias_quantizer": "quantizer_string"
    },

    Activation_layer_name: "quantizer string",

    "QActivation": { "relu": "quantizer_string" },

    "QConv2D": {
        "kernel_quantizer": "quantizer string",
        "bias_quantizer": "quantizer_string"
    },

    "QBatchNormalization": {}
  }

  In the case of "QBidirectional", we can follow the same form as above.
  The specified configuration will be used for both forward and backwards
  layer.
  {
    "Bidirectional" : {
        "kernel_quantizer" : "quantizer string",
        "bias_quantizer" : "quantizer string",
        "recurrent_quantizer" : "quantizer string"
    }
  }

  In the case of "QActivation", we can modify only certain types of
  activations, for example, a "relu". In this case we represent the
  activation name by a dictionary, or we can modify all activations,
  without representing them as a set.

  We right now require a default case in case we cannot find layer name.
  This simplifies the dictionary because the simplest case, we can just
  say:

  {
    "default": {
        "kernel": "quantized_bits(4)",
        "bias": "quantized_bits(4)"
    }
  }

  and this will quantize all layers' weights and bias to be created with
  4 bits.

  Arguments:
    model: model to be quantized
    quantizer_config: dictionary (as above) with quantized parameters
    activation_bits: number of bits for quantized_relu, quantized_tanh,
      quantized_sigmoid
    custom_objects: dictionary following keras recommendations for json
      translation.
    transfer_weights: if true, weights are to be transfered from model to
      qmodel.
    prefer_qadaptiveactivation: Bool. If true, try to use
      QAdaptiveActivation over QActivation whenever possible
    enable_bn_folding: Bool. If true, fold conv/dense layers with
      following batch normalization layers whenever possible. use
      QConv2DBatchnorm for example, to replace conv2d layers

  Returns:
    qmodel with quantized operations and custom_objects.
  """

  if enable_bn_folding:
    # Removes bn layers from the model and find a list of layers to fold.
    model, layers_to_fold = convert_to_folded_model(model)
    if len(layers_to_fold) == 0:
      # If no layers to fold, no need to perform folding.
      enable_bn_folding = False

  if not custom_objects:
    custom_objects = {}

  # Let's make a deep copy to make sure our objects are not shared elsewhere.
  jm = copy.deepcopy(json.loads(model.to_json()))
  custom_objects = copy.deepcopy(custom_objects)

  config = jm["config"]
  layers = config["layers"]

  def quantize_rnn(layer, quantizer_config):
    # Converts one serialized RNN layer config (SimpleRNN/LSTM/GRU) to its
    # quantized counterpart, mutating `layer` in place.
    q_name = "Q" + layer["class_name"]

    # Needs to add kernel, recurrent bias quantizers.
    kernel_quantizer = get_config(
        quantizer_config, layer, q_name, "kernel_quantizer")
    recurrent_quantizer = get_config(
        quantizer_config, layer, q_name, "recurrent_quantizer")
    if layer["config"]['use_bias']:
      bias_quantizer = get_config(
          quantizer_config, layer, q_name, "bias_quantizer")
    else:
      bias_quantizer = None
    state_quantizer = get_config(
        quantizer_config, layer, q_name, "state_quantizer")

    # This is to avoid unwanted transformations.
    if kernel_quantizer is None:
      return

    layer["config"]["kernel_quantizer"] = kernel_quantizer
    layer["config"]["recurrent_quantizer"] = recurrent_quantizer
    layer["config"]["bias_quantizer"] = bias_quantizer
    layer["config"]["state_quantizer"] = state_quantizer

    # If activation is present, add activation here.
    activation = get_config(
        quantizer_config, layer, q_name, "activation_quantizer")
    if activation:
      layer["config"]["activation"] = activation
    else:
      quantize_activation(layer["config"], activation_bits)

    # If recurrent activation is present, add activation here.
    if layer["class_name"] in ["LSTM", "GRU"]:
      recurrent_activation = get_config(
          quantizer_config, layer, q_name, "recurrent_activation_quantizer")
      if recurrent_activation:
        layer["config"]["recurrent_activation"] = recurrent_activation

    layer["class_name"] = q_name
    # Keras 3 serialization keeps a "registered_name"; point it at the
    # quantized class so deserialization resolves correctly.
    registered_name = layer.pop("registered_name", None)
    if registered_name:
      layer["registered_name"] = q_name

  for layer in layers:
    layer_config = layer["config"]

    # Dense becomes QDense, Conv1D becomes QConv1D etc
    # Activation converts activation functions.
    if layer["class_name"] in [
        "Dense", "Conv1D", "Conv2D", "Conv2DTranspose", "SeparableConv1D",
        "SeparableConv2D"
    ]:
      if (layer["class_name"] in ["Dense", "Conv2D"] and enable_bn_folding and
          layer["name"] in layers_to_fold):
        # Only fold if current layer is followed by BN layer.
        q_name = "Q" + layer["class_name"] + "Batchnorm"
        layer_config["use_bias"] = True  # Folded layers require a bias

        # Sets ema_freeze_delay and folding_mode specific to
        # QDepthwiseConv2DBatchnorm layer config.
        folding_mode = get_config(
            quantizer_config, layer, q_name, "folding_mode")
        layer_config["folding_mode"] = (
            folding_mode if folding_mode else "ema_stats_folding")
        ema_freeze_delay = get_config(
            quantizer_config, layer, q_name, "ema_freeze_delay")
        layer_config["ema_freeze_delay"] = (
            ema_freeze_delay if ema_freeze_delay else None)
      else:
        q_name = "Q" + layer["class_name"]

      # Needs to add kernel/bias quantizers.
      kernel_quantizer = get_config(
          quantizer_config, layer, q_name, "kernel_quantizer")
      if layer_config["use_bias"]:
        bias_quantizer = get_config(
            quantizer_config, layer, q_name, "bias_quantizer")
      else:
        bias_quantizer = None
      if (kernel_quantizer is None and
          q_name == "Q" + layer["class_name"] + "Batchnorm"):
        # Tries none-folded layer quantizer as a back up.
        kernel_quantizer = get_config(
            quantizer_config, layer, "Q" + layer["class_name"],
            "kernel_quantizer")
        bias_quantizer = get_config(
            quantizer_config, layer, "Q" + layer["class_name"],
            "bias_quantizer")

      # This is to avoid unwanted transformations.
      if kernel_quantizer is None:
        continue

      layer["class_name"] = q_name
      layer_config["kernel_quantizer"] = kernel_quantizer
      layer_config["bias_quantizer"] = bias_quantizer

      # If activation is present, add activation here.
      quantizer = get_config(
          quantizer_config, layer, q_name, "activation_quantizer")
      if quantizer:
        layer_config["activation"] = quantizer
      else:
        quantize_activation(layer_config, activation_bits)

    elif layer["class_name"] == "DepthwiseConv2D":
      if enable_bn_folding and layer["name"] in layers_to_fold:
        q_name = "QDepthwiseConv2DBatchnorm"
        layer_config["use_bias"] = True  # Folded layers require a bias

        # Sets ema_freeze_delay and folding_mode specific to
        # QDepthwiseConv2DBatchnorm layers.
        folding_mode = get_config(
            quantizer_config, layer, q_name, "folding_mode")
        layer_config["folding_mode"] = (
            folding_mode if folding_mode else "ema_stats_folding")
        ema_freeze_delay = get_config(
            quantizer_config, layer, q_name, "ema_freeze_delay")
        layer_config["ema_freeze_delay"] = (
            ema_freeze_delay if ema_freeze_delay else None)
      else:
        q_name = "QDepthwiseConv2D"

      # Needs to add kernel/bias quantizers.
      depthwise_quantizer = get_config(quantizer_config, layer, q_name,
                                       "depthwise_quantizer")
      if layer_config["use_bias"]:
        bias_quantizer = get_config(quantizer_config, layer, q_name,
                                    "bias_quantizer")
      else:
        bias_quantizer = None
      if depthwise_quantizer is None and q_name == "QDepthwiseConv2DBatchnorm":
        # Tries none-folded layer quantizer as a back up.
        depthwise_quantizer = get_config(
            quantizer_config, layer, "QDepthwiseConv2D", "depthwise_quantizer")
        bias_quantizer = get_config(
            quantizer_config, layer, "QDepthwiseConv2D", "bias_quantizer")

      # This is to avoid unwanted transformations.
      if depthwise_quantizer is None:
        continue

      layer["class_name"] = q_name
      layer_config["depthwise_quantizer"] = depthwise_quantizer
      layer_config["bias_quantizer"] = bias_quantizer

      # If activation is present, add activation here.
      quantizer = get_config(quantizer_config, layer, q_name,
                             "activation_quantizer",)
      if quantizer:
        layer_config["activation"] = quantizer
      else:
        quantize_activation(layer_config, activation_bits)

    elif layer["class_name"] in ["SimpleRNN", "LSTM", "GRU"]:
      quantize_rnn(layer, quantizer_config)

    elif layer["class_name"] == "Bidirectional":
      # Reuses the single "QBidirectional" config for both directions.
      forward_layer_quantizer_config = {
          layer_config["layer"]["config"]["name"]:
              get_config(quantizer_config, layer, "QBidirectional")
      }
      quantize_rnn(layer["config"]["layer"], forward_layer_quantizer_config)
      if "backward_layer" in layer_config:
        backward_layer_quantizer_config = {
            layer_config["backward_layer"]["config"]["name"]:
                get_config(quantizer_config, layer, "QBidirectional")
        }
        quantize_rnn(layer["config"]["backward_layer"],
                     backward_layer_quantizer_config)
      layer["class_name"] = "QBidirectional"

    elif layer["class_name"] == "Activation":
      if prefer_qadaptiveactivation:
        # Try to find QAdaptiveActivation first
        quantizer = get_config(quantizer_config, layer, "QAdaptiveActivation")
        is_qadaptiveactivation = True
        if quantizer is None:
          # Try QActivation as a backup
          quantizer = get_config(quantizer_config, layer, "QActivation")
          is_qadaptiveactivation = False
      else:
        # Tries to find QActivation first.
        quantizer = get_config(quantizer_config, layer, "QActivation")
        is_qadaptiveactivation = False
        if quantizer is None:
          # Try QAdaptiveActivation as a backup
          quantizer = get_config(quantizer_config, layer,
                                 "QAdaptiveActivation")
          is_qadaptiveactivation = True

      # This is to avoid softmax from quantizing in autoq.
      if quantizer is None:
        continue

      # If quantizer exists in dictionary related to this name,
      # use it, otherwise, use normal transformations.
      if not isinstance(quantizer, dict) or quantizer.get(
          layer_config["activation"], None):
        # Only change activation layer if we will use a quantized activation.
layer["class_name"] = ("QAdaptiveActivation" if is_qadaptiveactivation else "QActivation") if isinstance(quantizer, dict): quantizer = quantizer[layer_config["activation"]] if quantizer: if is_qadaptiveactivation: assert quantizer.find(",") < 0, \ "Only integer bits should be defined for QAdaptiveActivation" layer_config["total_bits"] = int(re.sub(r"[^\d]", "", quantizer)) quantizer = re.sub(r"\(.*", "", quantizer) # remove params layer_config["activation"] = quantizer else: quantize_activation(layer_config, activation_bits) # We have to do this because of other instances of ReLU. elif layer["class_name"] in ["ReLU", "relu", "LeakyReLU"]: quantizer = get_config(quantizer_config, layer, "QActivation") # This is to avoid unwanted transformations. if quantizer is None: continue if layer["class_name"] == "LeakyReLU": negative_slope = layer["config"]["alpha"] elif layer["class_name"] == "relu": max_value = layer["config"]["max_value"] negative_slope = layer["config"]["alpha"] threshold = layer["config"]["threshold"] else: # ReLU from mobilenet max_value = layer["config"]["max_value"] negative_slope = layer["config"]["negative_slope"] threshold = layer["config"]["threshold"] if negative_slope > 0: q_name = "leakyrelu" else: q_name = "relu" # If quantizer exists in dictionary related to this name, # use it, otherwise, use normal transformations. if not isinstance(quantizer, dict) or quantizer.get(q_name, None): # Only change activation layer if we will use a quantized activation. layer["class_name"] = "QActivation" # Remove relu specific configurations # remember that quantized relu's are always upper bounded. 
if layer["class_name"] == "LeakyReLU": del layer["config"]["alpha"] elif layer["class_name"] == "relu": del layer["config"]["max_value"] del layer["config"]["alpha"] del layer["config"]["threshold"] else: # ReLU from mobilenet del layer["config"]["max_value"] del layer["config"]["negative_slope"] del layer["config"]["threshold"] if isinstance(quantizer, dict): quantizer = quantizer[q_name] if quantizer: layer["config"]["activation"] = quantizer else: quantize_activation(layer["config"], activation_bits) elif layer["class_name"] == "BatchNormalization": # We will assume at least QBatchNormalization or # layer name is in dictionary to enable conversion # otherwise we will just skip it. if ( layer_config["name"] not in quantizer_config and "QBatchNormalization" not in quantizer_config ): continue layer["class_name"] = "QBatchNormalization" # Needs to add kernel/bias quantizers. gamma_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "gamma_quantizer") beta_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "beta_quantizer") mean_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "mean_quantizer") variance_quantizer = get_config( quantizer_config, layer, "QBatchNormalization", "variance_quantizer") layer_config["gamma_quantizer"] = gamma_quantizer layer_config["beta_quantizer"] = beta_quantizer layer_config["mean_quantizer"] = mean_quantizer layer_config["variance_quantizer"] = variance_quantizer elif layer["class_name"] in ["AveragePooling2D", "GlobalAveragePooling2D"]: q_name = "Q" + layer["class_name"] # Adds the average quanizer to config. average_quantizer = get_config( quantizer_config, layer, q_name, "average_quantizer") # This is to avoid unwanted transformations. if average_quantizer is None: continue layer["class_name"] = q_name layer_config["average_quantizer"] = average_quantizer # Adds activation to config. 
      quantizer = get_config(
          quantizer_config, layer, q_name, "activation_quantizer")
      if quantizer:
        layer_config["activation"] = quantizer
      else:
        quantize_activation(layer_config, activation_bits)

    # Keras 3 serialization keeps a "registered_name"; point it at the
    # quantized class name when one was selected for this layer.
    # NOTE(review): q_name may be stale from a previous loop iteration (or
    # unbound on the very first layer) for class types not handled above —
    # verify this is intended.
    registered_name = layer.pop("registered_name", None)
    if registered_name:
      layer["registered_name"] = q_name or registered_name

  # We need to keep a dictionary of custom objects as our quantized library
  # is not recognized by keras.
  qmodel = quantized_model_from_json(json.dumps(jm), custom_objects)

  # If transfer_weights is true, we load the weights from model to qmodel.
  if transfer_weights and not enable_bn_folding:
    for layer, qlayer in zip(model.layers, qmodel.layers):
      if layer.get_weights():
        qlayer.set_weights(copy.deepcopy(layer.get_weights()))

  return qmodel


def _add_supported_quantized_objects(custom_objects):
  """Map all the quantized objects."""
  custom_objects["QInitializer"] = QInitializer
  custom_objects["QDense"] = QDense
  custom_objects["QConv1D"] = QConv1D
  custom_objects["QConv2D"] = QConv2D
  custom_objects["QConv2DTranspose"] = QConv2DTranspose
  custom_objects["QSimpleRNNCell"] = QSimpleRNNCell
  custom_objects["QSimpleRNN"] = QSimpleRNN
  custom_objects["QLSTMCell"] = QLSTMCell
  custom_objects["QLSTM"] = QLSTM
  custom_objects["QGRUCell"] = QGRUCell
  custom_objects["QGRU"] = QGRU
  custom_objects["QBidirectional"] = QBidirectional
  custom_objects["QDepthwiseConv2D"] = QDepthwiseConv2D
  custom_objects["QSeparableConv1D"] = QSeparableConv1D
  custom_objects["QSeparableConv2D"] = QSeparableConv2D
  custom_objects["QActivation"] = QActivation
  custom_objects["QAdaptiveActivation"] = QAdaptiveActivation
  custom_objects["QBatchNormalization"] = QBatchNormalization
  custom_objects["Clip"] = Clip
  custom_objects["quantized_bits"] = quantized_bits
  custom_objects["bernoulli"] = bernoulli
  custom_objects["stochastic_ternary"] = stochastic_ternary
  custom_objects["ternary"] = ternary
  custom_objects["stochastic_binary"] = stochastic_binary
  custom_objects["binary"] = binary
  custom_objects["quantized_relu"] = quantized_relu
  custom_objects["quantized_ulaw"] = quantized_ulaw
  custom_objects["quantized_tanh"] = quantized_tanh
  custom_objects["quantized_sigmoid"] = quantized_sigmoid
  custom_objects["quantized_po2"] = quantized_po2
  custom_objects["quantized_relu_po2"] = quantized_relu_po2
  # custom_objects["quantized_bits_learnable_scale"] = quantized_bits_learnable_scale
  custom_objects["QConv2DBatchnorm"] = QConv2DBatchnorm
  custom_objects["QDepthwiseConv2DBatchnorm"] = QDepthwiseConv2DBatchnorm
  custom_objects["QAveragePooling2D"] = QAveragePooling2D
  custom_objects["QGlobalAveragePooling2D"] = QGlobalAveragePooling2D
  custom_objects["QScaleShift"] = QScaleShift


def clone_model(model, custom_objects=None):
  """Clones model with custom_objects."""

  if not custom_objects:
    custom_objects = {}

  # Makes a deep copy to make sure our objects are not shared elsewhere.
  custom_objects = copy.deepcopy(custom_objects)
  _add_supported_quantized_objects(custom_objects)

  json_string = model.to_json()
  qmodel = quantized_model_from_json(json_string,
                                     custom_objects=custom_objects)
  qmodel.set_weights(model.get_weights())

  return qmodel


def quantized_model_from_json(json_string, custom_objects=None):
  """Deserializes a (possibly quantized) model from its json string.

  Registers all QKeras custom objects before delegating to Keras's
  model_from_json.
  """
  if not custom_objects:
    custom_objects = {}

  # Makes a deep copy to make sure our objects are not shared elsewhere.
  custom_objects = copy.deepcopy(custom_objects)
  _add_supported_quantized_objects(custom_objects)

  qmodel = model_from_json(json_string, custom_objects=custom_objects)

  return qmodel


def load_qmodel(filepath, custom_objects=None, compile=True):
  """Loads quantized model from Keras's model.save() h5 file.

  Arguments:
    filepath: one of the following:
      - string, path to the saved model
      - h5py.File or h5py.Group object from which to load the model
      - any file-like object implementing the method `read` that returns
        `bytes` data (e.g. `io.BytesIO`) that represents a valid h5py file
        image.
    custom_objects: Optional dictionary mapping names (strings) to custom
      classes or functions to be considered during deserialization.
    compile: Boolean, whether to compile the model after loading.

  Returns:
    A Keras model instance. If an optimizer was found as part of the saved
    model, the model is already compiled. Otherwise, the model is uncompiled
    and a warning will be displayed. When `compile` is set to False, the
    compilation is omitted without any warning.
  """
  if not custom_objects:
    custom_objects = {}

  # Makes a deep copy to make sure our objects are not shared elsewhere.
  custom_objects = copy.deepcopy(custom_objects)
  _add_supported_quantized_objects(custom_objects)

  qmodel = tf.keras.models.load_model(filepath,
                                      custom_objects=custom_objects,
                                      compile=compile)
  return qmodel


def print_model_sparsity(model):
  """Prints sparsity for the pruned layers in the model."""

  def _get_sparsity(weights):
    # Fraction of zero-valued entries in the weight tensor.
    return 1.0 - np.count_nonzero(weights) / float(weights.size)

  print("Model Sparsity Summary ({})".format(model.name))
  print("--")
  for layer in model.layers:
    # Collect prunable weights depending on how the layer supports pruning.
    if isinstance(layer, pruning_wrapper.PruneLowMagnitude):
      prunable_weights = layer.layer.get_prunable_weights()
    elif isinstance(layer, prunable_layer.PrunableLayer):
      prunable_weights = layer.get_prunable_weights()
    elif prune_registry.PruneRegistry.supports(layer):
      weight_names = prune_registry.PruneRegistry._weight_names(layer)
      prunable_weights = [getattr(layer, weight) for weight in weight_names]
    else:
      prunable_weights = None
    if prunable_weights:
      print("{}: {}".format(
          layer.name, ", ".join([
              "({}, {})".format(weight.name,
                                str(_get_sparsity(K.get_value(weight))))
              for weight in prunable_weights
          ])))
  print("\n")


def get_model_sparsity(model, per_layer=False, allow_list=None):
  """Calculates the sparsity of the model's weights and biases.

  Quantizes the model weights using model_save_quantized_weights (but does
  not save the quantized weights) before calculating the proportion of
  weights and biases set to zero.

  Arguments:
    model: The model to use to calculate sparsity. Assumes that this is a
      QKeras model with trained weights.
    per_layer: If to return a per-layer breakdown of sparsity
    allow_list: A list of layer class names that sparsity will be calculated
      for. If set to None, a default list will be used.

  Returns:
    A float value representing the proportion of weights and biases set to
    zero in the quantized model. If per_layer is True, it also returns a
    per-layer breakdown of model sparsity formatted as a list of tuples in
    the form (<layer name>, <sparsity proportion>)
  """
  # Checks if to use a default list of allowed layers to calculate sparsity.
  if allow_list is None:
    allow_list = [
        "QDense", "Dense", "QConv1D", "Conv1D", "QConv2D", "Conv2D",
        "QDepthwiseConv2D", "DepthwiseConv2D", "QSeparableConv1D",
        "SeparableConv1D", "QSeparableConv2D", "SeparableConv2D",
        "QOctaveConv2D", "QSimpleRNN", "RNN", "QLSTM", "QGRU",
        "QConv2DTranspose", "Conv2DTranspose", "QConv2DBatchnorm",
        "QDepthwiseConv2DBatchnorm",
    ]

  # Quantizes the model weights for a more accurate sparsity calculation.
  model_save_quantized_weights(model)

  # Calculates the sparsity layer by layer.
  layer_sparsity = []
  total_sparsity = 0.
  all_weights = []
  for layer in model.layers:
    if hasattr(layer, "quantizers") and layer.__class__.__name__ in allow_list:
      # Folded layers expose their effective weights via get_folded_weights.
      if layer.__class__.__name__ in [
          "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
        weights_to_examine = layer.get_folded_weights()
      else:
        weights_to_examine = layer.get_weights()
      layer_weights = []
      for weight in weights_to_examine:
        try:
          weight_numpy = weight.ravel()
        except AttributeError:
          # In case of EagerTensor.
          weight_numpy = weight.numpy().ravel()
        layer_weights.append(weight_numpy)
        all_weights.append(weight_numpy)
      layer_weights = np.concatenate(layer_weights)
      layer_sparsity.append((layer.name, np.mean(layer_weights == 0)))

  if len(all_weights) > 0:
    # Average the sparsity for the entire model.
    all_weights = np.concatenate(all_weights)
    total_sparsity = np.mean(all_weights == 0)

  if per_layer:
    return (total_sparsity, layer_sparsity)
  else:
    return total_sparsity


def quantized_model_debug(model, X_test, plot=False, plt_instance=None):
  """Debugs and plots model weights and activations.

  Args:
    model: The QKeras model to debug
    X_test: The sample data to use to give to model.predict
    plot: Bool. If to plot the results.
    plt_instance: A matplotlib.pyplot instance used to plot in an IPython
      environment.
  """
  assert (plt_instance and plot) or not plot, (
      "plt_instance is required if plt is True")
  outputs = []
  output_names = []
  for layer in model.layers:
    if layer.__class__.__name__ in REGISTERED_LAYERS:
      output_names.append(layer.name)
      outputs.append(layer.output)
  model_debug = Model(inputs=model.inputs, outputs=outputs)
  y_pred = model_debug.predict(X_test)
  print("{:30} {: 8.4f} {: 8.4f}".format(
      "input", np.min(X_test), np.max(X_test)))
  for n, p in zip(output_names, y_pred):
    layer = model.get_layer(n)
    # NOTE(review): these `in` checks are substring tests against the string
    # "QActivation"/"QAdaptiveActivation", not equality — e.g. "Q" would also
    # match. Verify this is intentional.
    if (layer.__class__.__name__ in "QActivation" or
        layer.__class__.__name__ in "QAdaptiveActivation"):
      alpha = get_weight_scale(layer.activation, p)
    else:
      alpha = 1.0
    print(
        "{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p / alpha),
                                         np.max(p / alpha)),
        end="")
    if alpha != 1.0:
      print(" a[{: 8.4f} {:8.4f}]".format(np.min(alpha), np.max(alpha)))
    if plot and layer.__class__.__name__ in [
        "QConv1D", "QConv2D", "QConv2DTranspose", "QDense", "QActivation",
        "QAdaptiveActivation", "QSimpleRNN", "QLSTM", "QGRU",
        "QBidirectional", "QSeparableConv1D", "QSeparableConv2D"
    ]:
      plt_instance.hist(p.flatten(), bins=25)
      plt_instance.title(layer.name + "(output)")
      plt_instance.show()
    alpha = None
    # Folded layers expose their effective weights via get_folded_weights.
    if layer.__class__.__name__ not in [
        "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
      weights_to_examine = layer.get_weights()
    else:
      weights_to_examine = layer.get_folded_weights()
    for i, weights in enumerate(weights_to_examine):
      if hasattr(layer, "get_quantizers") and layer.get_quantizers()[i]:
        weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
      if i == 0 and layer.__class__.__name__ in [
          "QConv1D", "QConv2D", "QConv2DTranspose", "QDense", "QSimpleRNN",
          "QLSTM", "QGRU", "QSeparableConv1D", "QSeparableConv2D",
          "QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"
      ]:
        alpha = get_weight_scale(layer.get_quantizers()[i], weights)
        # if alpha is 0, let's remove all weights.
        alpha_mask = (alpha == 0.0)
        weights = np.where(alpha_mask, weights * alpha, weights / alpha)
      if plot:
        plt_instance.hist(weights.flatten(), bins=25)
        plt_instance.title(layer.name + "(weights)")
        plt_instance.show()
      print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
            end="")
    if alpha is not None and isinstance(alpha, np.ndarray):
      print(" a({: 10.6f} {: 10.6f})".format(
          np.min(alpha), np.max(alpha)), end="")
    print("")


def quantized_model_dump(model, x_test, output_dir=None, layers_to_dump=[]):
  """Dumps tensors of target layers to binary files.

  Arguments:
    model: QKeras model object.
    x_test: numpy type, test tensors to generate output tensors.
    output_dir: a string for the directory to hold binary data.
    layers_to_dump: a list of string, specified layers by layer customized
      name.

  NOTE(review): layers_to_dump uses a mutable default argument; safe only
  because it is never mutated here.
  """
  outputs = []
  y_names = []

  if not output_dir:
    # NOTE(review): TemporaryDirectory deletes the directory as soon as the
    # `with` block exits; it is then re-created by makedirs below but never
    # cleaned up. tempfile.mkdtemp() would express the intent directly.
    with tempfile.TemporaryDirectory() as output_dir:
      print("temp dir", output_dir)

  if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    print("create dir", output_dir)

  for layer in model.layers:
    if not layers_to_dump or layer.name in layers_to_dump:
      y_names.append(layer.name)
      outputs.append(layer.output)

  # Gather the tensor outputs from specified layers at layers_to_dump.
  model_debug = Model(inputs=model.inputs, outputs=outputs)
  y_pred = model_debug.predict(x_test)

  # Dumps tensors to files.
  for name, tensor_data in zip(y_names, y_pred):
    filename = os.path.join(output_dir, name + ".bin")
    print("writing the layer output tensor to ", filename)
    # NOTE(review): binary float32 data written through a text-mode ("w")
    # handle — "wb" would be the portable choice; verify on non-POSIX.
    with open(filename, "w") as fid:
      tensor_data.astype(np.float32).tofile(fid)


def clone_model_and_freeze_auto_po2_scale(
    orig_model, orig_model_path=None, quantize_model_weights=False):
  """Clone model and freeze the scale value of auto_po2 type quantizers.

  Args:
    orig_model: original model which will be used to clone the new model.
      If set to None, the function will load the original model from
      orig_model_path argument.
    orig_model_path: The path to the original model file. If set to None,
      the function will load the original model from the orig_model
      argument.
    quantize_model_weights: Bool to quantize weights to HW format. If set
      to False, the model weights will be in float format. If set to True,
      the model weights will be in HW format and the function will also
      check if the hw weights extracted from the new model matches the
      original model.

  Returns:
    A tuple of the new model and the new model's hw weights.

  Note:
    + When using this function to retrain model with fixed scale value.
      Set quantize_model_weights to False in this case.
    + This function only supports a collection of common layers that will
      use auto_po2 quantizers. For less common layers, it will raise errors
      and we will add more support case by case.

  Example usage:
    model, _ = clone_model_and_freeze_auto_po2_scale(
        orig_model_path="path/to/model", quantize_model_weights=False)
  """

  def _create_bn_layer(layer_cfg, bn_inv_quantizer):
    # Clone batch normalization layer with the new inverse quantizer.
    if bn_inv_quantizer is not None:
      layer_cfg["inverse_quantizer"]["config"] = bn_inv_quantizer.get_config()
    return QBatchNormalization(**layer_cfg)

  def _create_qconv2d_layer(layer_cfg, kernel_quantizer):
    # Clone QConv2D layer with the new kernel quantizers.
if kernel_quantizer is not None: layer_cfg["kernel_quantizer"]["config"] = kernel_quantizer.get_config() return QConv2D(**layer_cfg) def _create_qdepthwise_conv2d_layer(layer_cfg, depthwise_quantizer): # Clone QDepthwiseConv2D layer with the new depthwise_quantizer quantizer. if depthwise_quantizer is not None: layer_cfg["depthwise_quantizer"][ "config"] = depthwise_quantizer.get_config() return QDepthwiseConv2D(**layer_cfg) def _create_qdense_layer(layer_cfg, kernel_quantizer): # Clone QDense layer with the new kernel quantizer. if kernel_quantizer is not None: layer_cfg["kernel_quantizer"]["config"] = kernel_quantizer.get_config() return QDense(**layer_cfg) def _create_other_layer(orig_layer): # Clone other layers. config = orig_layer.get_config() return orig_layer.__class__.from_config(config) def _create_quantized_bits_with_post_training_scale(q): # Create a new quantized_bits instance with the fixed scale value. if q is not None: q_cfg = q.get_config() q_cfg["post_training_scale"] = q.scale.numpy() q = quantized_bits(**q_cfg) return q def _find_auto_po2_quantizer(layer): # Find the auto_po2 quantizer in the layer. Note that we allow at # most one auto_po2 quantizer in each layer due to the limitation of # the current HW implementation. num_auto_po2_quantizers = 0 auto_po2_quantizer = None if hasattr(layer, "quantizers"): for q in layer.quantizers: if hasattr(q, "alpha") and q.alpha == "auto_po2": num_auto_po2_quantizers += 1 auto_po2_quantizer = q if num_auto_po2_quantizers > 1: raise ValueError( f"{layer.name} has more than one auto_po2 quantizer. " "Please check if this is expected.") else: return auto_po2_quantizer def _check_hw_weights_equal(hw_weights_1, hw_weights_2): # Check if the hw weights extracted from the new model matches the # original model. 
for layer_name in hw_weights_2.keys(): for key in hw_weights_2[layer_name].keys(): val1 = hw_weights_2[layer_name][key] val2 = hw_weights_1[layer_name][key] if isinstance(val1, list): for (v1, v2) in zip(val1, val2): if not np.all(v1 == v2): raise ValueError( f"{layer_name}/{key}: No Match! v1={v1}, v2={v2}") else: if not np.all(val1 == val2): raise ValueError( f"{layer_name}/{key}: No Match! val1={val1}, val2={val2}") # Load the original model with float weights. # Note: weights will be quantized later in silicon flow by calling # model_save_quantized_weights. if orig_model is not None and orig_model_path is not None: raise ValueError( "Only one of orig_model and orig_model_path can be set.") elif orig_model is None and orig_model_path is None: raise ValueError( "One of orig_model and orig_model_path must be set.") elif orig_model_path is not None: orig_model = load_qmodel(orig_model_path, compile=False) # Quantize model weights and compute quantizer scale values. quantized_model = tf.keras.models.clone_model(orig_model) quantized_model.set_weights(orig_model.get_weights()) # In silicon flow, weight binary files are generated from hw weights. orig_hw_weights = model_save_quantized_weights( quantized_model) # Create a new model with fixed scale quantizers. x = inputs = tf.keras.Input( shape=orig_model.input_shape[1:], name=orig_model.layers[0].name) for layer in quantized_model.layers[1:]: layer_class = layer.__class__.__name__ auto_po2_quantizer = _find_auto_po2_quantizer(layer) auto_po2_quantizer_with_frozen_scale = ( _create_quantized_bits_with_post_training_scale(auto_po2_quantizer)) layer_cfg = layer.get_config() # To be compatible with different python versions, we do not use # match-case style here. 
if layer_class == "QConv2D": x = _create_qconv2d_layer(layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) elif layer_class == "QDepthwiseConv2D": x = _create_qdepthwise_conv2d_layer( layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) elif layer_class == "QBatchNormalization": x = _create_bn_layer(layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) elif layer_class == "QDense": x = _create_qdense_layer(layer_cfg, auto_po2_quantizer_with_frozen_scale)(x) else: x = _create_other_layer(layer)(x) new_model = tf.keras.Model(inputs, x) # Set the weights of the new model to the original model (float weights). new_model.set_weights(orig_model.get_weights()) # Check if the new model still has auto_po2 quantizer. # This function only supports a colleciton of common layers that will use # auto_po2 quantizers. For less common layers, we need to add extra support # in the future. for layer in new_model.layers: q = _find_auto_po2_quantizer(layer) if q is not None and q.post_training_scale is None: raise ValueError( f"{layer.name} in the new model still has auto_po2 quantizer with " "adaptive scales. Please check if this is expected!") new_hw_weights = None if quantize_model_weights: new_hw_weights = model_save_quantized_weights(new_model) # Check if the hw weights extracted from the new model matches the original # nima model. _check_hw_weights_equal(orig_hw_weights, new_hw_weights) return new_model, new_hw_weights ================================================ FILE: requirements.txt ================================================ tensorflow>=2.5.0rc0 numpy>=1.16.5 pyparser pandas>=1.1.0 matplotlib>=3.3.0 scipy>=1.4.1 setuptools>=41.0.0 argparse>=1.4.0 pyasn1<0.5.0,>=0.4.6 requests<3,>=2.21.0 pyparsing pytest>=4.6.9 tensorflow-model-optimization>=0.2.1 networkx>=2.1 # prompt_toolkit is required by IPython. # IPython is required by keras-tuner. # Later prompt_toolkit version requires Python 3.6.2, # which is not supported. 
# cl/380856863
prompt_toolkit<=3.0.18
keras-tuner==1.0.3
scikit-learn>=0.23.1
tqdm>=4.48.0


================================================
FILE: setup.cfg
================================================
[metadata]
name = qkeras
version = 0.9.0
author = Google
author_email = qkeras-team@google.com
description = A quantization extension to Keras that provides drop-in layer replacements
long_description = file: README.md
long_description_content_type = text/markdown
url = https://github.com/google/qkeras
classifiers =
    Programming Language :: Python :: 3
    License :: OSI Approved :: Apache Software License
    Operating System :: OS Independent

[options]
packages = find:
python_requires = >=3.7

[options.packages.find]
where = qkeras

[aliases]
test=pytest


================================================
FILE: setup.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Setup script for qkeras."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import io
import setuptools

# Long description for PyPI is taken verbatim from the README.
with io.open("README.md", "r", encoding="utf8") as fh:
  long_description = fh.read()

setuptools.setup(
    name="QKeras",
    version="0.9.0",
    author="QKeras Team",
    author_email="qkeras-team@google.com",
    maintainer="Shan Li",
    maintainer_email="lishanok@google.com",
    packages=setuptools.find_packages(),
    scripts=[],
    url="",
    license="Apache v.2.0",
    description="Quantization package for Keras",
    long_description=long_description,
    install_requires=[
        "numpy>=1.16.0",
        "scipy>=1.4.1",
        "pyparser",
        "setuptools>=41.0.0",
        "tensorflow-model-optimization>=0.2.1",
        "networkx>=2.1",
        "keras-tuner>=1.0.1",
        "scikit-learn>=0.23.1",
        "tqdm>=4.48.0"
    ],
    setup_requires=[
        "pytest-runner",
    ],
    tests_require=[
        "pytest",
    ],
)


================================================
FILE: tests/automatic_conversion_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pytest
from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from qkeras import *
from qkeras.utils import model_quantize


def create_network():
  """Builds a functional model mixing float, Activation and QConv2D layers."""
  xi = Input((28,28,1))
  x = Conv2D(32, (3, 3))(xi)
  x = Activation("relu", name='relu_act')(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  x = QConv2D(32, (3, 3), activation="quantized_relu(4)")(x)
  return Model(inputs=xi, outputs=x)


def create_network_with_bn():
  """Builds a model whose conv layers are followed by BatchNormalization."""
  xi = Input((28,28,1))
  x = Conv2D(32, (3, 3))(xi)
  x = BatchNormalization(axis=-1)(x)
  x = Activation("relu", name='relu_act')(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  x = DepthwiseConv2D((3, 3))(x)
  x = BatchNormalization(axis=-1)(x)
  return Model(inputs=xi, outputs=x)


def create_network_sequential():
  """Sequential variant of create_network()."""
  model = Sequential([
      Conv2D(32, (3, 3), input_shape=(28,28,1)),
      Activation('relu'),
      Conv2D(32, (3, 3), activation="relu"),
      Activation('softmax'),
      QConv2D(32, (3, 3), activation="quantized_relu(4)")
  ])
  return model


def test_linear_activation():
  m = create_network()

  assert m.layers[1].activation.__name__ == "linear", "test failed"


def test_linear_activation_conversion():
  m = create_network()

  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary",
          "activation_quantizer": "binary"
      }
  }
  qq = model_quantize(m, d, 4)

  assert str(qq.layers[1].activation) == "binary()"


def test_no_activation_conversion_to_quantized():
  m = create_network()
  d = {"QConv2D": {"kernel_quantizer": "binary", "bias_quantizer": "binary"}}
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "Activation"
  assert qq.layers[4].__class__.__name__ == "Activation"


def test_automatic_conversion_from_relu_to_qr():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      }}
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[3].activation) == "quantized_relu(4,0)"


def test_conversion_from_relu_activation_to_qr_qactivation():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "QActivation"
  assert str(qq.layers[2].quantizer) == "ternary()"
  assert qq.layers[4].__class__.__name__ == "Activation"


def test_conversion_from_relu_activation_to_qadaptiveactivation():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QAdaptiveActivation": {
          "relu": "quantized_relu(8)"
      }
  }
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "QAdaptiveActivation"
  assert str(qq.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq.layers[4].__class__.__name__ == "Activation"


def test_conversion_qadaptiveactivation_with_preference():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }

  # Test with QActivation preference
  qq1 = model_quantize(m, d, 4, prefer_qadaptiveactivation=False)
  assert qq1.layers[2].__class__.__name__ == "QActivation"
  assert str(qq1.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq1.layers[4].__class__.__name__ == "Activation"

  # Test with QAdaptiveActivation preference
  qq2 = model_quantize(m, d, 4, prefer_qadaptiveactivation=True)
  assert qq2.layers[2].__class__.__name__ == "QAdaptiveActivation"
  assert str(qq2.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq2.layers[4].__class__.__name__ == "Activation"


def test_sequential_model_conversion():
  m = create_network_sequential()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      }}
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[2].activation) == "quantized_relu(4,0)"


def test_folded_layer_conversion():
  # create a sequential model with conv2d layer and activation layers
  m1 = create_network()
  # create a sequantial model with conv2d layer followed by bn layer
  m2 = create_network_with_bn()

  # quantization config
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QDepthwiseConv2D": {
          "depthwise_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QConv2DBatchnorm": {
          "kernel_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "QDepthwiseConv2DBatchnorm": {
          "depthwise_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }

  # test when model has no layer to fold
  # desired behavior: un-folded layers
  qq1 = model_quantize(m1, d, 4, enable_bn_folding=True)
  assert qq1.layers[1].__class__.__name__ == "QConv2D"
  assert str(qq1.layers[1].quantizers[0]).startswith("binary")

  # test when the 1st conv2d layers needs to fold but the 2nd conv2d layer
  # does not (not followed by bn layer)
  # desired behavior: 1st conv2d is folded, 2nd conv2d unfolded
  # also test the depthwiseconv2d layer should fold
  qq2 = model_quantize(m2, d, 4, enable_bn_folding=True)
  assert qq2.layers[1].__class__.__name__ == "QConv2DBatchnorm"
  assert str(qq2.layers[1].quantizers[0]).startswith("ternary")
  assert qq2.layers[3].__class__.__name__ == "QConv2D"
  assert str(qq2.layers[3].quantizers[0]).startswith("binary")
  assert qq2.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
  assert str(qq2.layers[5].quantizers[0]).startswith("ternary")

  # test when there are layers to fold but folding is disabled
  # desired behavior: all conv2d/depthwise2d layers are not folded
  qq3 = model_quantize(m2, d, 4, enable_bn_folding=False)
  assert qq3.layers[1].__class__.__name__ == "QConv2D"
  assert str(qq3.layers[1].quantizers[0]).startswith("binary")
  assert qq3.layers[2].__class__.__name__ == "BatchNormalization"
  assert str(qq3.layers[3].quantizer).startswith("quantized_relu")
  assert qq3.layers[6].__class__.__name__ == "QDepthwiseConv2D"
  assert str(qq3.layers[6].quantizers[0]).startswith("binary")

  # test when QConv2DBatchnorm quantizer, e.g., is not given in config
  # desired behavior: quantizers for QConv2DBatchnorm layer fall back to
  # QConv2D quantizers
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QDepthwiseConv2D": {
          "depthwise_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }
  qq4 = model_quantize(m2, d, 4, enable_bn_folding=True)
  assert qq4.layers[1].__class__.__name__ == "QConv2DBatchnorm"
  assert str(qq4.layers[1].quantizers[0]).startswith("binary")
  assert qq4.layers[3].__class__.__name__ == "QConv2D"
  assert str(qq4.layers[3].quantizers[0]).startswith("binary")
  assert qq4.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
  assert str(qq4.layers[5].quantizers[0]).startswith("binary")


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/autoqkeras_test.py
================================================
# ==============================================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import tempfile

import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from qkeras.autoqkeras import AutoQKerasScheduler


def dense_model():
  """Creates test dense model."""
  x = x_in = Input((4,), name="input")
  x = Dense(20, name="dense_0")(x)
  x = BatchNormalization(name="bn0")(x)
  x = Activation("relu", name="relu_0")(x)
  x = Dense(40, name="dense_1")(x)
  x = BatchNormalization(name="bn1")(x)
  x = Activation("relu", name="relu_1")(x)
  x = Dense(20, name="dense_2")(x)
  x = BatchNormalization(name="bn2")(x)
  x = Activation("relu", name="relu_2")(x)
  x = Dense(3, name="dense")(x)
  x = Activation("softmax", name="softmax")(x)

  model = Model(inputs=x_in, outputs=x)

  return model


def test_autoqkeras():
  """Tests AutoQKeras scheduler."""
  np.random.seed(42)
  tf.random.set_seed(42)

  x_train, y_train = load_iris(return_X_y=True)

  # Scale features to [-0.5, 0.5] before training.
  scaler = MinMaxScaler(feature_range=(-0.5, 0.5))
  scaler.fit(x_train)
  x_train = scaler.transform(x_train)

  nb_classes = np.max(y_train) + 1
  y_train = to_categorical(y_train, nb_classes)

  # Search space: candidate quantizers and their bit costs.
  quantization_config = {
      "kernel": {
          "stochastic_ternary": 2,
          "quantized_bits(8,0,1,alpha=1.0)": 8
      },
      "bias": {
          "quantized_bits(4,0,1)": 4
      },
      "activation": {
          "quantized_relu(4,1)": 4
      },
      "linear": {
          "binary": 1
      }
  }

  goal = {
      "type": "energy",
      "params": {
          "delta_p": 8.0,
          "delta_n": 8.0,
          "rate": 2.0,
          "stress": 1.0,
          "process": "horowitz",
          "parameters_on_memory": ["sram", "sram"],
          "activations_on_memory": ["sram", "sram"],
          "rd_wr_on_io": [False, False],
          "min_sram_size": [0, 0],
          "reference_internal": "int8",
          "reference_accumulator": "int32"
      }
  }

  model = dense_model()
  model.summary()

  optimizer = Adam(lr=0.01)
  model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                metrics=["acc"])

  limit = {
      "dense_0": [["stochastic_ternary"], 8, 4],
      "dense": [["quantized_bits(8,0,1,alpha=1.0)"], 8, 4],
      "BatchNormalization": [],
      "Activation": [4]
  }

  run_config = {
      "output_dir": tempfile.mkdtemp(),
      "goal": goal,
      "quantization_config": quantization_config,
      "learning_rate_optimizer": False,
      "transfer_weights": False,
      "mode": "random",
      "seed": 42,
      "limit": limit,
      "tune_filters": "layer",
      "tune_filters_exceptions": "^dense$",
      "max_trials": 1,
      "blocks": [
          "^.*0$",
          "^dense$"
      ],
      "schedule_block": "cost"
  }

  autoqk = AutoQKerasScheduler(model, metrics=["acc"], **run_config)
  autoqk.fit(x_train, y_train, validation_split=0.1, batch_size=150, epochs=4)

  qmodel = autoqk.get_best_model()

  # Retrain the best quantized model found by the scheduler.
  optimizer = Adam(lr=0.01)
  qmodel.compile(optimizer=optimizer, loss="categorical_crossentropy",
                 metrics=["acc"])
  history = qmodel.fit(x_train, y_train, epochs=5, batch_size=150,
                       validation_split=0.1)

  quantized_acc = history.history["acc"][-1]


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/bn_folding_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests layers from folded_layers.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
from numpy.testing import assert_raises
import tempfile

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.backend import clear_session
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import metrics

from qkeras import QConv2DBatchnorm
from qkeras import QConv2D
from qkeras import QDense
from qkeras import QActivation
from qkeras import QDepthwiseConv2D
from qkeras import QDepthwiseConv2DBatchnorm
from qkeras import utils as qkeras_utils
from qkeras import bn_folding_utils


def get_sgd_optimizer(learning_rate):
  # Newer TF releases moved the original SGD under tf.keras.optimizers.legacy.
  if hasattr(tf.keras.optimizers, "legacy"):
    return tf.keras.optimizers.legacy.SGD(learning_rate)
  else:
    return tf.keras.optimizers.SGD(learning_rate)


def get_qconv2d_model(input_shape, kernel_size, kernel_quantizer=None):
  # Builds a QConv2D -> BatchNormalization -> Dense softmax classifier.
  num_class = 2
  x = x_in = layers.Input(input_shape, name="input")
  x = QConv2D(
      filters=2, kernel_size=kernel_size, strides=(4, 4),
      kernel_initializer="ones", bias_initializer="zeros", use_bias=False,
      kernel_quantizer=kernel_quantizer, bias_quantizer=None,
      name="conv2d")(x)
  x = layers.BatchNormalization(
      axis=-1,
      momentum=0.99,
      epsilon=0.001,
      center=True,
      scale=True,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_constraint=None,
      gamma_constraint=None,
      renorm=False,
      renorm_clipping=None,
      renorm_momentum=0.99,
      fused=None,
      trainable=True,
      virtual_batch_size=None,
      adjustment=None,
      name="bn")(
          x)
  x = layers.Flatten(name="flatten")(x)
  x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones",
                   name="dense")(x)
  x = layers.Activation("softmax", name="softmax")(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model


def get_qconv2d_batchnorm_model(input_shape, kernel_size, folding_mode,
                                kernel_quantizer=None):
  # Folded (QConv2DBatchnorm) counterpart of get_qconv2d_model.
  num_class = 2
  x = x_in = layers.Input(input_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=kernel_size,
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      name="foldconv2d")(x)
  x = layers.Flatten(name="flatten")(x)
  x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones",
                   name="dense")(x)
  x = layers.Activation("softmax", name="softmax")(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model


def get_models_with_one_layer(kernel_quantizer, folding_mode,
                              ema_freeze_delay):
  # Returns an (unfolded conv+bn model, folded model) pair with identical
  # initialization so their outputs can be compared.
  x_shape = (2, 2, 1)
  loss_fn = tf.keras.losses.MeanSquaredError()
  optimizer = get_sgd_optimizer(learning_rate=1e-3)

  # define a model with seperate conv2d and bn layers
  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2D(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      bias_quantizer=None,
      name="conv2d")(x)
  x = layers.BatchNormalization(
      axis=-1,
      momentum=0.99,
      epsilon=0.001,
      center=True,
      scale=True,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_constraint=None,
      gamma_constraint=None,
      renorm=False,
      renorm_clipping=None,
      renorm_momentum=0.99,
      fused=None,
      trainable=True,
      virtual_batch_size=None,
      adjustment=None,
      name="bn")(x)
  unfold_model = Model(inputs=[x_in], outputs=[x])
  unfold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      ema_freeze_delay=ema_freeze_delay,
      name="foldconv2d")(x)
  fold_model = Model(inputs=[x_in], outputs=[x])
  fold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  return (unfold_model, fold_model)


def get_debug_model(model):
  # Wraps model so every non-Flatten/Input layer output can be inspected.
  layer_output_list = []
  for layer in model.layers:
    if layer.__class__.__name__ not in ["Flatten", "InputLayer"]:
      layer_output_list.append(layer.output)
  debug_model = Model(inputs=model.inputs, outputs=layer_output_list)
  return debug_model


def generate_dataset(train_size=10,
                     batch_size=5,
                     input_shape=(3, 3, 1),
                     num_class=2,
                     output_shape=None):
  """create tf.data.Dataset with shape: (N,) + input_shape."""
  x_train = np.random.randint(
      4, size=(train_size, input_shape[0], input_shape[1], input_shape[2]))
  # NOTE(review): the randint tensor above is immediately overwritten by the
  # uniform sample below; the first assignment is dead code.
  x_train = np.random.rand(
      train_size, input_shape[0], input_shape[1], input_shape[2])

  if output_shape:
    # Regression-style targets matching the model's output shape.
    y_train = np.random.random_sample((train_size,) + output_shape)
  else:
    # Classification targets: one-hot over num_class.
    y_train = np.random.randint(num_class, size=train_size)
    y_train = to_categorical(y_train, num_class)

  train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
  train_ds = train_ds.batch(batch_size)
  return train_ds


def run_training(model, epochs, loss_fn, loss_metric, optimizer, train_ds,
                 do_print=False):
  # Custom training loop; returns the predictions made in the last epoch.
  # Iterate over epochs.
  for epoch in range(epochs):
    if do_print:
      print("- epoch {} -".format(epoch))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_ds):
      if do_print:
        print("\n - step {} -".format(step))
      with tf.GradientTape() as tape:
        predictions = model(x_batch_train, training=True)
        if epoch == epochs - 1:
          if do_print:
            print("y_pred:", predictions)
            print("y:", y_batch_train)
          output_predictions = predictions
        # Compute loss
        loss = loss_fn(y_batch_train, predictions)

      grads = tape.gradient(loss, model.trainable_weights)
      if do_print:
        if epoch == epochs - 1:
          # print("old trainable:", model.trainable_weights)
          print("grads:", grads)
      optimizer.apply_gradients(zip(grads, model.trainable_weights))
      if do_print:
        if epoch == epochs - 1:
          # print("new trainable:", model.trainable_weights)
          print("loss:", loss)
      loss_metric(loss)
      if do_print:
        if epoch == epochs - 1:
          print("mean loss = %.4f" % (loss_metric.result()))
  return output_predictions


def test_unfold_model():
  """Test if unfold_model works properly.

  Convert a folded model to a normal model. The kernel/bias weight in the
  normal model should be the same as the folded kernel/bias in the folded
  model. Test if the function can convert both sequential and non-sequantial
  models properly.
  """

  x_shape = (2, 2, 1)
  kernel_quantizer = "quantized_bits(4, 0, 1)"
  folding_mode = "batch_stats_folding"
  ema_freeze_delay = 10
  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
  gamma = np.array([2., 1.])
  beta = np.array([0., 1.])
  moving_mean = np.array([1., 1.])
  moving_variance = np.array([1., 2.])
  iteration = np.array(-1)

  def _get_sequantial_folded_model(x_shape):
    # Folded conv followed by folded depthwise conv, sequential topology.
    x = x_in = layers.Input(x_shape, name="input")
    x = QConv2DBatchnorm(
        filters=2,
        kernel_size=(2, 2),
        strides=(2, 2),
        kernel_initializer="ones",
        bias_initializer="zeros",
        use_bias=False,
        kernel_quantizer=kernel_quantizer,
        beta_initializer="zeros",
        gamma_initializer="ones",
        moving_mean_initializer="zeros",
        moving_variance_initializer="ones",
        folding_mode=folding_mode,
        ema_freeze_delay=ema_freeze_delay,
        name="foldconv2d")(x)
    x = QDepthwiseConv2DBatchnorm(
        kernel_size=(2, 2),
        strides=(1, 1),
        use_bias=False,
        depthwise_quantizer=kernel_quantizer,
        folding_mode=folding_mode,
        ema_freeze_delay=ema_freeze_delay,
        name="folddepthwiseconv2d")(x)
    model = Model(inputs=[x_in], outputs=[x])

    model.layers[1].set_weights([
        kernel, gamma, beta, iteration, moving_mean, moving_variance
    ])
    return model

  def _get_nonseq_folded_model(x_shape):
    # Branching topology (two convs merged by Maximum) before the folded conv.
    x = x_in = layers.Input(x_shape, name="input")
    x1 = layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),
                       name="conv2d_1")(x)
    x2 = layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),
                       name="conv2d_2")(x)
    x = layers.Maximum()([x1, x2])
    x = QConv2DBatchnorm(
        filters=2,
        kernel_size=(2, 2),
        strides=(4, 4),
        kernel_initializer="ones",
        bias_initializer="zeros",
        use_bias=False,
        kernel_quantizer=kernel_quantizer,
        beta_initializer="zeros",
        gamma_initializer="ones",
        moving_mean_initializer="zeros",
        moving_variance_initializer="ones",
        folding_mode=folding_mode,
        ema_freeze_delay=ema_freeze_delay,
        name="foldconv2d")(x)
    x = layers.Flatten(name="flatten")(x)
    x = layers.Dense(2, use_bias=False, kernel_initializer="ones",
                     name="dense")(x)
    model = Model(inputs=[x_in], outputs=[x])

    model.layers[4].set_weights([
        kernel, gamma, beta, iteration, moving_mean, moving_variance
    ])
    return model

  seq_model = _get_sequantial_folded_model((4, 4, 1))
  nonseq_model = _get_nonseq_folded_model(x_shape)

  for model in [nonseq_model, seq_model]:
    # preparing data for testing if model prediction matches
    output_shape = model.output_shape[1:]
    input_shape = model.input_shape[1:]
    train_ds = generate_dataset(train_size=10, batch_size=5,
                                input_shape=input_shape,
                                output_shape=output_shape)

    # convert model with folded layers to a model with corresponding QConv2D
    # or QDepthwiseConv2D layers
    cvt_model = bn_folding_utils.unfold_model(model)

    for layer_type in ["QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
      weight1 = None
      weight2 = None
      for layer in model.layers:
        if layer.__class__.__name__ == layer_type:
          weight1 = layer.get_folded_weights()
          break
      # layer_type[:-9] strips the "Batchnorm" suffix to get the unfolded
      # class name.
      for layer in cvt_model.layers:
        if layer.__class__.__name__ == layer_type[:-9]:
          weight2 = layer.get_weights()
          break
      # test if the corresponding layers have identical weights
      if weight1 and weight2:
        assert_equal(weight1[0], weight2[0])
        assert_equal(weight1[1], weight2[1])

    # test if the predictions of the two models are identical
    pred1 = model.predict(train_ds)
    pred2 = cvt_model.predict(train_ds)
    assert_equal(pred1, pred2)


def test_loading():
  """Test to load model using different approaches."""
  loss_fn = tf.keras.losses.MeanSquaredError()
  loss_metric = metrics.Mean()
  optimizer = get_sgd_optimizer(learning_rate=1e-3)

  x_shape = (2, 2, 1)

  custom_objects = {}
  qkeras_utils._add_supported_quantized_objects(custom_objects)

  train_ds = generate_dataset(train_size=1, batch_size=1,
                              input_shape=x_shape, num_class=2)

  model_fold = get_qconv2d_batchnorm_model(
      input_shape=x_shape, kernel_size=(2, 2),
      folding_mode="ema_stats_folding")
  model_fold.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  run_training(model_fold, 10, loss_fn, loss_metric, optimizer, train_ds,
               do_print=False)

  # test load model from json to ensure saving/loading model architecture
  # works
  model_fold.use_legacy_config = True  # Ensures old Keras serialization
  json_string = model_fold.to_json()
  clear_session()
  model_from_json = qkeras_utils.quantized_model_from_json(json_string)
  model_from_json.use_legacy_config = True
  assert json_string == model_from_json.to_json()

  # test reload model from hdf5 files to ensure saving/loading works
  _, fname = tempfile.mkstemp(".h5")
  model_fold.save(fname)
  model_loaded = qkeras_utils.load_qmodel(fname)
  weight1 = model_fold.layers[1].get_folded_weights()
  weight2 = model_loaded.layers[1].get_folded_weights()
  assert_equal(np.array(weight1[0]), np.array(weight2[0]))
  assert_equal(np.array(weight1[1]), np.array(weight2[1]))

  # test convert a folded model to a normal model for zpm
  # the kernel/bias weight in the normal model should be the same as the
  # folded kernel/bias in the folded model
  normal_model = bn_folding_utils.unfold_model(model_fold)
  weight2 = normal_model.layers[1].get_weights()

  assert_equal(weight1[0], weight2[0])
  assert_equal(weight1[1], weight2[1])


def test_same_training_and_prediction():
  """test if fold/unfold layer has the same training and prediction output."""
  epochs = 5
  loss_fn = tf.keras.losses.MeanSquaredError()
  loss_metric = metrics.Mean()
  optimizer = get_sgd_optimizer(learning_rate=1e-3)
  x_shape = (2, 2, 1)

  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
  gamma = np.array([2., 1.])
  beta = np.array([0., 1.])
  moving_mean = np.array([1., 1.])
  moving_variance = np.array([1., 2.])
  iteration = np.array(-1)

  train_ds = generate_dataset(train_size=10, batch_size=10,
                              input_shape=x_shape, num_class=2)

  (unfold_model, fold_model_batch) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="batch_stats_folding",
      ema_freeze_delay=10)
  (_, fold_model_ema) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="ema_stats_folding",
      ema_freeze_delay=10)

  unfold_model.layers[1].set_weights([kernel])
  unfold_model.layers[2].set_weights(
      [gamma, beta, moving_mean, moving_variance])
  fold_model_batch.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])
  fold_model_ema.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])

  # check if prediction is the same
  y1 = unfold_model.predict(train_ds)
  y2_batch = fold_model_batch.predict(train_ds)
  y2_ema = fold_model_ema.predict(train_ds)
  assert_allclose(y1, y2_batch, rtol=1e-4)
  assert_allclose(y1, y2_ema, rtol=1e-4)

  # check if training for a number of epochs, and before bn freeeze, models
  # reached the same point
  y1 = run_training(unfold_model, epochs, loss_fn, loss_metric, optimizer,
                    train_ds, do_print=False)
  y2_batch = run_training(fold_model_batch, epochs, loss_fn, loss_metric,
                          optimizer, train_ds, do_print=False)
  y2_ema = run_training(fold_model_ema, epochs, loss_fn, loss_metric,
                        optimizer, train_ds, do_print=False)

  assert_allclose(y1, y2_batch, rtol=1e-4)
  assert_allclose(y1, y2_ema, rtol=1e-4)

  # check if training for long enough (after bn freezes), unfold model and
  # fold models should be different, but the two folding modes should be the
  # same
  epochs = 5
  iteration = np.array(8)
  (unfold_model, fold_model_batch) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="batch_stats_folding",
      ema_freeze_delay=10)
  (_, fold_model_ema) = get_models_with_one_layer(
      kernel_quantizer=None, folding_mode="ema_stats_folding",
      ema_freeze_delay=10)
  unfold_model.layers[1].set_weights([kernel])
  unfold_model.layers[2].set_weights(
      [gamma, beta, moving_mean, moving_variance])
  fold_model_batch.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])
  fold_model_ema.layers[1].set_weights([
      kernel, gamma, beta, iteration, moving_mean, moving_variance
  ])

  y1 = run_training(
      unfold_model, epochs, loss_fn, loss_metric, optimizer, train_ds,
      do_print=False)
  y2_batch = run_training(
      fold_model_batch, epochs, loss_fn, loss_metric, optimizer, train_ds,
      do_print=False)
  y2_ema =
run_training( fold_model_ema, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) assert_raises(AssertionError, assert_allclose, y1, y2_batch, rtol=1e-4) assert_allclose(y2_batch, y2_ema, rtol=1e-4) # test QDepthwiseConv2DBatchnorm layers def _get_models(x_shape, num_class, depthwise_quantizer, folding_mode, ema_freeze_delay): x = x_in = layers.Input(x_shape, name="input") x = QDepthwiseConv2DBatchnorm( kernel_size=(2, 2), strides=(2, 2), depth_multiplier=1, depthwise_initializer="ones", bias_initializer="zeros", use_bias=False, depthwise_quantizer=depthwise_quantizer, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay, name="fold_depthwiseconv2d")(x) x = layers.Flatten(name="flatten")(x) x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones", name="dense")(x) x = layers.Activation("softmax", name="softmax")(x) fold_model = Model(inputs=[x_in], outputs=[x]) x = x_in = layers.Input(x_shape, name="input") x = QDepthwiseConv2D( kernel_size=(2, 2), strides=(2, 2), depth_multiplier=1, depthwise_initializer="ones", bias_initializer="zeros", use_bias=False, depthwise_quantizer=depthwise_quantizer, name="depthwiseconv2d")(x) x = layers.BatchNormalization( beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", name="bn")(x) x = layers.Flatten(name="flatten")(x) x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones", name="dense")(x) x = layers.Activation("softmax", name="softmax")(x) model = Model(inputs=[x_in], outputs=[x]) return (model, fold_model) input_shape = (4, 4, 1) num_class = 2 depthwise_quantizer = None folding_mode = "ema_stats_folding" ema_freeze_delay = 10 # weights depthwise_kernel = np.array([[[[1.]], [[0.]]], [[[0.]], [[1.]]]]) gamma = np.array([2]) beta = np.array([0]) moving_mean = np.array([4.]) moving_variance = 
np.array([2.]) iteration = np.array(2) folded_depthwise_kernel_quantized = np.array( [[[[1.4138602]], [[0.]]], [[[0.]], [[1.4138602]]]]) folded_bias_quantized = np.array([-5.655441]) dense_weight = np.array([[1., 0], [0, 0], [0, 0], [0, 0]]) # generate dataset train_ds = generate_dataset(train_size=3, batch_size=3, input_shape=input_shape, num_class=2) # define models, one with folded layer and one without (model, fold_model) = _get_models( input_shape, num_class=num_class, depthwise_quantizer=depthwise_quantizer, folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay) # set weights fold_model.layers[1].set_weights([ depthwise_kernel, gamma, beta, iteration, moving_mean, moving_variance]) fold_model.layers[3].set_weights([dense_weight]) model.layers[1].set_weights([depthwise_kernel]) model.layers[2].set_weights([gamma, beta, moving_mean, moving_variance]) model.layers[4].set_weights([dense_weight]) # perform training epochs = 5 loss_fn = tf.keras.losses.MeanSquaredError() loss_metric = metrics.Mean() optimizer = get_sgd_optimizer(learning_rate=1e-3) pred1 = run_training( model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) pred2 = run_training( fold_model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) # before bn freezes, the two models should reach the same point assert_allclose(pred1, pred2, rtol=1e-4) # after bn freezes, the two models will not reach the same iteration = np.array(12) epochs = 5 ema_freeze_delay = 10 (model, fold_model) = _get_models( input_shape, num_class=num_class, depthwise_quantizer=depthwise_quantizer, folding_mode=folding_mode, ema_freeze_delay=ema_freeze_delay) fold_model.layers[1].set_weights([ depthwise_kernel, gamma, beta, iteration, moving_mean, moving_variance]) fold_model.layers[3].set_weights([dense_weight]) model.layers[1].set_weights([depthwise_kernel]) model.layers[2].set_weights([gamma, beta, moving_mean, moving_variance]) model.layers[4].set_weights([dense_weight]) pred1 = 
run_training( model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) pred2 = run_training( fold_model, epochs, loss_fn, loss_metric, optimizer, train_ds, do_print=False) assert_raises(AssertionError, assert_allclose, pred1, pred2, rtol=1e-4) def test_populate_bias_quantizer_from_accumulator(): """Test populate_bias_quantizer_from_accumulator function. Define a qkeras model with a QConv2DBatchnorm layer. Set bias quantizer in the layer as None. Call populate_bias_quantizer_from_accumulator function to automatically generate bias quantizer type from the MAC accumulator type. Set the bias quantizer accordingly in the model. Call populate_bias_quantizer_from_accumulator again in this model. This time since bias quantizer is already set, populate_bias_quantizer_from_accumulator function should not change the bias quantizer. """ x_shape = (2, 2, 1) # get a qkeras model with QConv2DBatchnorm layer. Set bias quantizer in the # layer as None. x = x_in = layers.Input(x_shape, name="input") x1 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False, kernel_quantizer="quantized_bits(4, 0, 1)", name="conv2d_1")(x) x2 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False, kernel_quantizer="quantized_bits(4, 0, 1)", name="conv2d_2")(x) x = layers.Maximum()([x1, x2]) x = QActivation("quantized_relu(4, 1)")(x) x = QConv2DBatchnorm( filters=2, kernel_size=(2, 2), strides=(4, 4), kernel_initializer="ones", bias_initializer="zeros", use_bias=False, kernel_quantizer="quantized_bits(4, 0, 1)", bias_quantizer=None, beta_initializer="zeros", gamma_initializer="ones", moving_mean_initializer="zeros", moving_variance_initializer="ones", folding_mode="batch_stats_folding", ema_freeze_delay=10, name="foldconv2d")(x) x1 = x x2 = layers.Flatten(name="flatten")(x) x2 = QDense(2, use_bias=False, kernel_initializer="ones", kernel_quantizer="quantized_bits(6, 2, 1)", name="dense")(x2) model = Model(inputs=[x_in], outputs=[x1, x2]) 
assert_equal(model.layers[5].get_quantizers()[1], None) # Call populate_bias_quantizer_from_accumulator function # to automatically generate bias quantizer from the MAC accumulator type. _ = bn_folding_utils.populate_bias_quantizer_from_accumulator( model, ["quantized_bits(8, 0, 1)"]) q = model.layers[5].get_quantizers()[1] assert_equal(q.__str__(), "quantized_bits(10,3,1)") # Call populate_bias_quantizer_from_accumulator function again # bias quantizer should not change _ = bn_folding_utils.populate_bias_quantizer_from_accumulator( model, ["quantized_bits(8, 0, 1)"]) q = model.layers[5].get_quantizers()[1] assert_equal(q.__str__(), "quantized_bits(10,3,1)") ================================================ FILE: tests/callbacks_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests for callbacks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import pytest from numpy.testing import assert_equal from tensorflow.keras.layers import * from tensorflow.keras.models import * import tensorflow.compat.v2 as tf from qkeras import * from qkeras.utils import get_model_sparsity from qkeras.utils import model_quantize from qkeras.callbacks import QNoiseScheduler def qconv_model(): x = x_in = tf.keras.layers.Input((4, 4, 1), name="input") x = QConv2D( 1, 2, 1, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name="qconv2d_1")( x) x = QActivation("quantized_relu(4)", name="QA_1")(x) model = keras.Model(inputs=[x_in], outputs=[x]) return model def test_QNoiseScheduler(): model = qconv_model() model.compile(optimizer="sgd", loss=tf.keras.losses.MeanSquaredError()) num_data = 5 x_train = np.random.rand(num_data, 4, 4, 1) y_train = np.random.rand(num_data, 1) ######################### # Test "step" freq_type # ######################### # The number of batch passes the finish of 4. gradual_qnoise_callback_0 = QNoiseScheduler( start=2, finish=4, freq_type="step", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_0, ], ) # QConv2D has a kernel_quantizer and a bias_quantizer, and QActivation has a # quantizer. num_quantizers_with_qnoise_factor = 0 for quantizer in gradual_qnoise_callback_0.quantizers: if hasattr(quantizer, "qnoise_factor"): num_quantizers_with_qnoise_factor += 1 assert_equal(num_quantizers_with_qnoise_factor, 3) # Test "step" qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_0.quantizers ] assert_equal(qnoise_factor, np.ones_like(qnoise_factor)) # The number of batch does not pass the finish of 10. 
Exponent 3.0 gradual_qnoise_callback_1 = QNoiseScheduler( start=2, finish=10, freq_type="step", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_1, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_1.quantizers ] val = 1 - np.power((10.0 - 4.0) / (10.0 - 2.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) # The number of batch does not pass the finish of 10. Exponent 2.0 gradual_qnoise_callback_2 = QNoiseScheduler( start=2, finish=10, freq_type="step", exponent=2.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_2, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_2.quantizers ] val = 1 - np.power((10.0 - 4.0) / (10.0 - 2.0), 2) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) # The number of batch does not pass the start of 6. gradual_qnoise_callback_3 = QNoiseScheduler( start=6, finish=10, freq_type="step", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=1, verbose=0, callbacks=[ gradual_qnoise_callback_3, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_3.quantizers ] assert_equal(qnoise_factor, np.zeros_like(qnoise_factor)) # The number of training iterations passes the number of batches of an epoch. gradual_qnoise_callback_4 = QNoiseScheduler( start=6, finish=20, freq_type="step", exponent=3.0) epochs = 2 model.fit( x_train, y_train, batch_size=1, epochs=epochs, verbose=0, callbacks=[ gradual_qnoise_callback_4, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_4.quantizers ] val = 1 - np.power((20.0 - (epochs*num_data - 1)) / (20.0 - 6.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) # The number of training iterations passes the number of batches of an epoch # with update_freq = 2. 
gradual_qnoise_callback_5 = QNoiseScheduler( start=0, finish=20, freq_type="step", update_freq=2, exponent=3.0) epochs = 2 model.fit( x_train, y_train, batch_size=1, epochs=epochs, verbose=0, callbacks=[ gradual_qnoise_callback_5, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_5.quantizers ] # It updates when the number of training iterations modulo update_freq is 0. val = 1 - np.power( (20.0 - epochs * ((epochs * num_data - 1) // epochs)) / (20.0 - 0.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) ########################## # Test "epoch" freq_type # ########################## # The number of epoch does not pass the finish of 5. gradual_qnoise_callback_6 = QNoiseScheduler( start=1, finish=5, freq_type="epoch", exponent=3.0) model.fit( x_train, y_train, batch_size=1, epochs=3, verbose=0, callbacks=[ gradual_qnoise_callback_6, ], ) qnoise_factor = [ np.array(d.qnoise_factor) for d in gradual_qnoise_callback_6.quantizers ] val = 1 - np.power((5.0 - 2.0) / (5.0 - 1.0), 3) assert_equal(qnoise_factor, np.full_like(qnoise_factor, val)) assert_equal(len(gradual_qnoise_callback_6.quantizers), 3) # Test "epoch" if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/codebook_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Test activation from qlayers.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from numpy.testing import assert_allclose import pytest from qkeras import quantized_bits from qkeras.codebook import weight_compression @pytest.mark.parametrize( 'bits, axis, quantizer, weights, expected_result', [ ( 3, 3, quantized_bits(4, 0, 1, alpha='auto_po2'), np.array([ [[ 0.14170583, -0.34360626, 0.29548156], [ 0.6517242, 0.06870092, -0.21646781], [ 0.12486842, -0.05406165, -0.23690471]], [[-0.07540564, 0.2123149 , 0.2382695 ], [ 0.78434753, 0.36171672, -0.43612534], [ 0.3685556, 0.41328752, -0.48990643]], [[-0.04438099, 0.0590747 , -0.0644061 ], [ 0.15280165, 0.40714318, -0.04622072], [ 0.21560416, -0.22131851, -0.5365659 ]]], dtype=np.float32), np.array([ [[ 0.125 , -0.375 , 0.25 ], [ 0.75 , 0.125 , -0.25 ], [ 0.125 , 0.0 , -0.25 ]], [[ 0.0 , 0.25 , 0.25 ], [ 0.75 , 0.375 , -0.375 ], [ 0.375 , 0.375 , -0.5 ]], [[ 0.0 , 0.0 , 0.0 ], [ 0.125 , 0.375 , 0.0 ], [ 0.25 , -0.25 , -0.5 ]]], dtype=np.float32) ) ] ) def test_codebook_weights(bits, axis, quantizer, weights, expected_result): np.random.seed(22) weights = weights.reshape(weights.shape + (1,)) expected_result = expected_result.reshape(expected_result.shape + (1,)) index_table, codebook_table = weight_compression(weights, bits, axis, quantizer) new_weights = np.zeros(weights.shape) for i in range(weights.shape[axis]): new_weights[:, :, :, i] = codebook_table[i][index_table[:, :, :, i]] assert_allclose(new_weights, expected_result, rtol=1e-4) if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/leakyrelu_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in 
compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Test activation from qlayers.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from numpy.testing import assert_allclose import pytest from tensorflow.keras import backend as K from qkeras import quantized_relu from qkeras import quantized_relu_po2 @pytest.mark.parametrize( 'bits, integer, use_sigmoid, negative_slope, test_values, expected_values', [ (6, 2, 0, 0.25, np.array( [[-3.0, -2.0, -1.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688, 6.0]], dtype=K.floatx()), np.array([[-0.75, -0.5, -0.25, 0.0, 2.5, 3.375, 1.5, 1.0, 0.0, 3.875]], dtype=K.floatx()), ), (6, 2, 1, 0.125, np.array([[ 0.458069, 0.573227, 0.194336, 1.539047, 0.045883, 4.009995, 3.962494, 3.937500, 0.363266, 0.875198, 0.710938, 4.000000, 7.000000, 3.937500, 3.937592, 0.199326, 0.458008, 0.625977, 0.544922, 1.046875, 0.586899, 3.367188, 3.804688, 0.312500, 0.062500, 0.562500, 0.375000, 3.367188, 1.046875, 2.796875, 0.054688, 1.562500, 2.562500 ]], dtype=K.floatx()), np.array([[ 0.5 , 0.5 , 0.25 , 1.5 , 0. , 3.875, 3.875, 3.875, 0.25 , 1. , 0.75 , 3.875, 3.875, 3.875, 3.875, 0.25 , 0.5 , 0.75 , 0.5 , 1. , 0.5 , 3.25 , 3.75 , 0.25 , 0. , 0.5 , 0.5 , 3.25 , 1. , 2.75 , 0. 
, 1.5 , 2.5 ]], dtype=K.floatx())), (6, 2, 1, 0.125, np.array([[ -0.458069, -0.573227, -0.194336, -1.539047, -0.045883, -4.009995, -3.962494, -3.937500, -0.363266, -0.875198, -0.710938, -4.000000, -7.000000, -3.937500, -3.937592, -0.199326, -0.458008, -0.625977, -0.544922, -1.046875, -0.586899, -3.367188, -3.804688, -0.312500, -0.062500, -0.562500, -0.375000, -3.367188, -1.046875, -2.796875, -0.054688, -1.562500, -2.562500 ]], dtype=K.floatx()), np.array([[ 0.0, 0.0, 0.0, -0.25, 0.0, -0.5, -0.5, -0.5, 0.0, 0.0, 0.0, -0.5, -0.5, -0.5, -0.5, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, -0.5, -0.5, 0.0, 0.0, 0.0, 0.0, -0.5, -0.25, -0.25, 0.0, -0.25, -0.25 ]], dtype=K.floatx())), ]) def test_quantized_relu(bits, integer, use_sigmoid, negative_slope, test_values, expected_values): """Test quantized_relu function.""" x = K.placeholder(ndim=2) f = K.function([x], [quantized_relu(bits, integer, use_sigmoid, negative_slope)(x)]) result = f([test_values])[0] assert_allclose(result, expected_values, rtol=1e-05) @pytest.mark.parametrize( 'bits, negative_slope, test_values, expected_values', [ ( 8, 2**-4, np.array([[ -1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01, -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01, -2.00000000e-01, -1.00000000e-01, -2.22044605e-16, 1.00000000e-01, 2.00000000e-01, 3.00000000e-01, 4.00000000e-01, 5.00000000e-01, 6.00000000e-01, 7.00000000e-01, 8.00000000e-01, 9.00000000e-01 ]], dtype=K.floatx()), np.array([[ -0.0625 , -0.0625 , -0.0625 , -0.03125 , -0.03125 , -0.03125 , -0.03125 , -0.015625 , -0.015625 , -0.0078125, 0. , 0.125 , 0.25 , 0.25 , 0.5 , 0.5 , 0.5 , 0.5 , 1. , 1. 
]], dtype=K.floatx()) ), ( 3, 2**-4, np.array([[ -1.00000000e+00, -9.00000000e-01, -8.00000000e-01, -7.00000000e-01, -6.00000000e-01, -5.00000000e-01, -4.00000000e-01, -3.00000000e-01, -2.00000000e-01, -1.00000000e-01, -2.22044605e-16, 1.00000000e-01, 2.00000000e-01, 3.00000000e-01, 4.00000000e-01, 5.00000000e-01, 6.00000000e-01, 7.00000000e-01, 8.00000000e-01, 9.00000000e-01 ]], dtype=K.floatx()), np.array([[ -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, -0.0625, 0.125 , 0.25 , 0.25 , 0.5 , 0.5 , 0.5 , 0.5 , 1. , 1. ]], dtype=K.floatx()) ), ( 6, 2**-3, np.array([[ -3.0, -2.0, -1.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688, 6.0]], dtype=K.floatx()), np.array([[ -5.00000000e-01, -2.50000000e-01, -1.25000000e-01, 2.32830644e-10, 2.00000000e+00, 4.00000000e+00, 2.00000000e+00, 1.00000000e+00, 6.25000000e-02, 8.00000000e+00 ]], dtype=K.floatx()) ) ]) def test_quantized_relu_po2(bits, negative_slope, test_values, expected_values): x = K.placeholder(ndim=2) f = K.function([x], [quantized_relu_po2(bits, negative_slope=negative_slope)(x)]) result = f([test_values])[0] assert_allclose(result, expected_values, rtol=1e-05) if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/min_max_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests min/max values that are used for autorange.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import pytest from qkeras import * from tensorflow.keras import backend as K def test_binary(): q = binary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 q = stochastic_binary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 def test_ternary(): q = ternary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 q = stochastic_ternary(alpha=1.0) assert q.min() == -1.0 assert q.max() == 1.0 def test_quantized_bits(): results = { (1,0): [-1.0, 1.0], (2,0): [-1.0, 1.0], (3,0): [-1.0, 1.0], (4,0): [-1.0, 1.0], (5,0): [-1.0, 1.0], (6,0): [-1.0, 1.0], (7,0): [-1.0, 1.0], (8,0): [-1.0, 1.0], (1,1): [-1.0, 1.0], (2,1): [-2.0, 2.0], (3,1): [-2.0, 2.0], (4,1): [-2.0, 2.0], (5,1): [-2.0, 2.0], (6,1): [-2.0, 2.0], (7,1): [-2.0, 2.0], (8,1): [-2.0, 2.0], (3,2): [-4.0, 4.0], (4,2): [-4.0, 4.0], (5,2): [-4.0, 4.0], (6,2): [-4.0, 4.0], (7,2): [-4.0, 4.0], (8,2): [-4.0, 4.0], } for i in range(3): for b in range(1,9): if b <= i: continue q = quantized_bits(b,i,1) expected = results[(b,i)] assert expected[0] == q.min() assert expected[1] == q.max() def test_po2(): po2 = { 3: [-2, 2], 4: [-8, 8], 5: [-128, 128], 6: [-32768, 32768] } po2_max_value = { (3,1): [-1.0, 1.0], (3,2): [-2, 2], (3,4): [-4, 4], (4,1): [-1.0, 1.0], (4,2): [-2, 2], (4,4): [-4, 4], (4,8): [-8, 8], (5,1): [-1.0, 1.0], (5,2): [-2, 2], (5,4): [-4, 4], (5,8): [-8, 8], (5,16): [-16, 16], (6,1): [-1.0, 1.0], (6,2): [-2, 2], (6,4): [-4, 4], (6,8): [-8, 8], (6,16): [-16, 16], (6,32): [-32, 32] } po2_quadratic = { 4: [-4, 4], 5: [-64, 64], 6: [-16384, 16384] } relu_po2_quadratic = { 4: [0.00390625, 64], 5: [1.52587890625e-05, 16384], 6: [2.3283064365386963e-10, 1073741824] } for b in range(3,7): q = quantized_po2(b) assert po2[b][0] == q.min() assert po2[b][1] == 
q.max() for i in range(0,b): q = quantized_po2(b,2**i) assert po2_max_value[(b,2**i)][0] == q.min() assert po2_max_value[(b,2**i)][1] == q.max() for b in range(4,7): q = quantized_po2(b,quadratic_approximation=True) assert po2_quadratic[b][0] == q.min() assert po2_quadratic[b][1] == q.max() q = quantized_relu_po2(b,quadratic_approximation=True) assert relu_po2_quadratic[b][0] == q.min() assert relu_po2_quadratic[b][1] == q.max() if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/print_qstats_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import pytest from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Conv2D from tensorflow.keras.layers import DepthwiseConv2D from tensorflow.keras.layers import BatchNormalization from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from qkeras.estimate import print_qstats from qkeras.utils import model_quantize from qkeras import QConv2D from qkeras.quantizers import * def create_network(): xi = Input((28, 28, 1)) x = Conv2D(32, (3, 3))(xi) x = Activation("relu")(x) x = Conv2D(32, (3, 3), activation="relu")(x) x = Activation("softmax")(x) return Model(inputs=xi, outputs=x) def create_mix_network(): xi = Input((28, 28, 1)) x = QConv2D(32, (3, 3), kernel_quantizer=binary())(xi) x = Activation("relu")(x) x = Conv2D(32, (3, 3))(x) x = Activation("softmax")(x) return Model(inputs=xi, outputs=x) def create_network_with_bn(): """Creates a network contains both QConv2D and QDepthwiseConv2D layers.""" xi = Input((28, 28, 1)) x = Conv2D(32, (3, 3))(xi) x = BatchNormalization()(x) x = Activation("relu")(x) x = DepthwiseConv2D((3, 3), activation="relu")(x) x = BatchNormalization()(x) x = Activation("softmax")(x) return Model(inputs=xi, outputs=x) def test_conversion_print_qstats(): # this tests if references in tensorflow are working properly. 
m = create_network() d = { "QConv2D": { "kernel_quantizer": "binary", "bias_quantizer": "binary" }, "QActivation": { "relu": "ternary" } } qq = model_quantize(m, d, 4) qq.summary() print_qstats(qq) # test if print_qstats works with unquantized layers print_qstats(m) # test if print_qstats works with mixture of quantized and unquantized layers m1 = create_mix_network() print_qstats(m1) m2 = create_network_with_bn() d2 = { "QConv2D": { "kernel_quantizer": "binary", "bias_quantizer": "binary" }, "QActivation": { "relu": "ternary" }, "QConv2DBatchnorm": { "kernel_quantizer": "ternary", "bias_quantizer": "ternary", }, "QDepthwiseConv2DBatchnorm": { "depthwise_quantizer": "ternary", "bias_quantizer": "ternary", }, } m2 = model_quantize(m2, d2, 4, enable_bn_folding=True) m2.summary() print_qstats(m2) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/qactivation_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Test activation quantizers from qlayers.py / quantizers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose, assert_array_equal
import pytest
from tensorflow import keras
from tensorflow.keras import backend as K
import tempfile

from qkeras import set_internal_sigmoid
from qkeras import binary
from qkeras import hard_sigmoid
from qkeras import quantized_bits
from qkeras import quantized_hswish
from qkeras import quantized_po2
from qkeras import quantized_relu
from qkeras import quantized_relu_po2
from qkeras import quantized_sigmoid
from qkeras import quantized_tanh
from qkeras import smooth_sigmoid
from qkeras import stochastic_binary
from qkeras import stochastic_ternary
from qkeras import ternary
from qkeras.quantizers import _default_sigmoid_type


# NOTE(review): the "disable_" prefix on the two quantized_po2 tests below
# keeps pytest from collecting them; the parametrized golden values are
# retained for reference.
@pytest.mark.parametrize(
    'bits, max_value, use_stochastic_rounding, quadratic_approximation, '
    'log2_rounding, test_values, expected_values',
    [
        # bits=4 without max_value. Therefore the max exponent is 4 when
        # quadratic approximation is enabled. The max and min values from this
        # quantization function are 16 and -16 respectively.
        (
            4, None, 0, 1, "floor",
            np.array(
                [[-10.0, -0.25, 0.25, 1.0, 1.99, 2.0, 5.0, 10.0, 16.0, 32.0]],
                dtype=K.floatx()),
            np.array(
                [[-4.0, -0.25, 0.25, 1.0, 1.0, 1.0, 4.0, 4.0, 16.0, 16.0]],
                dtype=K.floatx()),
        ),
        # bits=3. The minimum exponent is -4. Therefore, the smallest absolute
        # value is 0.0625 in this quantization. The max absolute value is 0.5,
        # which is specified by the second input argument.
        (
            3, 0.5, 0, 0, "floor",
            np.array([[-7, -0.12, -0.03, 0.01, 5]], dtype=K.floatx()),
            np.array([[-0.5, -0.0625, -0.0625, 0.0625, 0.5]],
                     dtype=K.floatx()),
        ),
        (8, None, 0, 0, "floor",
         np.array(
             [[-3, -2, -1.5, -0.5, -0.033, 0.5, 0.667, 1, 1.5, 4, 10]],
             dtype=K.floatx()),
         np.array(
             [[-2, -2, -1, -0.5, -0.03125, 0.5, 0.5, 1, 1, 4, 8]],
             dtype=K.floatx()),
        ),
        (4, None, 0, 0, "floor",
         np.array(
             [[-16, -7, -0.12, -0.03, 0, 0.01, 5, 10]], dtype=K.floatx()),
         np.array(
             [[-8, -4, -0.0625, -0.0625, 0.0625, 0.0625, 4, 8]],
             dtype=K.floatx()),
        ),
        (3, 0.5, 0, 0, "floor",
         np.array([[-7, -0.12, -0.03, 0.01, 5]], dtype=K.floatx()),
         np.array([[-0.5, -0.0625, -0.0625, 0.0625, 0.5]], dtype=K.floatx()),
        ),
        (4, 4, 0, 0, "floor",
         np.array([[-7, -0.12, -0.03, 0, 0.01, 5]], dtype=K.floatx()),
         np.array([[-4, -0.0625, -0.0625, 0.0625, 0.0625, 4]],
                  dtype=K.floatx()),
        ),
        (4, None, 0, 1, "floor",
         np.array(
             [[0.01, 0.03, 0.06, 0.5, 1, 2, 5, 10, 16, 32]],
             dtype=K.floatx()),
         np.array(
             [[0.00390625, 0.015625, 0.015625, 0.25, 1, 1, 4, 4, 16, 16]],
             dtype=K.floatx()),
        ),
        (4, None, 0, 1, "floor",
         np.array(
             [[-32, -16, -10, -5, -2, -1, -0.5, -0.03, -0.01]],
             dtype=K.floatx()),
         np.array(
             [[-16, -16, -4, -4, -1, -1, -0.25, -0.015625, -0.00390625]],
             dtype=K.floatx()),
        ),
        # NOTE(review): this case duplicates the previous one — presumably a
        # copy/paste leftover; harmless but could be removed.
        (4, None, 0, 1, "floor",
         np.array(
             [[-32, -16, -10, -5, -2, -1, -0.5, -0.03, -0.01]],
             dtype=K.floatx()),
         np.array(
             [[-16, -16, -4, -4, -1, -1, -0.25, -0.015625, -0.00390625]],
             dtype=K.floatx()),
        ),
    ])
def disable_test_quantized_po2(
    bits, max_value, use_stochastic_rounding, quadratic_approximation,
    log2_rounding, test_values, expected_values):
  """Test quantized_po2 function."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_po2(
      bits, max_value, use_stochastic_rounding, quadratic_approximation,
      log2_rounding)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05, atol=1e-05)


@pytest.mark.parametrize(
    'bits, max_value, use_stochastic_rounding, quadratic_approximation, ' +
    'log2_rounding, test_values, expected_values',
    [
        # bits=3 without max_value. Therefore the max exponent is 4 when
        # quadratic approximation is enabled. The max value from this
        # quantization function is 16. For the negative value, relu enforce it
        # to be the minimum value of this quantization function, which is 2**-4.
        (
            3, None, 0, 1, "floor",
            np.array(
                [[-10.0, -0.25, 0.25, 1.0, 1.99, 2.01, 5.0, 10.0, 16.0, 32.0]],
                dtype=K.floatx()),
            np.array(
                [[0.0625, 0.0625, 0.25, 1.0, 1.0, 1.0, 4.0, 4.0, 16.0, 16.0]],
                dtype=K.floatx()),
        ),
        # bits=3. The minimum exponent is -4. Therefore, the smallest absolute
        # value is 0.0625 in this quantization. The max absolute value is 4,
        # which is specified by the second input argument.
        (3, 4, 0, 0, "floor",
         np.array([[-7.0, -0.12, -0.03, 0, 0.01, 5.0]], dtype=K.floatx()),
         np.array([[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0]],
                  dtype=K.floatx())
        ),
        (8, None, 0, 0, "floor",
         np.array([[-0.033, 0.5, 0.667, 1, 1.5, 4, 10]], dtype=K.floatx()),
         np.array([[0, 0.5, 0.5, 1, 1, 4, 8]], dtype=K.floatx()),
        ),
        (3, None, 0, 0, "floor",
         np.array(
             [[-16.0, -7.0, -0.12, -0.03, 0, 0.01, 5.0, 10.0]],
             dtype=K.floatx()),
         np.array(
             [[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0, 8.0]],
             dtype=K.floatx()),
        ),
        (2, 0.5, 0, 0, "floor",
         np.array([[-7.0, -0.12, -0.03, 0.01, 5.0]], dtype=K.floatx()),
         np.array([[0.0625, 0.0625, 0.0625, 0.0625, 0.5]], dtype=K.floatx()),
        ),
        (3, 4, 0, 0, "floor",
         np.array(
             [[-7.0, -0.12, -0.03, 0, 0.01, 5.0]], dtype=K.floatx()),
         np.array(
             [[0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 4.0]],
             dtype=K.floatx()),
        ),
        (3, None, 0, 1, "floor",
         np.array(
             [[0.01, 0.03, 0.06, 0.5, 1, 2, 5, 10, 16, 32]],
             dtype=K.floatx()),
         np.array(
             [[0.00390625, 0.015625, 0.015625, 0.25, 1, 1, 4, 4, 16, 16]],
             dtype=K.floatx()),
        ),
    ])
def disable_test_quantized_relu_po2(bits, max_value, use_stochastic_rounding,
                                    quadratic_approximation, log2_rounding,
                                    test_values, expected_values):
  """Test quantized_relu_po2 function (non-negative po2 quantizer)."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_relu_po2(bits, max_value, 0,
                                          use_stochastic_rounding,
                                          quadratic_approximation,
                                          log2_rounding)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05, atol=1e-05)


def test_smooth_sigmoid():
  """Test smooth_sigmoid function."""
  test_values = np.array(
      [[-3.0, -2.0, -1.0, -0.5, 0.005, 0.0, 0.005, 0.5, 1, 4, 10]],
      dtype=K.floatx())

  def ref_smooth_sigmoid(y):
    # Reference: piecewise-linear sigmoid with slope 0.1875, clipped to [0, 1].
    x = 0.1875 * y + 0.5
    z = 0.0 if x <= 0.0 else (1.0 if x >= 1.0 else x)
    return z

  sigmoid = np.vectorize(ref_smooth_sigmoid)
  x = K.placeholder(ndim=2)
  f = K.function([x], [smooth_sigmoid(x)])
  result = f([test_values])[0]
  expected = sigmoid(test_values)
  assert_allclose(result, expected, rtol=1e-05)


def test_hard_sigmoid():
  """Test hard_sigmoid function."""
  test_values = np.array(
      [[-3.0, -2.0, -1.0, -0.5, 0.005, 0.0, 0.005, 0.5, 1, 4, 10]],
      dtype=K.floatx())

  def ref_hard_sigmoid(y):
    # Reference: piecewise-linear sigmoid with slope 0.5, clipped to [0, 1].
    x = 0.5 * y + 0.5
    z = 0.0 if x <= 0.0 else (1.0 if x >= 1.0 else x)
    return z

  sigmoid = np.vectorize(ref_hard_sigmoid)
  x = K.placeholder(ndim=2)
  f = K.function([x], [hard_sigmoid(x)])
  result = f([test_values])[0]
  expected = sigmoid(test_values)
  assert_allclose(result, expected, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, sigmoid_type, use_real_sigmoid, test_values, expected_values',
    [
        (
            6, "hard", False,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[0.015625, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875]],
                     dtype=K.floatx()),
        ),
        (
            6, "smooth", False,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[0.3125, 0.359375, 0.40625, 0.453125, 0.5, 0.546875,
                       0.59375, 0.640625]],
                     dtype=K.floatx()),
        ),
        (
            6, "real", True,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[0.265625, 0.328125, 0.375, 0.4375, 0.5, 0.5625, 0.625,
                       0.671875]],
                     dtype=K.floatx()),
        ),
    ])
def test_quantized_sigmoid(bits, sigmoid_type, use_real_sigmoid, test_values,
                           expected_values):
  """Test quantized_sigmoid function with three different sigmoid variants."""
  # set_internal_sigmoid mutates module-global state; restore the default
  # afterwards so other tests are not affected.
  set_internal_sigmoid(sigmoid_type)
  x = K.placeholder(ndim=2)
  f = K.function([x],
                 [quantized_sigmoid(bits, symmetric=True,
                                    use_real_sigmoid=use_real_sigmoid)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, sigmoid_type, use_real_sigmoid, test_values, expected_values',
    [
        (
            4, "hard", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([0.0625, 0.9375], dtype=K.floatx()),
        ),
        (
            4, "smooth", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([0.0625, 0.9375], dtype=K.floatx()),
        ),
        (
            4, "real", True,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([0.0625, 0.9375], dtype=K.floatx()),
        ),
    ])
def test_quantized_sigmoid_limits(
    bits, sigmoid_type, use_real_sigmoid, test_values, expected_values):
  """Test the min and max values of quantized_sigmoid function with three
     different sigmoid variants."""
  set_internal_sigmoid(sigmoid_type)
  x = K.placeholder(ndim=2)
  f = K.function([x],
                 [quantized_sigmoid(bits, symmetric=True,
                                    use_real_sigmoid=use_real_sigmoid)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  # Saturated inputs must also agree with the quantizer's declared min()/max().
  min_max = np.array(
      [quantized_sigmoid(bits, symmetric=True,
                         use_real_sigmoid=use_real_sigmoid).min(),
       quantized_sigmoid(bits, symmetric=True,
                         use_real_sigmoid=use_real_sigmoid).max()])
  assert_allclose(result, expected_values, rtol=1e-05)
  assert_allclose(result, min_max, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, use_real_tanh, test_values, expected_values',
    [
        (
            4, False,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[-0.875, -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                     dtype=K.floatx()),
        ),
        (
            4, True,
            np.array(
                [[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75]],
                dtype=K.floatx()),
            np.array([[-0.75, -0.625, -0.5, -0.25, 0., 0.25, 0.5, 0.625]],
                     dtype=K.floatx()),
        )
    ])
def test_quantized_tanh(bits, use_real_tanh, test_values, expected_values):
  """Test quantized_tanh function with three different sigmoid variants."""
  # store previous sigmoid type
  set_internal_sigmoid('hard')
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_tanh(
      bits, symmetric=True, use_real_tanh=use_real_tanh)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, sigmoid_type, use_real_tanh, test_values, expected_values',
    [
        (
            4, "hard", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([-0.875, 0.875], dtype=K.floatx()),
        ),
        (
            4, "smooth", False,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([-0.875, 0.875], dtype=K.floatx()),
        ),
        (
            4, "real", True,
            np.array([-15, 15], dtype=K.floatx()),
            np.array([-0.875, 0.875], dtype=K.floatx()),
        ),
    ])
def test_quantized_tanh_limits(bits, sigmoid_type, use_real_tanh, test_values,
                               expected_values):
  """Test the min and max values of quantized_tanh function with three
     different sigmoid variants."""
  set_internal_sigmoid(sigmoid_type)
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_tanh(
      bits, symmetric=True, use_real_tanh=use_real_tanh)(x)])
  set_internal_sigmoid(_default_sigmoid_type)
  result = f([test_values])[0]
  min_max = np.array(
      [quantized_tanh(bits, symmetric=True,
                      use_real_tanh=use_real_tanh).min(),
       quantized_tanh(bits, symmetric=True,
                      use_real_tanh=use_real_tanh).max()])
  assert_allclose(result, expected_values, rtol=1e-05)
  assert_allclose(result, min_max, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, integer, use_sigmoid, test_values, expected_values',
    [
        (
            6, 2, 0,
            np.array(
                [[-3.0, 0.0, 2.5625, 3.3671875, 1.5625, 1.046875, 0.054688]],
                dtype=K.floatx()),
            np.array([[0.0, 0.0, 2.5625, 3.375, 1.5625, 1.0625, 0.0625]],
                     dtype=K.floatx()),
        ),
        (6, 2, 1,
         np.array([[
             0.458069, 0.573227, 0.194336, 1.539047, 0.045883, 4.009995,
             3.962494, 3.937500, 0.363266, 0.875198, 0.710938, 4.000000,
             7.000000, 3.937500, 3.937592, 0.199326, 0.458008, 0.625977,
             0.544922, 1.046875, 0.586899, 3.367188, 3.804688, 0.312500,
             0.062500, 0.562500, 0.375000, 3.367188, 1.046875, 2.796875,
             0.054688, 1.562500, 2.562500
         ]], dtype=K.floatx()),
         np.array([[
             0.500000, 0.625000, 0.250000, 1.500000, 0.000000, 3.937500,
             3.937500, 3.937500, 0.375000, 0.875000, 0.750000, 3.937500,
             3.937500, 3.937500, 3.937500, 0.250000, 0.500000, 0.625000,
             0.500000, 1.000000, 0.625000, 3.375000, 3.750000, 0.250000,
             0.000000, 0.500000, 0.375000, 3.375000, 1.000000, 2.750000,
             0.000000, 1.500000, 2.500000
         ]], dtype=K.floatx())),
    ])
def test_quantized_relu(bits, integer, use_sigmoid, test_values,
                        expected_values):
  """Test quantized_relu function."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [quantized_relu(bits, integer, use_sigmoid)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    (
        "bits, integer, symmetric, keep_negative, test_values, expected_values,"
        " rtol"
    ),
    [
        (
            8, 100, 1, True,
            np.array([[1.25e+29, 3, -1.1e+30, 4.0e+32]], dtype=K.floatx()),
            np.array([[1.23794004e+29, 0.0, -1.09929075e+30, 1.26269884e+30]],
                     dtype=K.floatx()),
            5.0e+27,  # Effective quantization step size
        ),
        (
            6, 2, 0, True,
            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                     dtype=K.floatx()),
            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 3.875, 3.875]],
                     dtype=K.floatx()),
            1e-05,
        ),
        (
            6, 2, 0, False,
            np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                     dtype=K.floatx()),
            np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1, 3.9375, 3.9375]],
                     dtype=K.floatx()),
            1e-05,
        ),
        (
            6, 2, 1, True,
            np.array([[-10, -4, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                     dtype=K.floatx()),
            np.array([[-3.875, -3.875, -1.0, -0.5, 0.0, 0.5, 1, 3.875, 3.875]],
                     dtype=K.floatx()),
            1e-05,
        )
    ])
def test_quantized_bits(bits, integer, symmetric, keep_negative, test_values,
                        expected_values, rtol):
  """Test quantized_bits with symmetric/asymmetric and signed/unsigned modes."""
  x = K.placeholder(ndim=2)
  f = K.function([x],
                 [quantized_bits(bits, integer, symmetric, keep_negative)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=rtol)


@pytest.mark.parametrize(
    "bits, integer, expected_output, expected_scale",
    [(4, 2,
      [[0.25, 3.0, 0.09375, 0.25], [0.4375, 0.0, 0.21875, 1.5]],
      [[0.125, 1., 0.0625, 0.5]]),
     (4, 1,
      [[0.25, 3., 0.09375, 0.25], [0.4375, 0., 0.21875, 1.5]],
      [[0.25, 2., 0.125, 1.]]),
     (5, 2,
      [[0.21875, 2.75, 0.09375, 0.375], [0.46875, 0.25, 0.234375, 1.375]],
      [[0.125, 1, 0.0625, 0.5]]),
    ])
def test_quantized_bits_with_auto_po2_scale(
    bits, integer, expected_output, expected_scale):
  # Check that alpha="auto_po2" produces the expected per-channel
  # power-of-two scale and quantized values.
  x = np.array([[0.23, 2.76, 0.1, 0.33], [0.53, 0.16, 0.3, 1.43]])
  q = quantized_bits(
      bits=bits, integer=integer, alpha="auto_po2")
  q_out = q(x).numpy()
  scale = q.scale.numpy()
  np.testing.assert_array_equal(q_out, expected_output)
  np.testing.assert_array_equal(scale, expected_scale)


def test_quantized_bits_with_post_training_scale():
  # Test if quantizer with the fixed (post-training) scale reproduces the
  # auto_po2 quantizer exactly.
  np.random.seed(42)
  array = np.random.uniform(low=0, high=10, size=(7, 64, 64, 3))
  auto_po2_quantizer = quantized_bits(
      bits=8, integer=3, alpha="auto_po2")
  qw = auto_po2_quantizer(array)
  auto_po2_scale = auto_po2_quantizer.scale.numpy()

  alpha_ndarray_quantizer = quantized_bits(
      bits=8, integer=3, alpha="auto_po2",
      post_training_scale=auto_po2_scale)
  # Check if the scale is the same as auto_po2 quantizer.
  np.testing.assert_array_equal(auto_po2_scale, alpha_ndarray_quantizer.scale)
  qw_ndarray = alpha_ndarray_quantizer(array)
  # Check if the quantized values are the same as auto_po2 quantizer.
  np.testing.assert_array_equal(qw.numpy(), qw_ndarray.numpy())


@pytest.mark.parametrize('alpha, threshold, test_values, expected_values', [
    (1.0, 0.33,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, 0, 0.0, 0.0, 1, 1, 1]], dtype=K.floatx())),
    (10.0, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-10.0, -10.0, 0.0, 0, 0.0, 0.0, 0, 0, 10]],
              dtype=K.floatx())),
])
def test_ternary(alpha, threshold, test_values, expected_values):
  """Deterministic ternary quantizer: {-alpha, 0, +alpha} by threshold."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [ternary(alpha, threshold)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize('use_01, alpha, test_values, expected_values', [
    (False, 1.0,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, -1.0, 1, 1, 1, 1, 1]], dtype=K.floatx())),
    (False, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-5.0, -5.0, -5.0, -5, 5.0, 5.0, 5, 5, 5]], dtype=K.floatx())),
    (True, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[0, 0, 0, 0, 5, 5, 5, 5, 5]], dtype=K.floatx())),
])
def test_binary(use_01, alpha, test_values, expected_values):
  """Deterministic binary quantizer, optionally mapping to {0, alpha}."""
  x = K.placeholder(ndim=2)
  f = K.function([x], [binary(use_01, alpha)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize('test_values, expected_values', [
    (np.array([[42.0] * 100000], dtype=K.floatx()), 42.0),
    (np.array([[100.0] * 100000], dtype=K.floatx()), 100.0),
    (np.array([[48.0] * 100000], dtype=K.floatx()), 48.0),
    (np.array([[-141.0] * 100000], dtype=K.floatx()), -141.0),
    (np.array([[-32.0] * 100000], dtype=K.floatx()), -32.0),
    (np.array([[32.0] * 100000], dtype=K.floatx()), 32.0),
    (np.array([[10031.0] * 100000], dtype=K.floatx()), 10031.0),
    (np.array([[0.0] * 100000], dtype=K.floatx()), 0.0),
])
def test_stochastic_round_quantized_po2(test_values, expected_values):
  """Stochastic po2 rounding should be unbiased: the mean over many samples
  of a constant input approximates the input itself."""
  K.set_learning_phase(1)
  np.random.seed(666)
  x = K.placeholder(ndim=2)
  q = quantized_po2(use_stochastic_rounding=True)
  f = K.function([x], [q(x)])
  res = f([test_values])[0]
  res = np.average(res)
  assert_allclose(res, expected_values, rtol=1e-01, atol=1e-6)


@pytest.mark.parametrize('test_values, expected_values', [
    (np.array([[42.0] * 100000], dtype=K.floatx()), 42.0),
    (np.array([[-42.0] * 100000], dtype=K.floatx()), 0.0),
    (np.array([[0.0] * 100000], dtype=K.floatx()), 0.0),
    (np.array([[100.0] * 100000], dtype=K.floatx()), 100.0),
    (np.array([[48.0] * 100000], dtype=K.floatx()), 48.0),
])
def test_stochastic_round_quantized_relu_po2(test_values, expected_values):
  """Same unbiasedness check as above for the relu_po2 variant (negatives
  are clamped to 0)."""
  K.set_learning_phase(1)
  np.random.seed(666)
  x = K.placeholder(ndim=2)
  q = quantized_relu_po2(use_stochastic_rounding=True)
  f = K.function([x], [q(x)])
  res = f([test_values])[0]
  res = np.average(res)
  assert_allclose(res, expected_values, rtol=1e-01, atol=1e-6)


def test_stochastic_binary():
  """Average of many stochastic_binary samples should track sign(x)."""
  np.random.seed(42)
  K.set_learning_phase(1)

  x = np.random.uniform(-0.01, 0.01, size=10)
  x = np.sort(x)
  # Adding a dimension to have a common channel axis for quantization. This is
  # to cope with a bug fix in "_get_scale" without changing the test cases.
  x = np.expand_dims(x, axis=1)

  s = stochastic_binary(alpha="auto_po2")
  # NOTE(review): zeros_like(s) on the quantizer object produces a 0-d array;
  # presumably zeros_like(x) was intended — broadcasting in the accumulation
  # below makes the result equivalent. TODO confirm.
  ty = np.zeros_like(s)
  ts = 0.0

  n = 1000
  for _ in range(n):
    y = K.eval(s(K.constant(x)))
    scale = K.eval(s.scale)[0]
    ts = ts + scale
    ty = ty + (y / scale)

  # Perform squeezing to remove the common channel axis.
  result = (ty/n).astype(np.float32)
  result = np.squeeze(result)
  scale = np.array([ts/n])
  scale = np.squeeze(scale)

  expected = np.array(
      [-1., -1., -1., -0.852, 0.782, 0.768, 0.97, 0.978, 1.0, 1.0]
  ).astype(np.float32)
  expected_scale = np.array([0.003906])
  assert_allclose(result, expected, atol=0.1)
  assert_allclose(scale, expected_scale, rtol=0.1)


@pytest.mark.parametrize('alpha, test_values, expected_values', [
    (1.0,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, -1.0, 1, 1, 1, 1, 1]], dtype=K.floatx())),
    (5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-5.0, -5.0, -5.0, -5, 5.0, 5.0, 5, 5, 5]], dtype=K.floatx()))
])
def test_stochastic_binary_inference_mode(alpha, test_values, expected_values):
  """In inference mode (learning_phase=0) stochastic_binary is deterministic."""
  K.set_learning_phase(0)
  x = K.placeholder(ndim=2)
  q = stochastic_binary(alpha)
  f = K.function([x], [q(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bound, alpha, temperature, expected_values, expected_scale',
    [
        (
            0.01, "auto", 8,
            np.array([-0.973, -0.903, -0.759, -0.574, -0.242, 0.161, 0.508,
                      0.723, 0.874, 0.975]).astype(np.float32),
            np.array([0.008427, 0.007001, 0.0057, 0.004457, 0.003537,
                      0.003416, 0.004507, 0.005536, 0.006853, 0.008282]
                     ).astype(np.float32)
        ),
        (
            0.01, "auto_po2", 8,
            np.array([-0.979, -0.877, -0.639, -0.586, -0.23, 0.154, 0.327,
                      0.603, 0.83, 0.986]).astype(np.float32),
            np.array([0.007812, 0.007812, 0.007812, 0.003906, 0.003906,
                      0.003906, 0.007812, 0.007812, 0.007812, 0.007812]
                     ).astype(np.float32)
        )
    ])
def test_stochastic_ternary(bound, alpha, temperature, expected_values,
                            expected_scale):
  """Average of many stochastic_ternary samples should track the input sign
  with the expected per-channel scale."""
  np.random.seed(42)
  K.set_learning_phase(1)

  n = 1000
  x = np.random.uniform(-bound, bound, size=(n, 10))
  x = np.sort(x, axis=1)

  s = stochastic_ternary(alpha=alpha, temperature=temperature)

  y = K.eval(s(K.constant(x)))
  scale = K.eval(s.scale).astype(np.float32)[0]

  # NOTE(review): zeros_like(s) yields a 0-d array (see test_stochastic_binary)
  # — broadcasting makes the accumulation below still work. TODO confirm.
  ty = np.zeros_like(s)
  for i in range(n):
    ty = ty + (y[i] / scale)

  result = (ty/n).astype(np.float32)

  assert_allclose(result, expected_values, atol=0.1)
  assert_allclose(scale, expected_scale, rtol=0.1)


@pytest.mark.parametrize('alpha, threshold, test_values, expected_values', [
    (1.0, 0.33,
     np.array([[-3.0, -2.0, -1.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-1.0, -1.0, -1.0, 0, 0.0, 0.0, 1, 1, 1]], dtype=K.floatx())),
    (10.0, 5.0,
     np.array([[-11.0, -7.0, -4.0, -0.2, 0.0, 0.3, 1, 4, 10]],
              dtype=K.floatx()),
     np.array([[-10.0, -10.0, 0.0, 0, 0.0, 0.0, 0, 0, 10]],
              dtype=K.floatx())),
])
def test_stochastic_ternary_inference_mode(alpha, threshold, test_values,
                                           expected_values):
  """In inference mode stochastic_ternary behaves like plain ternary."""
  K.set_learning_phase(0)
  x = K.placeholder(ndim=2)
  q = stochastic_ternary(alpha, threshold)
  f = K.function([x], [q(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    # y = x * relu6(x+3)/6, the total word length is 6 bits with 2 integer
    # bits. The quantization is in asymmetric mode.
    ('bits, integer, symmetric, relu_shift, relu_upper_bound,'
     'test_values, expected_values'),
    [
        (6, 2, 0, 3, 6,
         np.array([[-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1, 4, 10]],
                  dtype=K.floatx()),
         np.array([[0., -0.375, -0.375, -0.25, 0., 0.25, 0.625, 3.875,
                    3.875]],
                  dtype=K.floatx()),
        ),
        (6, 4, 1, 3, 6,
         np.array([[-10.0, -2.0, -2.3, -0.25, 0.0, 0.5, 1, 4, 10]],
                  dtype=K.floatx()),
         np.array([[0., -0.5, -0.5, 0., 0., 0.5, 0.5, 4., 10.]],
                  dtype=K.floatx()),
        ),
        (2, 0, 0, 3, 6,
         np.array([[-10.0, -2.0, -2.3, -0.25, 0.0, 0.5, 1, 4, 10]],
                  dtype=K.floatx()),
         np.array([[0., -0.5, -0.5, 0., 0., 0.5, 0.5, 0.5, 0.5]],
                  dtype=K.floatx()),
        ),])
def test_quantized_hswish(bits, integer, symmetric, relu_shift,
                          relu_upper_bound, test_values, expected_values):
  """Test the quantized hard-swish activation."""
  x = K.placeholder(ndim=2)
  f = K.function(
      [x],
      [quantized_hswish(bits, integer, symmetric, relu_shift=relu_shift,
                        relu_upper_bound=relu_upper_bound)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)


def test_quantized_relu_fast_inference():
  """Fast-inference mode must produce identical outputs to the default path."""
  q1 = quantized_relu(10, 2, enable_fast_inference=False)
  q2 = quantized_relu(10, 2, enable_fast_inference=True)
  x = np.array([-2.1, 0.73, 2.36, 4.98])
  np.testing.assert_array_equal(q1(x).numpy(), q2(x).numpy())


if __name__ == '__main__':
  pytest.main([__file__])



================================================
FILE: tests/qadaptiveactivation_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the QAdaptiveActivation layer from qlayers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import numpy as np
import pytest
import tensorflow.compat.v2 as tf

from qkeras.qlayers import QAdaptiveActivation
from qkeras.quantizers import _get_integer_bits


def run_qadaptiveactivation_test(input_val, kwargs):
  """Helper function to test QAdaptiveActivation inputs and outputs."""
  err = 'Failed test with {} on input {}'.format(kwargs, input_val)

  # Only test inputs of shape (batch_size, width, height, channels)
  assert len(input_val.shape) == 4, err
  # Only test short term layer usage with ema_decay == 0
  assert kwargs['ema_decay'] == 0, err
  assert kwargs['ema_freeze_delay'] is None, err

  # Prepare layer in a static TF graph
  model = tf.keras.Sequential([QAdaptiveActivation(**kwargs)])
  model.compile()

  # Test input on untrained EMAs: inference output must equal applying the
  # layer's quantizer directly, and the EMA buffers must still be zero.
  qout = model(input_val, training=False).numpy()
  assert np.isclose(model.layers[0].quantizer(input_val), qout).all(), err
  assert np.isclose(model.layers[0].ema_min.numpy().flatten(), 0).all(), err
  assert np.isclose(model.layers[0].ema_max.numpy().flatten(), 0).all(), err

  # Run an unquantized input and train the EMA. Before quantization_delay is
  # reached, the layer should pass values through its float activation.
  unquantized_out = model(input_val, training=True).numpy()
  assert kwargs['current_step'].numpy() == 0, err
  if kwargs['activation'] == 'quantized_relu':
    assert np.isclose(unquantized_out, np.maximum(input_val, 0)).all(), err
  elif kwargs['activation'] == 'quantized_bits':
    assert np.isclose(unquantized_out, input_val).all(), err
  else:
    raise ValueError('Invalid quantizer type ', kwargs['activation'])

  # Check EMAs (per-channel reduces over batch/height/width only)
  if kwargs['per_channel']:
    assert np.isclose(model.layers[0].ema_min.numpy(),
                      np.min(input_val, axis=(0, 1, 2))).all(), err
    assert np.isclose(model.layers[0].ema_max.numpy(),
                      np.max(input_val, axis=(0, 1, 2))).all(), err
  else:
    assert np.isclose(model.layers[0].ema_min.numpy(),
                      np.min(input_val, axis=(0, 1, 2, 3))).all(), err
    assert np.isclose(model.layers[0].ema_max.numpy(),
                      np.max(input_val, axis=(0, 1, 2, 3))).all(), err

  # Check quantizer configuration was propagated from kwargs
  quant = model.layers[0].quantizer
  assert quant.__class__.__name__ == kwargs['activation'], err
  assert quant.bits == kwargs['total_bits'], err
  assert quant.symmetric == kwargs['symmetric'], err
  keep_negative = None
  if kwargs['activation'] == 'quantized_relu':
    assert not quant.is_quantized_clip, err
    assert quant.negative_slope == kwargs['relu_neg_slope'], err
    assert quant.relu_upper_bound is None, err
    # A nonzero leaky slope means negative values survive the relu.
    keep_negative = kwargs['relu_neg_slope'] != 0
  elif kwargs['activation'] == 'quantized_bits':
    assert quant.keep_negative, err
    assert quant.alpha == 1.0, err
    keep_negative = True
  expected_integer_bits = _get_integer_bits(model.layers[0].ema_min.numpy(),
                                            model.layers[0].ema_max.numpy(),
                                            kwargs['total_bits'],
                                            kwargs['symmetric'],
                                            keep_negative,
                                            kwargs['po2_rounding']).numpy()
  assert np.isclose(expected_integer_bits, quant.integer.numpy()).all(), err

  # Skip to a step where the quantization is used
  kwargs['current_step'].assign(tf.constant(kwargs['quantization_delay'],
                                            tf.int64))

  # Check quantized output
  # To set qnoise_factor to 1.0 explicitly.
  qnoise_factor = np.array(quant.qnoise_factor)
  quant.update_qnoise_factor(1.0)
  expected_qout = np.copy(quant(input_val))
  # Revert qnoise_factor to its original value.
  quant.update_qnoise_factor(qnoise_factor)
  qout = model(input_val, training=True).numpy()
  assert np.isclose(expected_qout, qout).all(), err

  # Check testing mode
  qout = model(input_val, training=False).numpy()
  assert np.isclose(quant(input_val), qout).all(), err


@pytest.mark.parametrize(
    'momentum, ema_freeze_delay, total_steps, estimate_step_count',
    [(0.9, 50, 100, False), (0.5, 1000, 1500, False), (0.1, 2, 100, False),
     (0.999, 98, 100, False), (0.9, 50, 100, True), (0.5, 1000, 1500, True),
     (0.1, 2, 100, True), (0.999, 98, 100, True)])
def test_qadaptiveact_ema(momentum, ema_freeze_delay, total_steps,
                          estimate_step_count):
  """Test the exponential moving averages over time for QAdaptiveActivation."""
  # Initialize a QAdaptiveActivation layer just for testing the EMA.
  # current_step=None makes the layer count steps internally.
  if estimate_step_count:
    step = None
  else:
    step = tf.Variable(0, dtype=tf.int64)
  q_act = QAdaptiveActivation(activation='quantized_bits',
                              total_bits=8,
                              current_step=step,
                              quantization_delay=total_steps*2,
                              ema_freeze_delay=ema_freeze_delay,
                              ema_decay=momentum,
                              per_channel=True,
                              po2_rounding=False)
  model = tf.keras.Sequential([q_act])
  model.compile()

  # Simulate a number of training steps and check the EMA values
  exp_ema_max = 0.0
  exp_ema_min = 0.0
  for i in range(0, total_steps):
    vals = np.random.random((1, 2, 1)) * i  # generate random values for update
    model(vals, training=True)  # Simulate training

    # Check the steps match
    if estimate_step_count:
      assert np.equal(q_act.step.numpy(), i)

    # Calculate expected values: EMAs only update until ema_freeze_delay.
    if i <= ema_freeze_delay:
      exp_ema_max = (exp_ema_max * momentum) + (vals.max() * (1.0 - momentum))
      exp_ema_min = (exp_ema_min * momentum) + (vals.min() * (1.0 - momentum))
    # NOTE(review): quantizer.symmetric is passed for both the symmetric and
    # keep_negative arguments of _get_integer_bits — presumably intentional
    # for quantized_bits, but worth confirming.
    exp_int_bits = _get_integer_bits(exp_ema_min, exp_ema_max,
                                     q_act.quantizer.bits,
                                     q_act.quantizer.symmetric,
                                     q_act.quantizer.symmetric, False)

    # Check results
    assert np.abs(exp_ema_max - q_act.ema_max.numpy()[0]) < 0.0001
    assert np.isclose(exp_int_bits.numpy(), q_act.quantizer.integer.numpy())

    if not estimate_step_count:
      step.assign_add(1)


def test_qadaptiveactivation():
  """Test a wide variety of inputs to the QAdaptiveActivation layer."""
  test_options = {
      'activation': ['quantized_bits', 'quantized_relu'],
      'total_bits': [1, 2, 4, 8, 16],
      'symmetric': [True, False],
      'quantization_delay': [1],  # We will only run for one step
      'per_channel': [True, False],
      'po2_rounding': [True, False],
      'relu_neg_slope': [0.0, -0.5]
  }
  # Exercise the full cartesian product of the options above.
  for args in itertools.product(*test_options.values()):
    args = {list(test_options.keys())[i]: args[i] for i in range(len(args))}
    args['ema_freeze_delay'] = None  # This test does not test the EMA freeze
    args['ema_decay'] = 0  # This test does not test the EMA delay
    for img_shape in [(1, 28, 28, 3), (1, 3, 4, 5)]:
      for input_scale in [255, 1]:
        args['current_step'] = tf.Variable(0, dtype=tf.int64)
        img = np.random.random(img_shape) * input_scale
        run_qadaptiveactivation_test(img, args)


if __name__ == '__main__':
  pytest.main([__file__])



================================================
FILE: tests/qalpha_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test get_weight_scale function with auto and auto_po2 modes of
quantizers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import logging
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest
from tensorflow.keras import backend as K

from qkeras import binary
from qkeras import get_weight_scale
from qkeras import ternary
from qkeras.quantizers import _get_integer_bits


# expected value if input is uniform distribution is:
# - alpha = m/2.0 for binary
# - alpha = (m+d)/2.0 for ternary


def test_binary_auto():
  """Test binary auto scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer = binary(alpha="auto")
    q = K.eval(quantizer(x))

    # For uniform input in [-m, m] the learned scale converges to m/2.
    result = get_weight_scale(quantizer, q)
    expected = m / 2.0
    logging.info("expect %s", expected)
    logging.info("result %s", result)
    assert_allclose(result, expected, rtol=0.02)


def test_binary_auto_po2():
  """Test binary auto_po2 scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer_ref = binary(alpha="auto")
    quantizer = binary(alpha="auto_po2")
    q_ref = K.eval(quantizer_ref(x))
    q = K.eval(quantizer(x))

    # auto_po2 should equal the "auto" scale rounded to the nearest
    # power of two (in log2 space).
    ref = get_weight_scale(quantizer_ref, q_ref)
    expected = np.power(2.0, np.round(np.log2(ref)))
    result = get_weight_scale(quantizer, q)
    assert_allclose(result, expected, rtol=0.0001)


def test_ternary_auto():
  """Test ternary auto scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer = ternary(alpha="auto")
    q = K.eval(quantizer(x))

    # For uniform input the ternary threshold d converges to m/3 and the
    # scale to (m + d)/2.
    d = m/3.0
    result = np.mean(get_weight_scale(quantizer, q))
    expected = (m + d) / 2.0
    assert_allclose(result, expected, rtol=0.02)


def test_ternary_auto_po2():
  """Test ternary auto_po2 scale quantizer."""
  np.random.seed(42)
  N = 1000000
  m_list = [1.0, 0.1, 0.01, 0.001]

  for m in m_list:
    x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
    x = K.constant(x)
    quantizer_ref = ternary(alpha="auto")
    quantizer = ternary(alpha="auto_po2")
    q_ref = K.eval(quantizer_ref(x))
    q = K.eval(quantizer(x))

    # auto_po2 should equal the "auto" scale rounded to the nearest
    # power of two (in log2 space).
    ref = get_weight_scale(quantizer_ref, q_ref)
    expected = np.power(2.0, np.round(np.log2(ref)))
    result = get_weight_scale(quantizer, q)
    assert_allclose(result, expected, rtol=0.0001)


def test_get_integer_bits():
  """Test automated integer bit (po2 scale) estimator."""
  bits = 4
  min_value = np.array([
      -4.0, -4.0, -4.0, -4.0, 1.0, -3.0, -10.0, -16, -25, 0, 0, 0, 0.1, 0.0,
      -1.0, 0.0, 0.0, 0.0, 0, 0, 0
  ])
  max_value = np.array([
      3.5, 3.51, 3.75, 3.751, 2.0, 4.0, 5.0, 8, 0, 0, 0.1, 0.999, 0.5, 0.8751,
      0.9375, 0.93751, 1.875, 1.8751, 9, 11, 12
  ])

  # unsigned number (keep_negative=False) without clipping.
  symmetric = False  # symmetric is irrelevant.
  keep_negative = False
  is_clipping = False
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 3, 2, 3, 3, 4, 0, 0, 0, 1, 0, 0, 0, 1, 1, 2, 4, 4, 4]))

  # unsigned number (keep_negative=False) with clipping.
  symmetric = False  # symmetric is irrelevant.
  keep_negative = False
  is_clipping = True
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 2, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 4]))

  # signed number (keep_negative=True) non-symmetric without clipping
  symmetric = False
  keep_negative = True
  is_clipping = False
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 3, 3, 3, 2, 3, 3, 3, 3, 0, 0, 1, 0, 1, 1, 1, 2, 2, 3, 3, 3]))

  # signed number (keep_negative=True) non-symmetric with clipping
  symmetric = False
  keep_negative = True
  is_clipping = True
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 2, 1, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3]))

  # signed number (keep_negative=True) symmetric without clipping
  symmetric = True
  keep_negative = True
  is_clipping = False
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [3, 3, 3, 3, 2, 3, 3, 3, 3, 0, 0, 1, 0, 1, 1, 1, 2, 2, 3, 3, 3]))

  # signed number (keep_negative=True) symmetric with clipping
  symmetric = True
  keep_negative = True
  is_clipping = True
  integer_bits = _get_integer_bits(
      min_value=min_value, max_value=max_value, bits=bits,
      symmetric=symmetric, keep_negative=keep_negative,
      is_clipping=is_clipping)
  assert_equal(
      integer_bits,
      np.array(
          [2, 2, 2, 2, 1, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3]))


if __name__ == "__main__":
  pytest.main([__file__])



================================================
FILE: tests/qconvolutional_test.py
================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Test layers from qconvolutional.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import numpy as np from numpy.testing import assert_allclose import pytest import tempfile import tensorflow as tf from tensorflow.keras import backend as K from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Input from tensorflow.keras.models import Model from tensorflow.keras.backend import clear_session from qkeras import binary from qkeras import ternary from qkeras import QActivation from qkeras import QDense from qkeras import QConv1D from qkeras import QConv2D from qkeras import QConv2DTranspose from qkeras import QSeparableConv1D from qkeras import QSeparableConv2D from qkeras import quantized_bits from qkeras import quantized_relu from qkeras.utils import model_save_quantized_weights from qkeras.utils import quantized_model_from_json from qkeras.utils import load_qmodel from qkeras import print_qstats from qkeras import extract_model_operations def test_qnetwork(): K.set_learning_phase(1) x = x_in = Input((28, 28, 1), name='input') x = QSeparableConv2D( 32, (2, 2), strides=(2, 2), depthwise_quantizer=binary(alpha=1.0), 
pointwise_quantizer=quantized_bits(4, 0, 1, alpha=1.0), activation=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_0_m')( x) x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x) x = QConv2D( 64, (3, 3), strides=(2, 2), kernel_quantizer=ternary(alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_1_m', activation=quantized_relu(6, 3, 1))( x) x = QConv2D( 64, (2, 2), strides=(2, 2), kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_2_m')( x) x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x) x = Flatten(name='flatten')(x) x = QDense( 10, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='dense')( x) x = Activation('softmax', name='softmax')(x) model = Model(inputs=[x_in], outputs=[x]) # reload the model to ensure saving/loading works json_string = model.to_json() clear_session() model = quantized_model_from_json(json_string) # generate same output for weights np.random.seed(42) for layer in model.layers: all_weights = [] for i, weights in enumerate(layer.get_weights()): input_size = np.prod(layer.input.shape.as_list()[1:]) if (len(layer.get_weights()) == 3 and i > 0): # pointwise kernel and bias input_size = input_size // np.prod(layer.kernel_size) shape = weights.shape print(shape) assert input_size > 0, 'input size for {} {}'.format(layer.name, i) # he normal initialization with a scale factor of 2.0 all_weights.append( 10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape)) if all_weights: layer.set_weights(all_weights) # apply quantizer to weights model_save_quantized_weights(model) all_weights = [] for layer in model.layers: for i, weights in enumerate(layer.get_weights()): w = np.sum(weights) all_weights.append(w) all_weights = np.array(all_weights) # test_qnetwork_weight_quantization all_weights_signature = np.array( [2., -6.75, -0.625, -2., -0.25, -56., 1.125, -1.625, -1.125]) assert 
all_weights.size == all_weights_signature.size assert np.all(all_weights == all_weights_signature) # test_qnetwork_forward: expected_output = np.array( [[0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 7.6e-06], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00], [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00]]).astype(np.float16) inputs = 2 * np.random.rand(10, 28, 28, 1) actual_output = model.predict(inputs).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) def test_sequential_qnetwork(): model = tf.keras.Sequential() model.add(Input((28, 28, 1), name='input')) model.add( QConv2D( 32, (2, 2), strides=(2, 2), kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_0_m')) model.add(QActivation(quantized_relu(4, 0), name='act0_m')) model.add( QConv2D( 64, (3, 3), strides=(2, 2), kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_1_m')) model.add(QActivation(quantized_relu(4, 0), name='act1_m')) model.add( QConv2D( 64, (2, 2), strides=(2, 2), kernel_quantizer=quantized_bits(4, 0, 1), bias_quantizer=quantized_bits(4, 0, 1), name='conv2d_2_m')) model.add(QActivation(quantized_relu(4, 0), name='act2_m')) model.add(Flatten()) model.add( QDense( 10, kernel_quantizer=quantized_bits(4, 0, 1), 
bias_quantizer=quantized_bits(4, 0, 1), name='dense')) model.add(Activation('softmax', name='softmax')) # Check that all model operation were found correctly model_ops = extract_model_operations(model) for layer in model_ops.keys(): assert model_ops[layer]['type'][0] != 'null' return model @pytest.mark.parametrize("layer_cls", ["QConv1D", "QSeparableConv1D"]) def test_qconv1d(layer_cls): np.random.seed(33) if layer_cls == "QConv1D": x = Input((4, 4,)) y = QConv1D( 2, 1, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='qconv1d')( x) model = Model(inputs=x, outputs=y) else: x = Input((4, 4,)) y = QSeparableConv1D( 2, 2, depthwise_quantizer=quantized_bits(6, 2, 1, alpha=1.0), pointwise_quantizer=quantized_bits(4, 0, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='qconv1d')( x) model = Model(inputs=x, outputs=y) # Extract model operations model_ops = extract_model_operations(model) # Check the input layer model operation was found correctly assert model_ops['qconv1d']['type'][0] != 'null' # Assertion about the number of operations for this (Separable)Conv1D layer if layer_cls == "QConv1D": assert model_ops['qconv1d']['number_of_operations'] == 32 else: assert model_ops['qconv1d']['number_of_operations'] == 30 # Print qstats to make sure it works with Conv1D layer print_qstats(model) # reload the model to ensure saving/loading works # json_string = model.to_json() # clear_session() # model = quantized_model_from_json(json_string) for layer in model.layers: all_weights = [] for i, weights in enumerate(layer.get_weights()): input_size = np.prod(layer.input.shape.as_list()[1:]) if input_size is None: input_size = 10 * 10 shape = weights.shape assert input_size > 0, 'input size for {} {}'.format(layer.name, i) all_weights.append( 10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape)) if all_weights: layer.set_weights(all_weights) # Save the model as an h5 file using Keras's model.save() fd, fname = 
tempfile.mkstemp('.h5') model.save(fname) del model # Delete the existing model # Return a compiled model identical to the previous one model = load_qmodel(fname) # Clean the created h5 file after loading the model os.close(fd) os.remove(fname) # apply quantizer to weights model_save_quantized_weights(model) inputs = np.random.rand(2, 4, 4) p = model.predict(inputs).astype(np.float16) if layer_cls == "QConv1D": y = np.array([[[-2.441, 3.816], [-3.807, -1.426], [-2.684, -1.317], [-1.659, 0.9834]], [[-4.99, 1.139], [-2.559, -1.216], [-2.285, 1.905], [-2.652, -0.467]]]).astype(np.float16) else: y = np.array([[[-2.275, -3.178], [-0.4358, -3.262], [ 1.987, 0.3987]], [[-0.01251, -0.376], [ 0.3928, -1.328], [-1.243, -2.43 ]]] ).astype(np.float16) assert_allclose(p, y, rtol=1e-4) def test_qconv2dtranspose(): x = Input((4, 4, 1,)) y = QConv2DTranspose( 1, kernel_size=(3, 3), kernel_quantizer=binary(), bias_quantizer=binary(), name='conv2d_tran')(x) model = Model(inputs=x, outputs=y) data = np.ones(shape=(1,4,4,1)) kernel = np.ones(shape=(3,3,1,1)) bias = np.ones(shape=(1,)) model.get_layer('conv2d_tran').set_weights([kernel, bias]) actual_output = model.predict(data).astype(np.float16) expected_output = np.array( [ [2., 3., 4., 4., 3., 2.], [3., 5., 7., 7., 5., 3.], [4., 7., 10., 10., 7., 4.], [4., 7., 10., 10., 7., 4.], [3., 5., 7., 7., 5., 3.], [2., 3., 4., 4., 3., 2.] ]).reshape((1,6,6,1)).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) def test_masked_qconv2d_creates_correct_parameters(): mask = mask = np.ones((5, 5), dtype=np.float32) model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(10, 10, 1))) model.add(QConv2D(mask=mask, filters=1, kernel_size=(5, 5), use_bias=False)) # There should be no non-trainable params. np.testing.assert_equal(len(model.non_trainable_weights), 0) # Validate number of trainable params. This should be equal to one (5,5) # kernel. 
np.testing.assert_equal(len(model.trainable_weights), 1) num_trainable_params = np.prod(model.trainable_weights[0].shape) np.testing.assert_equal(num_trainable_params, 25) def test_qconv2d_masks_weights(): # Create an arbitrary mask. mask = np.array( [ [1.0, 0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0, 0.0], [1.0, 0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0, 0.0], [1.0, 0.0, 1.0, 0.0, 1.0], ], dtype=np.float32, ) model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(5, 5, 1))) model.add(QConv2D(mask=mask, filters=1, kernel_size=(5, 5), use_bias=False)) # Set the weights to be all ones. model.layers[0].set_weights([np.ones((5, 5, 1, 1), dtype=np.float32)]) # Run inference on a all ones input. output = model.predict(np.ones((1, 5, 5, 1), dtype=np.float32)) # Output should just be summation of number of ones in the mask. np.testing.assert_array_equal( output, np.array([[[[11.0]]]], dtype=np.float32) ) def test_masked_qconv2d_load_restore_works(): model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(10, 10, 1))) model.add( QConv2D( mask=np.ones((5, 5), dtype=np.float32), filters=1, kernel_size=(5, 5), use_bias=False, ) ) with tempfile.TemporaryDirectory() as temp_dir: model_path = os.path.join(temp_dir, 'model.keras') # Can save the model. model.save(model_path) # Can load the model. custom_objects = { 'QConv2D': QConv2D, } loaded_model = tf.keras.models.load_model( model_path, custom_objects=custom_objects ) np.testing.assert_array_equal( model.layers[0].weights[0], loaded_model.layers[0].weights[0] ) def test_qconv2d_groups_works(): model = tf.keras.Sequential() model.add(tf.keras.layers.Input(shape=(10, 10, 10))) model.add( QConv2D( filters=6, kernel_size=(1, 1), use_bias=True, groups=2, ) ) # Validate number of trainable params. 
np.testing.assert_equal(len(model.trainable_weights), 2) num_trainable_params = np.prod(model.trainable_weights[0].shape) + np.prod( model.trainable_weights[1].shape ) expected_trainable_params = 36 # (5*3)*2 + 6 np.testing.assert_equal(num_trainable_params, expected_trainable_params) if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/qdepthwise_conv2d_transpose_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Test layers from qconvolutional.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import tempfile import numpy as np from numpy.testing import assert_allclose, assert_equal import pytest import tensorflow as tf from qkeras import QDepthwiseConv2DTranspose from qkeras import quantized_bits # Predicted output from float model. 
# Golden 8x8x3 output of the float model in test_qseparable_conv2d_transpose.
_FLOAT_PREDICTED_OUTPUT = np.array([[
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
     [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
     [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
    [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0],
     [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]],
    [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0],
     [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]],
    [[2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0],
     [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0]],
    [[2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0],
     [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0], [2.0, 4.0, 6.0]],
    [[3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0],
     [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0]],
    [[3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0],
     [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0], [3.0, 6.0, 9.0]],
]])


def create_model(group_size=1):
  """Builds an unquantized QDepthwiseConv2DTranspose model (stride 2)."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QDepthwiseConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(2, 2),
      padding="same",
      name="conv2d_tran",
      depthwise_activation=None,
      depthwise_kernel_quantizer=None,
      bias_quantizer=None,
      group_size=group_size,
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)
  return model


def create_quantized_model(group_size=1):
  """Builds a quantized QDepthwiseConv2DTranspose model (stride 1)."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QDepthwiseConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(1, 1),
      padding="same",
      name="conv2d_tran",
      depthwise_activation="quantized_bits(10, 6, 1)",
      depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
      bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),
      group_size=group_size,
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)
  return model


# NOTE(review): these tests are named test_qseparable_* but exercise
# QDepthwiseConv2DTranspose — presumably a copy-paste from the separable
# transpose tests; confirm before renaming.
def test_qseparable_conv2d_transpose():
  # By setting the weights and input values manually, we can test
  # the correctness of the output.

  # Input is (1, 4, 4, 3), with 3 output channels. For i-th channel,
  # with shape (1, 4, 4, 1), it will convolve with the depthwise kernel at
  # i-th channel. Depthwise outputs are (1, 8, 8, 3).

  # Create model.
  model = create_model()

  output_shape = model.output_shape
  ws = model.layers[1].weights

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # depthwise kernel of shape (2, 2, 3, 1)
  dw_kernel = np.array([
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
  ])

  bias = tf.zeros((2,))
  model.layers[1].set_weights([dw_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  assert_equal(output_shape[1:], (8, 8, 3))
  assert_equal(len(ws), 2)

  # Test if the depthwise conv kernel shape is correct.
  assert_equal(ws[0].shape, (2, 2, 3, 1))

  # Test if the bias shape is correct.
  assert_equal(ws[1].shape, (2,))

  # Test if overall output is correct.
  assert_equal(actual_output, _FLOAT_PREDICTED_OUTPUT)


def test_quantization_in_separable_conv2d_transpose():
  # Test if quantization is applied correctly.

  # Create model with quantization.
  model = create_quantized_model()

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # depthwise kernel of shape (2, 2, 3, 1)
  dw_kernel = np.array([
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
      [[[1.0], [2.0], [3.0]], [[1.0], [2.0], [3.0]]],
  ])

  bias = tf.ones((2,))
  model.layers[1].set_weights([dw_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  qs = model.layers[1].get_quantizers()
  assert_equal(len(qs), 3)
  assert_equal(str(qs[0]), "quantized_bits(1,0,1,alpha=1.0)")
  assert_equal(str(qs[1]), "quantized_bits(2,2,1,alpha=1.0)")
  assert_equal(str(qs[2]), "quantized_bits(10,6,1)")

  expected = np.array([[
      [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
      [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]],
      [[3.0, 3.0, 3.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0], [6.0, 6.0, 6.0]],
      [
          [5.0, 5.0, 5.0],
          [10.0, 10.0, 10.0],
          [10.0, 10.0, 10.0],
          [10.0, 10.0, 10.0],
      ],
  ]])

  assert_equal(actual_output, expected)


def test_qseparable_conv2d_transpose_with_groups():
  model = create_model(group_size=3)

  output_shape = model.output_shape
  ws = model.layers[1].weights

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # depthwise kernel of shape (2, 2, 3, 3)
  dw_kernel = np.array([
      [
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
      ],
      [
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
          [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]],
      ],
  ])

  bias = tf.zeros((2,))
  model.layers[1].set_weights([dw_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  predicted = _FLOAT_PREDICTED_OUTPUT * 3.0  # kernel values replicated 3 times

  assert_equal(output_shape[1:], (8, 8, 3))
  assert_equal(len(ws), 2)

  # Test if the depthwise conv kernel shape is correct.
  assert_equal(ws[0].shape, (2, 2, 3, 3))

  # Test if the bias shape is correct.
  assert_equal(ws[1].shape, (2,))

  # Test if overall output is correct.
  assert_equal(actual_output, predicted)


def test_save_and_load_model():
  # Test if the model can be loaded from a saved model.
  model = create_quantized_model(group_size=3)

  fd, fname = tempfile.mkstemp(".hdf5")
  model.save(fname)

  custom_object = {
      "QDepthwiseConv2DTranspose": QDepthwiseConv2DTranspose,
  }

  model_loaded = tf.keras.models.load_model(
      fname, custom_objects=custom_object)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  model_weights = model.layers[1].weights
  loaded_model_weights = model_loaded.layers[1].weights

  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qlayers_test.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test layers from qlayers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import tempfile

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest
import tensorflow as tf

from tensorflow.keras import backend as K
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

from qkeras import QActivation
from qkeras import QDense
from qkeras import quantized_bits
from qkeras import quantized_relu
from qkeras.utils import load_qmodel
from qkeras.utils import model_save_quantized_weights
from qkeras.utils import quantized_model_from_json


def qdense_util(layer_cls,
                kwargs=None,
                input_data=None,
                weight_data=None,
                expected_output=None):
  """qlayer test utility: build a one-layer model, set weights, and
  (optionally) compare the prediction against expected_output."""
  input_shape = input_data.shape
  input_dtype = input_data.dtype
  layer = layer_cls(**kwargs)
  x = Input(shape=input_shape[1:], dtype=input_dtype)
  y = layer(x)
  layer.set_weights(weight_data)
  model = Model(x, y)
  actual_output = model.predict(input_data)
  if expected_output is not None:
    assert_allclose(actual_output, expected_output, rtol=1e-4)


@pytest.mark.parametrize(
    'layer_kwargs, input_data, weight_data, bias_data, expected_output',
    [
        (
            {
                'units': 2,
                'use_bias': True,
                'kernel_initializer': 'glorot_uniform',
                'bias_initializer': 'zeros'
            },
            np.array([[1, 1, 1, 1]], dtype=K.floatx()),
            np.array([[10, 20], [10, 20], [10, 20], [10, 20]],
                     dtype=K.floatx()),  # weight_data
            np.array([0, 0], dtype=K.floatx()),  # bias
            np.array([[40, 80]], dtype=K.floatx())),  # expected_output
        (
            {
                'units': 2,
                'use_bias': True,
                'kernel_initializer': 'glorot_uniform',
                'bias_initializer': 'zeros',
                'kernel_quantizer': 'quantized_bits(2,0,alpha=1.0)',
                'bias_quantizer': 'quantized_bits(2,0)',
            },
            np.array([[1, 1, 1, 1]], dtype=K.floatx()),
            np.array([[10, 20], [10, 20], [10, 20], [10, 20]],
                     dtype=K.floatx()),  # weight_data
            np.array([0, 0], dtype=K.floatx()),  # bias
            np.array([[2, 2]], dtype=K.floatx())),  # expected_output
    ])
def test_qdense(layer_kwargs, input_data, weight_data, bias_data,
                expected_output):
  """QDense forward pass, with and without weight/bias quantizers."""
  qdense_util(
      layer_cls=QDense,
      kwargs=layer_kwargs,
      input_data=input_data,
      weight_data=[weight_data, bias_data],
      expected_output=expected_output)


def test_qactivation_loads():
  """A model with a QActivation layer must round-trip through h5 save/load."""
  layer_size = 10

  # Create a small model with QActivation layer.
  x = xin = tf.keras.layers.Input(shape=(layer_size,), name='input')
  x = QDense(
      layer_size,
      name='qdense',
  )(x)
  x = QActivation(activation=quantized_relu(8), name='relu')(x)
  model = tf.keras.Model(inputs=xin, outputs=x)

  # Generate random weights for the model.
  w_k = np.random.rand(layer_size, layer_size)
  w_b = np.random.rand(
      layer_size,
  )
  model.set_weights([w_k, w_b])

  # Save the model as an h5 file.
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)

  # Load the model.
  loaded_model = load_qmodel(fname)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # Compare weights of original and loaded models.
  model_weights = model.weights
  loaded_model_weights = loaded_model.weights
  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())


if __name__ == '__main__':
  pytest.main([__file__])


================================================
FILE: tests/qmac_test.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test layers from qlayers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import tempfile

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

from qkeras import QScaleShift
from qkeras.utils import load_qmodel


def create_qmac_model(layer_cls,
                      kwargs=None,
                      input_data=None,
                      weight_data=None):
  """Create a QMAC model for test purpose."""
  layer = layer_cls(**kwargs)
  x = Input(shape=input_data.shape[1:], dtype=input_data.dtype)
  y = layer(x)
  layer.set_weights(weight_data)
  return Model(x, y)


@pytest.mark.parametrize(
    'layer_kwargs, input_data, weight_data, bias_data, expected_output',
    [
        (
            {
                'weight_quantizer': 'quantized_bits(8,2,alpha=1.0)',
                'bias_quantizer': 'quantized_bits(8,2,alpha=1.0)',
                'activation': 'quantized_bits(8,4,alpha=1.0)'
            },
            np.array([[1, 1], [2, 2]], dtype=K.floatx()),
            np.array([[1.0]]),  # weight (scale)
            np.array([[4.0]]),  # bias (shift)
            np.array([[5, 5], [6, 6]], dtype=K.floatx())),
    ])
def test_qmac(layer_kwargs, input_data, weight_data, bias_data,
              expected_output):
  """QScaleShift forward pass (x*w + b) and h5 save/load round-trip."""
  model = create_qmac_model(
      layer_cls=QScaleShift,
      kwargs=layer_kwargs,
      input_data=input_data,
      weight_data=[weight_data, bias_data])

  actual_output = model.predict(input_data)
  assert_allclose(actual_output, expected_output, rtol=1e-4)

  # Test model loading and saving.
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)

  # Load the model.
  loaded_model = load_qmodel(fname)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # Compare weights of original and loaded models.
  model_weights = model.weights
  loaded_model_weights = loaded_model.weights
  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())

  # Compare if loaded models have the same prediction as original models.
  loaded_model_output = loaded_model.predict(input_data)
  assert_equal(actual_output, loaded_model_output)


if __name__ == '__main__':
  pytest.main([__file__])


================================================
FILE: tests/qnoise_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test gradual quantization noise injection with quantizers of quantizers.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import logging
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
import pytest
from tensorflow.keras import backend as K

from qkeras.quantizers import quantized_bits
from qkeras.quantizers import quantized_relu


def test_qnoise_quantized_bits():
  """qnoise_factor must blend float (0.0) and quantized (1.0) outputs."""
  # 1 sign bit, 1 integer bit, and 2 fractional bits.
  bits = 4
  integer = 1
  symmetric = True
  keep_negative = True
  alpha = 1
  use_stochastic_rounding = False

  qb = quantized_bits(
      bits=bits,
      integer=integer,
      symmetric=symmetric,
      keep_negative=keep_negative,
      alpha=alpha,
      use_stochastic_rounding=use_stochastic_rounding,
      use_variables=True)

  inputs = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
  # x: unquantized pass-through; xq: fully quantized values.
  x = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
  xq = np.array([0.0, 0.5, -0.5, 0.5, -0.5, 1.75, -1.75], dtype=np.float32)
  x_xq = 0.5 * (x + xq)

  # no quantization
  qb.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qb(inputs)
  assert_equal(x_q_0, x)

  # full quantization
  qb.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qb(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half of x and xq
  qb.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qb(inputs)
  assert_equal(x_q_05, x_xq)


def test_qnoise_quantized_relu():
  """qnoise blending for quantized_relu across upper-bound/clip variants."""
  # 0 sign bit, 1 integer bit, and 3 fractional bits.
  bits = 4
  integer = 1
  use_sigmoid = False
  negative_slope = 0
  use_stochastic_rounding = False

  # input to quantized relu
  inputs = np.array([0.0, 0.5, -0.5, 0.6, 2.0, 3.0], dtype=np.float32)
  # float relu
  x = np.array([0.0, 0.5, 0.0, 0.6, 2.0, 3.0], dtype=np.float32)
  # float relu with upper bound 1.5
  x_ub = np.array([0.0, 0.5, 0.0, 0.6, 1.5, 1.5], dtype=np.float32)
  # float relu with quantized clipping
  x_clipped = np.array([0.0, 0.5, 0.0, 0.6, 1.875, 1.875], dtype=np.float32)
  # quantized relu
  xq = np.array([0.0, 0.5, 0.0, 0.625, 1.875, 1.875], dtype=np.float32)
  # mixing half and half
  x_xq = 0.5 * (x + xq)
  x_clipped_xq = 0.5 * (x_clipped + xq)
  x_ub_xq = 0.5 * (x_ub + xq)

  #########################################
  # No relu upper bound
  # No quantized clip for float relu
  #########################################
  qr_qc_false = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=None,
      is_quantized_clip=False,
      use_variables=True)

  # no quantization
  qr_qc_false.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_qc_false(inputs)
  assert_equal(x_q_0, x)

  # full quantization
  qr_qc_false.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_qc_false(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half
  qr_qc_false.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_qc_false(inputs)
  assert_equal(x_q_05, x_xq)

  #########################################
  # No relu upper bound
  # Quantized clip for float relu
  #########################################
  qr_qc_true = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=None,
      is_quantized_clip=True,
      use_variables=True)

  # no quantization
  qr_qc_true.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_qc_true(inputs)
  assert_equal(x_q_0, x_clipped)

  # full quantization
  qr_qc_true.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_qc_true(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half
  qr_qc_true.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_qc_true(inputs)
  assert_equal(x_q_05, x_clipped_xq)

  #########################################
  # Relu upper bound
  # No quantized clip for float relu
  #########################################
  qr_ub_qc_false = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=1.5,
      is_quantized_clip=False,
      use_variables=True)

  # no quantization
  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_ub_qc_false(inputs)
  assert_equal(x_q_0, np.clip(x_ub, a_min=None, a_max=1.5))

  # full quantization
  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_ub_qc_false(inputs)
  assert_equal(x_q_1, np.clip(xq, a_min=None, a_max=1.5))

  # mixing half and half
  qr_ub_qc_false.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_ub_qc_false(inputs)
  assert_equal(x_q_05, np.clip(x_ub_xq, a_min=None, a_max=1.5))

  #########################################
  # Relu upper bound
  # Quantized clip for float relu
  # (The quantized clip has precedence over the relu upper bound.)
  #########################################
  qr_ub_qc_true = quantized_relu(
      bits=bits,
      integer=integer,
      use_sigmoid=use_sigmoid,
      negative_slope=negative_slope,
      use_stochastic_rounding=use_stochastic_rounding,
      relu_upper_bound=1.5,
      is_quantized_clip=True,
      use_variables=True)

  # no quantization
  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=0.0)
  x_q_0 = qr_ub_qc_true(inputs)
  assert_equal(x_q_0, x_clipped)

  # full quantization
  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=1.0)
  x_q_1 = qr_ub_qc_true(inputs)
  assert_equal(x_q_1, xq)

  # mixing half and half
  qr_ub_qc_true.update_qnoise_factor(qnoise_factor=0.5)
  x_q_05 = qr_ub_qc_true(inputs)
  assert_equal(x_q_05, x_clipped_xq)


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qpooling_test.py
================================================
# Copyright 2021 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test layers from qpooling.py."""
import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_raises
from numpy.testing import assert_equal
import pytest
import logging
import tempfile
import os

import tensorflow.compat.v2 as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.backend import clear_session

from qkeras import QAveragePooling2D
from qkeras import QGlobalAveragePooling2D
from qkeras import quantized_bits
from qkeras import binary
from qkeras import ternary
from qkeras.utils import model_save_quantized_weights
from qkeras.utils import quantized_model_from_json
from qkeras.utils import load_qmodel
from qkeras.utils import model_quantize
from qkeras import print_qstats
from qkeras.qtools import qgraph
from qkeras.qtools import generate_layer_data_type_map
from qkeras.qtools import interface


@pytest.mark.parametrize(
    ('pooling, input_size, pool_size, strides, padding, data_format,'
     'average_quantizer, activation_quantizer, y'),
    [
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375], [0.25, 0.75, 0.5]],
                    [[0.375, 0.25, 0.625], [0.625, 0.5, 0.375]]],
                   [[[0.375, 0.375, 0.5], [0.375, 0.5, 0.625]],
                    [[0.75, 0.625, 0.5], [0.5, 0.5, 0.75]]]]).astype(
                        np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (3, 3), (3, 3), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.625]]],
                   [[[0.625, 0.5, 0.625]]]]).astype(np.float16)),
        ('QGlobalAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_last', quantized_bits(10, 0, 1), quantized_bits(4, 0, 1),
         np.array([[0.5, 0.5, 0.375], [0.5, 0.5, 0.625]]).astype(np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375]]],
                   [[[0.375, 0.375, 0.5]]]]).astype(np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'same',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375], [0.375, 0.75, 0.25]],
                    [[0.75, 0.25, 0.375], [0.75, 0.75, 0.25]]],
                   [[[0.375, 0.375, 0.5], [0.25, 0.625, 0.5]],
                    [[0.625, 0.625, 0.5], [0.625, 0.625, 0.875]]]]).astype(
                        np.float16)),
        # channels_first is expected to fail on CPU (y is None).
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_first', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         None),
    ])
def test_q_average_pooling(pooling, input_size, pool_size, strides, padding,
                           data_format, average_quantizer,
                           activation_quantizer, y):
  """q_average_pooling test utility.

  Builds a one-layer pooling model, checks its prediction against the
  expected output y, and exercises json reload, h5 save/load, and
  quantized-weight saving round trips.
  """
  np.random.seed(33)

  x = Input(input_size)
  xin = x
  if pooling == 'QAveragePooling2D':
    x = QAveragePooling2D(
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        average_quantizer=average_quantizer,
        activation=activation_quantizer,
        name='qpooling')(x)
  else:
    x = QGlobalAveragePooling2D(
        data_format=data_format,
        average_quantizer=average_quantizer,
        activation=activation_quantizer,
        name='qpooling')(
            x)
  model = Model(inputs=xin, outputs=x)

  # Prints qstats to make sure it works with pooling layers
  print_qstats(model)

  size = (2,) + input_size
  inputs = np.random.rand(size[0], size[1], size[2], size[3])

  if data_format == 'channels_first':
    # channels_first average pooling is not supported here, so predict
    # must raise.
    assert_raises(tf.errors.InvalidArgumentError, model.predict, inputs)
  else:
    p = model.predict(inputs).astype(np.float16)
    assert_allclose(p, y, rtol=1e-4)

    # Reloads the model to ensure saving/loading works
    json_string = model.to_json()
    clear_session()
    reload_model = quantized_model_from_json(json_string)
    p = reload_model.predict(inputs).astype(np.float16)
    assert_allclose(p, y, rtol=1e-4)

    # Saves the model as an h5 file using Keras's model.save()
    fd, fname = tempfile.mkstemp(".h5")
    model.save(fname)
    del model  # Delete the existing model

    # Returns a compiled model identical to the previous one
    loaded_model = load_qmodel(fname)

    # Cleans the created h5 file after loading the model
    os.close(fd)
    os.remove(fname)

    # Applies quantizer to weights
    model_save_quantized_weights(loaded_model)

    p = loaded_model.predict(inputs).astype(np.float16)
    assert_allclose(p, y, rtol=1e-4)


def test_qpooling_in_model_quantize():
  """Checks model_quantize converts pooling layers with the given config."""
  input_size = (16, 16, 3)
  pool_size = (2, 2)

  x = Input(input_size)
  xin = x
  x = AveragePooling2D(pool_size=pool_size, name="pooling")(x)
  x = GlobalAveragePooling2D(name="global_pooling")(x)
  model = Model(inputs=xin, outputs=x)

  quantize_config = {
      "QAveragePooling2D": {
          "average_quantizer": "binary",
          "activation_quantizer": "binary"
      },
      "QGlobalAveragePooling2D": {
          "average_quantizer": "quantized_bits(4, 0, 1)",
          "activation_quantizer": "ternary"
      }
  }
  qmodel = model_quantize(model, quantize_config, 4)

  print_qstats(qmodel)
  assert_equal(str(qmodel.layers[1].average_quantizer_internal), "binary()")
  assert_equal(str(qmodel.layers[1].activation), "binary()")
  assert_equal(
      str(qmodel.layers[2].average_quantizer_internal),
      "quantized_bits(4,0,1)")
  assert_equal(str(qmodel.layers[2].activation), "ternary()")


def test_qpooling_in_qtools():
  """Checks qtools-derived datatypes for QAveragePooling2D and
  QGlobalAveragePooling2D (multiplier, accumulator, average quantizer and
  output quantizer bit widths)."""
  input_size = (16, 16, 3)
  pool_size = (2, 2)
  input_quantizers = [quantized_bits(8, 0, 1)]
  is_inference = False

  x = Input(input_size)
  xin = x
  x = QAveragePooling2D(
      pool_size=pool_size,
      average_quantizer=binary(),
      activation=quantized_bits(4, 0, 1),
      name="pooling")(
          x)
  x = QGlobalAveragePooling2D(
      average_quantizer=quantized_bits(4, 0, 1),
      activation=ternary(),
      name="global_pooling")(
          x)
  model = Model(inputs=xin, outputs=x)

  (graph,
   source_quantizer_list) = qgraph.CreateGraph(model, input_quantizers)
  qgraph.GraphPropagateActivationsToEdges(graph)
  layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
      graph, source_quantizer_list, is_inference)
  dtype_dict = interface.map_to_json(layer_map)

  # Checks the QAveragePooling layer datatype
  multiplier = dtype_dict["pooling"]["pool_avg_multiplier"]
  accumulator = dtype_dict["pooling"]["pool_sum_accumulator"]
  average_quantizer = dtype_dict["pooling"]["average_quantizer"]
  output = dtype_dict["pooling"]["output_quantizer"]

  assert_equal(multiplier["quantizer_type"], "quantized_bits")
  assert_equal(multiplier["bits"], 10)
  assert_equal(multiplier["int_bits"], 3)
  assert_equal(multiplier["is_signed"], 1)
  assert_equal(multiplier["op_type"], "mux")
  assert_equal(accumulator["quantizer_type"], "quantized_bits")
  assert_equal(accumulator["bits"], 10)
  assert_equal(accumulator["int_bits"], 3)
  assert_equal(accumulator["is_signed"], 1)
  assert_equal(accumulator["op_type"], "add")
  assert_equal(output["quantizer_type"], "quantized_bits")
  assert_equal(output["bits"], 4)
  assert_equal(output["int_bits"], 1)
  assert_equal(output["is_signed"], 1)
  assert_equal(average_quantizer["quantizer_type"], "binary")
  assert_equal(average_quantizer["bits"], 1)
  assert_equal(average_quantizer["int_bits"], 1)
  assert_equal(average_quantizer["is_signed"], 1)

  # Checks the QGlobalAveragePooling layer datatype
  multiplier = dtype_dict["global_pooling"]["pool_avg_multiplier"]
  accumulator = dtype_dict["global_pooling"]["pool_sum_accumulator"]
  average_quantizer = dtype_dict["global_pooling"]["average_quantizer"]
  output = dtype_dict["global_pooling"]["output_quantizer"]

  assert_equal(multiplier["quantizer_type"], "quantized_bits")
  assert_equal(multiplier["bits"], 13)
  assert_equal(multiplier["int_bits"], 7)
  assert_equal(multiplier["is_signed"], 1)
  assert_equal(multiplier["op_type"], "mul")
  assert_equal(accumulator["quantizer_type"], "quantized_bits")
  assert_equal(accumulator["bits"], 10)
  assert_equal(accumulator["int_bits"], 7)
  assert_equal(accumulator["is_signed"], 1)
  assert_equal(accumulator["op_type"], "add")
  assert_equal(output["quantizer_type"], "ternary")
  assert_equal(output["bits"], 2)
  assert_equal(output["int_bits"], 2)
  assert_equal(output["is_signed"], 1)
  assert_equal(average_quantizer["quantizer_type"], "quantized_bits")
  assert_equal(average_quantizer["bits"], 4)
  assert_equal(average_quantizer["int_bits"], 1)
  assert_equal(average_quantizer["is_signed"], 1)


def test_QAveragePooling_output():
  # Checks if the output of QAveragePooling layer with average_quantizer
  # is correct.
  x = np.ones(shape=(2, 6, 6, 1))
  x[0, 0, :, :] = 0
  x = tf.constant(x)

  y = QAveragePooling2D(
      pool_size=(3, 3),
      strides=3,
      padding="valid",
      average_quantizer="quantized_bits(8, 1, 1)")(x)
  y = y.numpy()
  assert np.all(y == [[[[0.65625], [0.65625]], [[0.984375], [0.984375]]],
                      [[[0.984375], [0.984375]], [[0.984375], [0.984375]]]])


def test_QGlobalAveragePooling_output():
  # Checks if the output of QGlobalAveragePooling layer with average_quantizer
  # is correct.
  x = np.ones(shape=(2, 3, 3, 2))
  x[0, 0, 1, :] = 0
  x = tf.constant(x)

  y = QGlobalAveragePooling2D(average_quantizer="quantized_bits(8, 1, 1)")(x)
  y = y.numpy()
  assert np.all(y == np.array([[0.875, 0.875], [0.984375, 0.984375]]))


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qrecurrent_test.py
================================================
# Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests for qrecurrent.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import json import os import tempfile import numpy as np from numpy.testing import assert_allclose import pytest import tensorflow as tf from tensorflow.keras import backend as K from tensorflow.keras.backend import clear_session from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Bidirectional from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import GRU from tensorflow.keras.layers import Input from tensorflow.keras.layers import LSTM from tensorflow.keras.layers import SimpleRNN from tensorflow.keras.models import Model from tensorflow.keras.models import Sequential from qkeras import QActivation from qkeras import QBidirectional from qkeras import QDense from qkeras import QGRU from qkeras import QLSTM from qkeras import QSimpleRNN from qkeras import quantized_bits from qkeras import quantized_tanh from qkeras.utils import load_qmodel from qkeras.utils import model_quantize from qkeras.utils import model_save_quantized_weights from qkeras.utils import quantized_model_from_json @pytest.mark.skip(reason="Test failing due to random weight initializaiton") @pytest.mark.parametrize('rnn, all_weights_signature, expected_output', [ (QSimpleRNN, np.array([5.109375, -1.8828125, 0.0, -0.5, 0.0], dtype=np.float32), np.array( [[0.281, 0.4956, 0.1047, 0.1188], [0.185, 0.6016, 0.0977, 0.1157], [0.3892, 0.483, 0.03528, 0.0926], [0.4038, 0.511, 0.01686, 0.06824], [0.3354, 0.5376, 0.02602, 0.101], [0.2043, 0.587, 0.04147, 0.1675], [0.2297, 0.6455, 0.0456, 0.0789], [0.4512, 0.4326, 0.01938, 0.0968], [0.6304, 0.2498, 0.03345, 0.0866], [0.4924, 0.3735, 0.011925, 0.1222]], dtype=np.float16)), (QLSTM, np.array([3.7421875, 2.1328125, 15.875, -0.5, 0.0], 
dtype=np.float32), np.array( [[0.27, 0.1814, 0.3108, 0.2378], [0.2976, 0.2424, 0.248, 0.2119], [0.3054, 0.2004, 0.2705, 0.2238], [0.325, 0.1656, 0.269, 0.2401], [0.271, 0.1796, 0.3, 0.2493], [0.3066, 0.1873, 0.2477, 0.2583], [0.2798, 0.1757, 0.2944, 0.25], [0.2693, 0.2335, 0.2534, 0.2437], [0.2808, 0.2057, 0.2712, 0.2422], [0.2732, 0.2336, 0.2491, 0.244]], dtype=np.float16)), (QGRU, np.array([4.6875, 4.3984375, 0.0, -0.5, 0.0], dtype=np.float32), np.array( [[0.2025, 0.3467, 0.2952, 0.1556], [0.2935, 0.3313, 0.2058, 0.1694], [0.2046, 0.4465, 0.1827, 0.1661], [0.1913, 0.498, 0.1583, 0.1525], [0.1578, 0.477, 0.1973, 0.1677], [0.2018, 0.44, 0.1714, 0.1869], [0.157, 0.551, 0.1709, 0.12115], [0.1973, 0.4353, 0.1672, 0.2001], [0.1622, 0.5146, 0.1741, 0.149], [0.2101, 0.3855, 0.2069, 0.1976]], dtype=np.float16)), ]) def test_qrnn(rnn, all_weights_signature, expected_output): K.set_learning_phase(0) np.random.seed(22) tf.random.set_seed(22) x = x_in = Input((2, 4), name='input') x = rnn( 16, activation=quantized_tanh(bits=8, symmetric=True), kernel_quantizer=quantized_bits(8, 0, 1, alpha=1.0), recurrent_quantizer=quantized_bits(8, 0, 1, alpha=1.0), bias_quantizer=quantized_bits(8, 0, 1, alpha=1.0), state_quantizer=quantized_bits(4, 0, 1, alpha=1.0), name='qrnn_0')( x) x = QDense( 4, kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(4, 0, 1), name='dense')( x) x = Activation('softmax', name='softmax')(x) model = Model(inputs=[x_in], outputs=[x]) # reload the model to ensure saving/loading works json_string = model.to_json() clear_session() model = quantized_model_from_json(json_string) # Save the model as an h5 file using Keras's model.save() fd, fname = tempfile.mkstemp('.h5') model.save(fname) del model # Delete the existing model # Return a compiled model identical to the previous one model = load_qmodel(fname) # Clean the created h5 file after loading the model os.close(fd) os.remove(fname) # apply quantizer to weights 
model_save_quantized_weights(model) all_weights = [] for layer in model.layers: for i, weights in enumerate(layer.get_weights()): w = np.sum(weights) all_weights.append(w) all_weights = np.array(all_weights) assert all_weights.size == all_weights_signature.size assert np.all(all_weights == all_weights_signature) # test forward: inputs = 2 * np.random.rand(10, 2, 4) actual_output = model.predict(inputs).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) @pytest.mark.skip(reason="Test failing due to random weight initializaiton") @pytest.mark.parametrize('rnn, all_weights_signature, expected_output', [ (QSimpleRNN, np.array([ -2.6562500e+00, -4.3466797e+00, 8.6736174e-19, 6.2548828e-01, -6.0751953e+00, 8.6736174e-19, -7.5000000e-01, 0.0 ], dtype=np.float32), np.array( [[0.0851, 0.1288, 0.586, 0.2002], [0.1044, 0.1643, 0.7217, 0.00978], [0.04135, 0.0537, 0.8706, 0.03455], [0.03354, 0.0489, 0.889, 0.02852], [0.04358, 0.05246, 0.7563, 0.1478], [0.03403, 0.0743, 0.4177, 0.4739], [0.0859, 0.1567, 0.3972, 0.36], [0.27, 0.1945, 0.4841, 0.05124], [0.12115, 0.05722, 0.728, 0.0938], [0.2864, 0.1262, 0.339, 0.2484]], dtype=np.float16)), (QLSTM, np.array( [-4.1406555, 3.2921143, 16., 7.0236816, 4.1237793, 16., -0.75, 0.], dtype=np.float32), np.array( [[0.301, 0.2236, 0.2275, 0.2478], [0.2135, 0.2627, 0.2439, 0.2798], [0.1671, 0.2252, 0.2844, 0.3232], [0.2211, 0.2178, 0.2817, 0.2795], [0.3384, 0.1732, 0.2451, 0.2434], [0.296, 0.1979, 0.2468, 0.2593], [0.2698, 0.1753, 0.288, 0.267], [0.258, 0.1888, 0.3228, 0.2301], [0.2169, 0.1578, 0.3699, 0.2554], [0.2783, 0.1816, 0.2986, 0.2415]], dtype=np.float16)), (QGRU, np.array([ -6.7578125e-01, 3.6837769e-01, 2.6020852e-18, 4.1682129e+00, -7.5769043e-01, 2.6020852e-18, -7.5000000e-01, 0.0 ], dtype=np.float32), np.array( [[0.278, 0.1534, 0.314, 0.2546], [0.1985, 0.1788, 0.3823, 0.2402], [0.1997, 0.1621, 0.3792, 0.259], [0.2534, 0.1605, 0.281, 0.3052], [0.3794, 0.1266, 0.2296, 0.2642], [0.285, 0.1754, 0.2847, 
0.255], [0.2878, 0.1339, 0.3042, 0.274], [0.2874, 0.1475, 0.279, 0.2861], [0.2379, 0.1356, 0.3186, 0.3079], [0.2234, 0.1476, 0.3274, 0.3015]], dtype=np.float16)) ]) def test_qbidirectional(rnn, all_weights_signature, expected_output): K.set_learning_phase(0) np.random.seed(22) tf.random.set_seed(22) x = x_in = Input((2, 4), name='input') x = QBidirectional( rnn(16, activation='quantized_po2(8)', kernel_quantizer='quantized_po2(8)', recurrent_quantizer='quantized_po2(8)', bias_quantizer='quantized_po2(8)', state_quantizer='quantized_po2(8)', name='qbirnn_0'))( x) x = QDense( 4, kernel_quantizer=quantized_bits(8, 2, 1, alpha=1.0), bias_quantizer=quantized_bits(8, 0, 1), name='dense')( x) x = Activation('softmax', name='softmax')(x) model = Model(inputs=[x_in], outputs=[x]) # reload the model to ensure saving/loading works json_string = model.to_json() clear_session() model = quantized_model_from_json(json_string) # Save the model as an h5 file using Keras's model.save() fd, fname = tempfile.mkstemp('.h5') model.save(fname) del model # Delete the existing model # Return a compiled model identical to the previous one model = load_qmodel(fname) # Clean the created h5 file after loading the model os.close(fd) os.remove(fname) # apply quantizer to weights model_save_quantized_weights(model) all_weights = [] for layer in model.layers: for _, weights in enumerate(layer.get_weights()): w = np.sum(weights) all_weights.append(w) all_weights = np.array(all_weights) assert all_weights.size == all_weights_signature.size assert np.all(all_weights == all_weights_signature) # test forward: inputs = 2 * np.random.rand(10, 2, 4) actual_output = model.predict(inputs).astype(np.float16) assert_allclose(actual_output, expected_output, rtol=1e-4) def create_network_rnn(rnn): xi = Input((16, 1,)) x = rnn(8)(xi) return Model(inputs=xi, outputs=x) @pytest.mark.parametrize('rnn', [SimpleRNN, LSTM, GRU]) def test_rnn_conversion(rnn): m = create_network_rnn(rnn) m.use_legacy_config = True name 
= 'Q' + m.layers[1].__class__.__name__ d = { name: { 'kernel_quantizer': 'binary', 'recurrent_quantizer': 'binary', 'bias_quantizer': 'binary', 'state_quantizer': 'binary', 'activation_quantizer': 'binary', } } if name != 'QSimpleRNN': d[name]['recurrent_activation_quantizer'] = 'binary' qq = model_quantize(m, d, 4) assert str(qq.layers[1].kernel_quantizer) == 'binary' assert str(qq.layers[1].recurrent_quantizer) == 'binary' assert str(qq.layers[1].bias_quantizer) == 'binary' assert str(qq.layers[1].state_quantizer) == 'binary' assert str(qq.layers[1].activation) == 'binary()' if name != 'QSimpleRNN': assert str(qq.layers[1].recurrent_activation) == 'binary()' def create_network_birnn(rnn): xi = Input((16, 1,)) x = Bidirectional(rnn(8))(xi) return Model(inputs=xi, outputs=x) @pytest.mark.parametrize('rnn', [SimpleRNN, LSTM, GRU]) def test_birnn_conversion(rnn): m = create_network_birnn(rnn) m.use_legacy_config = True name = 'Q' + m.layers[1].layer.__class__.__name__ d = { 'QBidirectional': { 'kernel_quantizer': 'binary', 'recurrent_quantizer': 'binary', 'bias_quantizer': 'binary', 'state_quantizer': 'binary', 'activation_quantizer': 'binary', } } if name != 'QSimpleRNN': d['QBidirectional']['recurrent_activation_quantizer'] = 'binary' qq = model_quantize(m, d, 4) layer = qq.layers[1].layer assert str(layer.kernel_quantizer) == 'binary' assert str(layer.recurrent_quantizer) == 'binary' assert str(layer.bias_quantizer) == 'binary' assert str(layer.state_quantizer) == 'binary' assert str(layer.activation) == 'binary()' if name != 'QSimpleRNN': assert str(layer.recurrent_activation) == 'binary()' backward_layer = qq.layers[1].backward_layer # backwards weight quantizers are dict because of contraints.serialize assert str(backward_layer.kernel_quantizer['class_name']) == 'binary' assert str(backward_layer.recurrent_quantizer['class_name']) == 'binary' assert str(backward_layer.bias_quantizer['class_name']) == 'binary' assert 
str(backward_layer.state_quantizer['class_name']) == 'binary' assert str(backward_layer.activation) == 'binary()' if name != 'QSimpleRNN': assert str(backward_layer.recurrent_activation) == 'binary()' def test_birnn_subrnn(): model = Sequential([Bidirectional(LSTM(16)), LSTM(8)]) d = { 'QLSTM': { 'activation_quantizer': 'ternary', 'recurrent_activation_quantizer': 'ternary', 'kernel_quantizer': 'ternary', 'recurrent_quantizer': 'ternary', 'bias_quantizer': 'ternary', 'state_quantizer': 'ternary', }, 'QBidirectional': { 'activation_quantizer': 'binary', 'recurrent_activation_quantizer': 'binary', 'kernel_quantizer': 'binary', 'recurrent_quantizer': 'binary', 'bias_quantizer': 'binary', 'state_quantizer': 'binary', } } qmodel = model_quantize(model, d, 4) layer = qmodel.layers[1] assert str(layer.kernel_quantizer) == 'ternary' assert str(layer.recurrent_quantizer) == 'ternary' assert str(layer.bias_quantizer) == 'ternary' assert str(layer.state_quantizer) == 'ternary' assert str(layer.activation) == 'ternary()' if __name__ == '__main__': pytest.main([__file__]) ================================================ FILE: tests/qseparable_conv2d_transpose_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Test layers from qconvolutional.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tempfile

import numpy as np
from numpy.testing import assert_allclose, assert_equal
import pytest
import tensorflow as tf

from qkeras import QSeparableConv2DTranspose
from qkeras import quantized_bits


def create_model():
  """Builds a single-layer QSeparableConv2DTranspose model with no
  quantization (all quantizers None), stride 2, 'same' padding."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QSeparableConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(2, 2),
      padding="same",
      name="conv2d_tran",
      depthwise_activation=None,
      pointwise_activation=None,
      depthwise_kernel_quantizer=None,
      pointwise_kernel_quantizer=None,
      bias_quantizer=None,
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)

  return model


def create_quantized_model():
  """Builds a single-layer QSeparableConv2DTranspose model with kernel,
  bias and activation quantizers set, stride 1, 'same' padding."""
  x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
  x = QSeparableConv2DTranspose(
      filters=2,
      kernel_size=(2, 2),
      strides=(1, 1),
      padding="same",
      name="conv2d_tran",
      depthwise_activation="quantized_bits(10, 6, 1)",
      pointwise_activation="quantized_bits(5, 3, 1)",
      depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
      pointwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
      bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),
  )(x)

  model = tf.keras.Model(inputs=img_input, outputs=x)
  return model


def test_qseparable_conv2d_transpose():
  # By setting the weights and input values manually, we can test
  # the correctness of the output.

  # Input is (1, 4, 4, 3), with 3 output channels. For i-th channel,
  # with shape (1, 4, 4, 1), it will convolve with the depthwise kernel at
  # i-th channel. Depthwise outputs are (1, 8, 8, 3). DW output is then
  # mapped from input channel(3) to output channel (2) by pointwise conv.
  # Pointwise conv output is (1, 8, 8, 2).

  # Create model.
  model = create_model()

  output_shape = model.output_shape
  ws = model.layers[1].weights

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  # Depthwise kernels per channel are all-1s, all-2s, all-3s; the two
  # pointwise output channels are all-1s and all-2s.
  k = tf.ones((2, 2, 1, 1))
  dw_kernel = tf.concat([k, 1+k, 2+k], axis=-1)
  k = tf.ones((1, 1, 1, 3))
  pt_kernel = tf.concat([k, 1+k], axis=-2)
  bias = tf.zeros((2,))

  model.layers[1].set_weights([dw_kernel, pt_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  predicted = np.array(
      [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.],
         [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
        [[0., 0.], [0., 0.], [0., 0.], [0., 0.],
         [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
        [[6., 12.], [6., 12.], [6., 12.], [6., 12.],
         [6., 12.], [6., 12.], [6., 12.], [6., 12.]],
        [[6., 12.], [6., 12.], [6., 12.], [6., 12.],
         [6., 12.], [6., 12.], [6., 12.], [6., 12.]],
        [[12., 24.], [12., 24.], [12., 24.], [12., 24.],
         [12., 24.], [12., 24.], [12., 24.], [12., 24.]],
        [[12., 24.], [12., 24.], [12., 24.], [12., 24.],
         [12., 24.], [12., 24.], [12., 24.], [12., 24.]],
        [[18., 36.], [18., 36.], [18., 36.], [18., 36.],
         [18., 36.], [18., 36.], [18., 36.], [18., 36.]],
        [[18., 36.], [18., 36.], [18., 36.], [18., 36.],
         [18., 36.], [18., 36.], [18., 36.], [18., 36.]]]])

  assert_equal(output_shape[1:], (8, 8, 2))
  assert_equal(len(ws), 3)

  # Test if the depthwise conv kernel shape is correct.
  assert_equal(ws[0].shape, (2, 2, 1, 3))

  # Test if the pointwise conv kernel shape is correct.
  assert_equal(ws[1].shape, (1, 1, 2, 3))

  # Test if the bias shape is correct.
  assert_equal(ws[2].shape, (2,))

  # Test if overall output is correct.
  assert_equal(actual_output, predicted)


def test_quantization_in_separable_conv2d_transpose():
  # Test if quantization is applied correctly.

  # Create model with quantization.
  model = create_quantized_model()

  x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
  inputs = np.concatenate([x, x, x], axis=-1)
  inputs = tf.constant(inputs.reshape((1, 4, 4, -1)), dtype=tf.float32)

  k = tf.ones((2, 2, 1, 1))
  dw_kernel = tf.concat([k, 1+k, 2+k], axis=-1)
  k = tf.ones((1, 1, 1, 3))
  pt_kernel = tf.concat([k, 1+k], axis=-2)
  bias = tf.ones((2,))

  model.layers[1].set_weights([dw_kernel, pt_kernel, bias])

  actual_output = model.predict(inputs).astype(np.float16)

  # Layer exposes its five quantizers in a fixed order: depthwise kernel,
  # pointwise kernel, bias, depthwise activation, pointwise activation.
  qs = model.layers[1].get_quantizers()
  assert_equal(len(qs), 5)
  assert_equal(str(qs[0]), "quantized_bits(1,0,1,alpha=1.0)")
  assert_equal(str(qs[1]), "quantized_bits(1,0,1,alpha=1.0)")
  assert_equal(str(qs[2]), "quantized_bits(2,2,1,alpha=1.0)")
  assert_equal(str(qs[3]), "quantized_bits(10,6,1)")
  assert_equal(str(qs[4]), "quantized_bits(5,3,1)")

  expected = np.array(
      [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
        [[3., 3.], [6., 6.], [6., 6.], [6., 6.]],
        [[7.5, 7.5], [7.5, 7.5], [7.5, 7.5], [7.5, 7.5]],
        [[7.5, 7.5], [7.5, 7.5], [7.5, 7.5], [7.5, 7.5]]]]
  )

  assert_equal(actual_output, expected)


def test_save_and_load_model():
  # Test if the model can be loaded from a saved model.
  model = create_quantized_model()

  fd, fname = tempfile.mkstemp(".hdf5")
  model.save(fname)

  custom_object = {
      "QSeparableConv2DTranspose": QSeparableConv2DTranspose,
  }

  model_loaded = tf.keras.models.load_model(
      fname, custom_objects=custom_object)

  # Clean the h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  model_weights = model.layers[1].weights
  loaded_model_weights = model_loaded.layers[1].weights

  assert_equal(len(model_weights), len(loaded_model_weights))
  for i, model_weight in enumerate(model_weights):
    assert_equal(model_weight.numpy(), loaded_model_weights[i].numpy())


if __name__ == "__main__":
  pytest.main([__file__])


================================================
FILE: tests/qtools_model_test.py
================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for various model architectures."""
import json

import numpy as np
import pytest
import tensorflow.keras as keras
import tensorflow as tf

from qkeras import QActivation
from qkeras import QAdaptiveActivation
from qkeras import QBatchNormalization
from qkeras import QConv2D
from qkeras import QDepthwiseConv2D
from qkeras import QDense
from qkeras import quantizers
from qkeras.qtools import interface
from qkeras.qtools import qgraph
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings
from qkeras.qtools.quantized_operators import divider_factory
from qkeras.qtools import generate_layer_data_type_map
from qkeras.utils import model_save_quantized_weights
from qkeras.qtools.quantized_operators import adder_impl
from qkeras.qtools.quantized_operators import quantizer_impl
from qkeras.qtools.DnC import divide_and_conquer


def qdense_model_fork():
  """Builds a QDense model whose output forks into two heads
  (a QDense branch and a QActivation branch)."""
  x = x_in = keras.layers.Input((23,), name="input")
  x = QDense(
      10,
      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
      bias_quantizer=quantizers.quantized_bits(5, 0, 1),
      activation=quantizers.quantized_po2(3, 1),
      name="qdense_0")(x)
  x = QDense(
      20,
      kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
      bias_quantizer=quantizers.quantized_bits(5, 0, 1),
      activation=quantizers.quantized_relu(6, 2),
      name="qdense_1")(x)
  x = QActivation("quantized_relu(4)", name="QA_2")(x)
  x_1 = QDense(
      30,
      kernel_quantizer=quantizers.binary(),
      bias_quantizer=quantizers.binary(),
      name="qdense_3")(x)
  x_2 = QActivation("quantized_relu(6,2)", name="QA_3")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x_1, x_2,])

  return model


def qconv_model():
  """Builds a three-layer QConv2D chain with binary/ternary and
  quantized_bits kernels and a trailing QActivation."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QActivation("quantized_relu(4)", name="QA_0")(x)
  x = QConv2D(
      16, 2, 2,
      kernel_quantizer=quantizers.binary(),
      bias_quantizer=quantizers.ternary(),
      name="qconv2d_1")(x)
  x = QConv2D(
      8, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      activation=quantizers.quantized_relu(6, 2),
      name="qconv2D_2")(x)
  x = QConv2D(
      2, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      activation=quantizers.quantized_relu(6, 2),
      name="qconv2d_3")(x)
  x = QActivation("quantized_bits(6, 0, 1)", name="QA_4")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x])

  return model


def po2_qbits_model():
  """Builds a model with a po2 input activation feeding a quantized_bits
  QConv2D (po2 x qbits multiplier case)."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QActivation("quantized_relu_po2(3, 2)", name="QA_0")(x)
  x = QConv2D(
      16, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1),
      name="qconv2d_1")(x)

  model = keras.Model(inputs=[x_in], outputs=[x])
  return model


def float_po2_model():
  """Builds a model mixing po2-quantized and quantized_bits conv layers;
  prints each layer's output shape as a side effect."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QConv2D(
      16, 2, 2,
      kernel_quantizer=quantizers.quantized_po2(5, 0),
      bias_quantizer=quantizers.quantized_po2(5, 0),
      name="qconv2d_1")(x)
  x = QActivation("quantized_relu_po2(3, 2)", name="QA_0")(x)
  x = QConv2D(
      10, 2, 2,
      kernel_quantizer=quantizers.quantized_bits(5, 2, 1),
      bias_quantizer=quantizers.quantized_bits(5, 2, 1),
      name="qconv2d_0")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x])

  for layer in model.layers:
    print(layer)
    print(layer.output_shape)

  return model


def qbn_model(
    act="binary(use_01=0)",
    gamma=quantizers.quantized_relu_po2(4, 2),
    variance=quantizers.quantized_relu_po2(4, 2),
    beta=None,
    mean=None):
  """Builds a QActivation + QBatchNormalization model with configurable
  activation and batch-norm parameter quantizers."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QActivation(act, name="QA_0")(x)
  x = QBatchNormalization(
      gamma_quantizer=gamma,
      variance_quantizer=variance,
      beta_quantizer=beta,
      mean_quantizer=mean,
      gamma_range=8,
      beta_range=4,
      name="qbn_1")(x)

  model = keras.Model(
      inputs=[x_in], outputs=[x])

  return model


def qbn_model_inference():
  """Builds a conv/batchnorm model with fixed weights and returns
  (hw_weight_dict, model), where hw_weight_dict comes from
  model_save_quantized_weights (used to test batchnorm fusing,
  including an auto_po2 inverse_quantizer)."""
  x = x_in = keras.layers.Input((23, 23, 1), name="input")
  x = QConv2D(
      4, 2, 23,
      kernel_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),
      bias_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),
      use_bias=False,
      name="qconv2d_1")(x)
  x = QBatchNormalization(
      mean_quantizer=quantizers.quantized_bits(6, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantizers.quantized_bits(6, 0, 1),
      inverse_quantizer=quantizers.quantized_bits(16, 0, 1),
      scale=False,
      center=False,
      gamma_range=8,
      beta_range=4,
      name="qbn_2")(x)
  x = QActivation(activation="quantized_bits(5, 0, 1)", name="act")(x)
  x = QConv2D(
      2, 1, 1,
      kernel_quantizer=quantizers.quantized_bits(3, 0),
      bias_quantizer=quantizers.quantized_bits(3, 2),
      name="qconv2d_3")(x)
  # Add an extra QNormalization here to test auto_po2 type of inverse_quantizer
  # in batchnorm fusing.
  x = QBatchNormalization(
      mean_quantizer=quantizers.quantized_bits(6, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantizers.quantized_bits(6, 0, 1),
      inverse_quantizer=quantizers.quantized_bits(8, 0, 1, alpha="auto_po2"),
      scale=False,
      center=False,
      gamma_range=8,
      beta_range=4,
      name="qbn_4")(x)

  model = keras.Model(inputs=[x_in], outputs=[x])
  model.compile(loss="mse", run_eagerly=True)

  # Deterministic weights so the fused/quantized values are reproducible.
  model.get_layer("qconv2d_1").set_weights([
      np.array([[[[0.11, -0.5, -0.14, -0.41]], [[-0.4, 0.9, 0.6, -1.]]],
                [[[-0.35, 1., 0.54, 0.17]], [[0.39, -0.2, -0.41, -0.7]]]])
  ])
  model.get_layer("qbn_2").set_weights(
      [np.array([0., 0, 0, 0.]), np.array([1, 1, 1, 1])])
  model.get_layer("qconv2d_3").set_weights([
      np.array([[[[1.2, -1.5], [10., 1.3], [-0.7, 1.2], [1.7, 1.5]]]]),
      np.array([0.7, 0.8])
  ])
  model.get_layer("qbn_4").set_weights(
      [np.array([0, 0]), np.array([0.3, 16.8])])
  hw_weight_dict = model_save_quantized_weights(model)

  return (hw_weight_dict, model)


def add_qmodel(quantizer1, quantizer2, quantizer3):
  # Layer that add a list of inputs.
  # It takes as input a list of tensors, all of the same shape,
  # and returns a single tensor (also of the same shape).
x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer1, name="dense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer2, name="dense_1")(x2) x3 = input3 = keras.layers.Input(shape=(64,), name="input_2") x3 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer3, name="dense_2")(x3) x = keras.layers.add([x1, x2, x3], name="add") model = keras.Model( inputs=[input1, input2, input3], outputs=[x]) return model def multiply_qmodel(): # element-wise multiply a list of inputs. # It takes as input a list of tensors, all of the same shape, # and returns a single tensor (also of the same shape). x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizers.quantized_bits(4, 0, 1), name="dense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizers.quantized_bits(5, 0, 1), name="dense_1")(x2) x3 = input3 = keras.layers.Input(shape=(64,), name="input_2") x3 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizers.quantized_bits(6, 0, 1), name="dense_2")(x3) x = keras.layers.multiply([x1, x2, x3], name="multiply") model = keras.Model( inputs=[input1, input2, input3], outputs=[x]) return model def pooling_qmodel(): # Average pooling and global average pooling operation for spatial data. 
x = input1 = keras.layers.Input((16, 16, 3), name="input") x = keras.layers.AveragePooling2D(pool_size=(2, 2), padding="valid", name="avg_pooling")(x) x = keras.layers.GlobalAveragePooling2D(name="global_avg_pooling")(x) model = keras.Model(inputs=[input1], outputs=[x]) return model def maximum_qmodel(quantizer1, quantizer2, quantizer3): # element-wise maximum/minimum/average of a list of inputs. # It takes as input a list of tensors, all of the same shape, # and returns a single tensor (also of the same shape). x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer1, name="qdense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer2, name="dense_1")(x2) x3 = input3 = keras.layers.Input(shape=(64,), name="input_2") x3 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), activation=quantizer3, name="dense_2")(x3) x = keras.layers.maximum([x1, x2, x3], name="maximum") model = keras.Model( inputs=[input1, input2, input3], outputs=[x]) return model def concatenate_qmodel(quantizer1, quantizer2, quantizer3): # Layer that concatenates a list of inputs. # It takes as input a list of tensors, all of the same shape except # for the concatenation axis, and returns a single tensor, # the concatenation of all inputs.. 
x1 = input1 = keras.layers.Input((16, 16, 1), name="input_0") x1 = QConv2D( 16, 2, 2, kernel_quantizer=quantizer1, bias_quantizer=quantizer1, name="conv2d_0")(x1) x2 = input2 = keras.layers.Input((16, 16, 1), name="input_1") x2 = QConv2D( 32, 2, 2, kernel_quantizer=quantizer2, bias_quantizer=quantizer2, name="conv2d_1")(x2) x3 = input3 = keras.layers.Input((16, 16, 1), name="input_2") x3 = QConv2D( 64, 2, 2, kernel_quantizer=quantizer3, bias_quantizer=quantizer3, name="conv2d_2")(x3) x = keras.layers.concatenate([x1, x2, x3], axis=-1, name="concatenate") model = keras.Model(inputs=[input1, input2, input3], outputs=[x]) return model def run(model, input_quantizers, is_inference=False, verbose=False, hw_weight_dict=None): (graph, source_quantizer_list) = qgraph.CreateGraph( model, input_quantizers) # qgraph.PrintGraph(graph) qgraph.GraphPropagateActivationsToEdges(graph) layer_map = generate_layer_data_type_map.generate_layer_data_type_map( graph=graph, source_quantizer_list=source_quantizer_list, is_inference=is_inference, hw_weight_dict=hw_weight_dict) # interface.print_layer_data_type_map(dict) output_dict = interface.map_to_json(layer_map) if verbose: dict_to_json = json.dumps(output_dict, indent=4) print(dict_to_json) return output_dict def test_wrong_input_quantizers(): input_quantizers = [ quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 1), quantizers.quantized_bits(6, 0, 1) ] # INPUT_QUANTIZERS = None x1 = input1 = keras.layers.Input((16,), name="input_0") x1 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="dense_0")(x1) x2 = input2 = keras.layers.Input(shape=(32,), name="input_1") x2 = QDense( 8, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="dense_1")(x2) x = keras.layers.add([x1, x2], name="add") model = keras.Model( inputs=[input1, input2], outputs=[x]) with 
pytest.raises(qgraph.WrongInputQuantizerError): run(model, input_quantizers) def test_qbn_inference(): input_quantizers = [quantizers.quantized_bits(4, 0, 1)] (hw_weight_dict, model) = qbn_model_inference() dtype_dict = run(model, input_quantizers, is_inference=True, hw_weight_dict=hw_weight_dict) multiplier = dtype_dict["qconv2d_1"]["multiplier"] accumulator = dtype_dict["qconv2d_1"]["accumulator"] output = dtype_dict["qconv2d_1"]["output_quantizer"] fused_accumulator = dtype_dict["qconv2d_1"]["fused_accumulator"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 7 assert multiplier["int_bits"] == 1 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "mul" assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 9 assert accumulator["int_bits"] == 3 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" assert fused_accumulator["quantizer_type"] == "quantized_bits" assert fused_accumulator["bits"] == 25 assert fused_accumulator["int_bits"] == 4 assert accumulator["is_signed"] == 1 assert fused_accumulator["op_type"] == "add" # Tests auto_po2 type of quantizer in conv2d and batchnorm fusing. Here # we set the layer weights in a way that scale value would be !=1 so that # we need to check bits and int_bits are adjusted properly to incorporate # the scale value. multiplier = dtype_dict["qconv2d_3"]["multiplier"] accumulator = dtype_dict["qconv2d_3"]["accumulator"] output = dtype_dict["qconv2d_3"]["output_quantizer"] fused_accumulator = dtype_dict["qconv2d_3"]["fused_accumulator"] # w_bits = 3, w_intbits =0 # x_bits = 5, x_intbits =0 # weight scale = [[[[16. 2.]]]] # before scale adjustment: m_bits=(3-1)+(5-1)+1=7 m_intbits = 0 # after scale adjustment: m_bits=7+(log16-log2)=10 m_intbits = 0+log16=4 # Note: dict here added sign bit to the intbit to match hardware format. 
assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 10 assert multiplier["int_bits"] == 5 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "mul" assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 13 assert accumulator["int_bits"] == 8 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" # Calculates fused_accumulator according to fused_bn_factory/FusedBNFactory. # For example, wiht inv_quantizer scale:[2. 2.] we have here, # multiplier_x before adjust - bits:19 int_bits:6 # multiplier_x after adjust - bits:19 int_bits:7 assert fused_accumulator["quantizer_type"] == "quantized_bits" assert fused_accumulator["bits"] == 20 assert fused_accumulator["int_bits"] == 9 assert accumulator["is_signed"] == 1 assert fused_accumulator["op_type"] == "add" def test_invalid_denominator_qbn(): input_quantizers = None act = "binary(use_01=0)" gamma = quantizers.ternary() variance = gamma model = qbn_model( act=act, gamma=gamma, variance=variance, beta=None, mean=None) with pytest.raises(divider_factory.UnacceptedQuantizerError): run(model, input_quantizers) def test_conv2d(): input_quantizers = None act = "quantized_bits(6, 0, 1)" weight = quantizers.quantized_relu_po2(4, 2) x = x_in = keras.layers.Input((23, 23, 1), name="input") x = QActivation(act, name="QA_0")(x) x = QConv2D( 16, 2, 2, kernel_quantizer=weight, bias_quantizer=weight, name="qconv2d_1")(x) model = keras.Model(inputs=[x_in], outputs=[x]) dtype_dict = run(model, input_quantizers) multiplier = dtype_dict["qconv2d_1"]["multiplier"] accumulator = dtype_dict["qconv2d_1"]["accumulator"] op_count = dtype_dict["qconv2d_1"]["operation_count"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 15 assert multiplier["int_bits"] == 2 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "shifter" assert accumulator["quantizer_type"] == "quantized_bits" assert 
accumulator["bits"] == 18 assert accumulator["int_bits"] == 5 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" assert op_count == 7744 def test_qdense_model_fork(): input_quantizers = [quantizers.quantized_bits(4, 0, 1)] model = qdense_model_fork() dtype_dict = run(model, input_quantizers) multiplier = dtype_dict["qdense_3"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 5 assert multiplier["int_bits"] == 1 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "mux" accumulator = dtype_dict["qdense_3"]["accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 11 assert accumulator["int_bits"] == 7 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" def test_util_layers(): input_quantizers = None # quantizers.quantized_bits(4, 0, 1) act = "quantized_bits(6, 0, 1)" x = x_in = keras.layers.Input((24, 24, 1), name="input") x = QActivation(act, name="QA_0")(x) x = keras.layers.Reshape((12 * 12, 4, 1), name="reshape_1")(x) x = keras.layers.MaxPooling2D( pool_size=(2, 2), name="maxpooling_2")(x) x = keras.layers.Flatten(name="flatten_3")(x) x = QDense( 30, kernel_quantizer=quantizers.binary(use_01=1), bias_quantizer=quantizers.binary(use_01=1), activation=quantizers.quantized_po2(3, 2), name="qdense_4")(x) model = keras.Model(inputs=[x_in], outputs=[x]) dtype_dict = run(model, input_quantizers) multiplier = dtype_dict["qdense_4"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 6 assert multiplier["int_bits"] == 1 assert multiplier["is_signed"] == 1 assert multiplier["op_type"] == "and" accumulator = dtype_dict["qdense_4"]["accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 15 assert accumulator["int_bits"] == 10 assert accumulator["is_signed"] == 1 assert accumulator["op_type"] == "add" output = 
dtype_dict["qdense_4"]["output_quantizer"] assert output["quantizer_type"] == "quantized_po2" assert output["bits"] == 3 assert output["is_signed"] == 1 assert output["max_value"] == 2 def test_merge_layers(): input_quantizers = [ quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 1), quantizers.quantized_bits(6, 0, 1)] model = add_qmodel( quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0), quantizers.quantized_bits(6, 0, 1)) dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["add"]["Add_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 7 assert merge_quantizer["int_bits"] == 2 assert merge_quantizer["is_signed"] == 1 model = multiply_qmodel() dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["multiply"]["Multiply_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 13 assert merge_quantizer["int_bits"] == 1 assert merge_quantizer["is_signed"] == 1 assert merge_quantizer["op_type"] == "mul" model = maximum_qmodel( quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0), quantizers.quantized_bits(6, 0, 1)) dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["maximum"]["Maximum_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 6 assert merge_quantizer["int_bits"] == 1 assert merge_quantizer["is_signed"] == 1 model = concatenate_qmodel( quantizers.quantized_bits(4, 0, 1), quantizers.quantized_bits(5, 0, 0), quantizers.quantized_bits(6, 0, 1)) dtype_dict = run(model, input_quantizers) merge_quantizer = dtype_dict["concatenate"]["Concatenate_quantizer"] assert merge_quantizer["quantizer_type"] == "quantized_bits" assert merge_quantizer["bits"] == 14 assert merge_quantizer["int_bits"] == 4 assert merge_quantizer["is_signed"] == 1 def test_pooling(): input_quantizers = 
[quantizers.quantized_bits(8, 0, 1)] model = pooling_qmodel() dtype_dict = run(model, input_quantizers) accumulator = dtype_dict["avg_pooling"]["pool_sum_accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 10 assert accumulator["int_bits"] == 3 accumulator = dtype_dict["global_avg_pooling"]["pool_sum_accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 16 assert accumulator["int_bits"] == 9 def test_qenergy(): x = x_in = keras.layers.Input((784,), name="input") x = QDense( 300, kernel_quantizer=quantizers.binary(), bias_quantizer=quantizers.binary(), name="d0")(x) x = QActivation("quantized_relu(4,0)", name="d0_qr4")(x) x = QDense(100, kernel_quantizer=quantizers.quantized_bits(4, 0, 1), bias_quantizer=quantizers.quantized_bits(4, 0, 1), name="d1")(x) x = QAdaptiveActivation("quantized_relu", 4, name="d1_qr4")(x) x = QDense( 10, kernel_quantizer=quantizers.quantized_bits(4, 0, 1), bias_quantizer=quantizers.quantized_bits(4, 0, 1), name="d2")(x) x = keras.layers.Activation("softmax", name="softmax")(x) model = keras.Model(inputs=[x_in], outputs=[x]) # print(model.summary()) reference_internal = "int8" reference_accumulator = "int32" # get reference energy cost q = run_qtools.QTools( model, process="horowitz", source_quantizers=reference_internal, is_inference=False, weights_path=None, keras_quantizer=reference_internal, keras_accumulator=reference_accumulator, for_reference=True) ref_energy_dict = q.pe( weights_on_memory="sram", activations_on_memory="sram", min_sram_size=8*16*1024*1024, rd_wr_on_io=False) reference_size = q.extract_energy_sum( qtools_settings.cfg.include_energy, ref_energy_dict) # get trial energy cost q = run_qtools.QTools( model, process="horowitz", source_quantizers=reference_internal, is_inference=False, weights_path=None, keras_quantizer=reference_internal, keras_accumulator=reference_accumulator, for_reference=False) trial_energy_dict = q.pe( 
weights_on_memory="sram", activations_on_memory="sram", min_sram_size=8*16*1024*1024, rd_wr_on_io=False) trial_size = q.extract_energy_sum( qtools_settings.cfg.include_energy, trial_energy_dict) # Reference energy number is now updated with keras_accumulator as # output quantizer tmp = ref_energy_dict["d0"]["energy"] assert tmp["inputs"] == pytest.approx(372.77, abs=0.1) assert tmp["outputs"] == pytest.approx(570.57, abs=0.1) assert tmp["parameters"] == pytest.approx(111975.96, abs=0.1) assert tmp["op_cost"] == pytest.approx(70560.0, abs=0.1) tmp = ref_energy_dict["d1"]["energy"] assert tmp["inputs"] == pytest.approx(570.57, abs=0.1) assert tmp["outputs"] == pytest.approx(190.19, abs=0.1) assert tmp["parameters"] == pytest.approx(14313.66, abs=0.1) assert tmp["op_cost"] == pytest.approx(26500.0, abs=0.1) tmp = ref_energy_dict["d2"]["energy"] assert tmp["inputs"] == pytest.approx(190.19, abs=0.1) assert tmp["outputs"] == pytest.approx(19.02, abs=0.1) assert tmp["parameters"] == pytest.approx(483.08, abs=0.1) assert tmp["op_cost"] == pytest.approx(883.33, abs=0.1) # Trial tmp = trial_energy_dict["d0"]["energy"] assert tmp["inputs"] == pytest.approx(372.77, abs=0.1) assert tmp["outputs"] == pytest.approx(342.34, abs=0.1) assert tmp["parameters"] == pytest.approx(13997.95, abs=0.1) assert tmp["op_cost"] == pytest.approx(15729.0, abs=0.1) tmp = trial_energy_dict["d1"]["energy"] assert tmp["inputs"] == pytest.approx(72.27, abs=0.1) assert tmp["outputs"] == pytest.approx(110.31, abs=0.1) assert tmp["parameters"] == pytest.approx(7158.73, abs=0.1) assert tmp["op_cost"] == pytest.approx(3250.0, abs=0.1) tmp = trial_energy_dict["d2"]["energy"] assert tmp["inputs"] == pytest.approx(26.63, abs=0.1) assert tmp["outputs"] == pytest.approx(11.41, abs=0.1) assert tmp["parameters"] == pytest.approx(243.44, abs=0.1) assert tmp["op_cost"] == pytest.approx(102.08, abs=0.1) # print(ref_energy_dict) # print(trial_energy_dict) assert int(reference_size) == 226629 assert int(trial_size) 
== 41070 def test_quntized_reference_energy_same_as_floating_trial(): # Test if reference energy from quantized model and floating model is the # same def get_model(quantize=False): x1 = input1 = keras.layers.Input((16, 16, 3), name="input_0") if quantize: x1 = QConv2D( 16, 2, 2, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="conv_0")(x1) else: x1 = keras.layers.Conv2D(16, 2, 2, name="conv_0")(x1) x2 = input2 = keras.layers.Input(shape=(16, 16, 3), name="input_1") if quantize: x2 = QConv2D( 16, 2, 2, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="conv_1")(x2) else: x2 = keras.layers.Conv2D(16, 2, 2, name="conv_1")(x2) x = keras.layers.add([x1, x2], name="add") if quantize: x = QActivation(activation="quantized_relu(8, 2)", name="relu")(x) else: x = keras.layers.Activation("relu", name="relu")(x) if quantize: x = QConv2D( 2, 2, 2, kernel_quantizer=quantizers.quantized_bits(5, 0, 1), bias_quantizer=quantizers.quantized_bits(5, 0, 1), name="conv_2")(x) else: x = keras.layers.Conv2D(2, 2, 2, name="conv_2")(x) model = keras.Model(inputs=[input1, input2], outputs=[x]) return model def get_qenergy(model, qenergy_config, for_reference): q = run_qtools.QTools( model, process=qenergy_config["process"], source_quantizers=qenergy_config["reference_internal"], is_inference=qenergy_config["trained_model"], weights_path=None, keras_quantizer=qenergy_config["reference_internal"], keras_accumulator=qenergy_config["reference_accumulator"], for_reference=for_reference) # caculate energy of the derived data type map. 
energy_dict = q.pe( weights_on_memory=qenergy_config["parameters_on_memory"], activations_on_memory=qenergy_config["activations_on_memory"], min_sram_size=qenergy_config["min_sram_size"], rd_wr_on_io=qenergy_config["rd_wr_on_io"]) total_energy = q.extract_energy_sum(qtools_settings.cfg.include_energy, energy_dict) return q, total_energy qenergy_config = { "trained_model": True, "delta_p": 8.0, "delta_n": 8.0, "rate": 2.0, "stress": 1.0, "process": "horowitz", "parameters_on_memory": "sram", "activations_on_memory": "sram", "rd_wr_on_io": False, "min_sram_size": 0, "source_quantizers": ["quantizers.quantized_bits(8, 0, 1)"], "reference_internal": "int8", "reference_accumulator": "int32" } float_model = get_model(quantize=False) quantized_model = get_model(quantize=True) _, float_reference_energy = get_qenergy( float_model, qenergy_config, for_reference=False) _, float_trial_energy = get_qenergy( float_model, qenergy_config, for_reference=True) _, quantized_reference_energy = get_qenergy( quantized_model, qenergy_config, for_reference=True) assert float_reference_energy == quantized_reference_energy assert float_reference_energy == float_trial_energy def test_auto_po2(): def gen_model(img_shape): img_input = x = keras.Input(shape=img_shape) x = QConv2D( filters=5, kernel_size=4, strides=4, kernel_quantizer=quantizers.quantized_bits(8, 3, alpha="auto_po2"), bias_quantizer=quantizers.quantized_bits(8, 3), name="conv")(x) x = QActivation(activation=quantizers.quantized_relu(4, 0), name="act")(x) x = keras.layers.Flatten(name="flatten")(x) x = QDense(5, kernel_quantizer=quantizers.quantized_bits( 8, 0, alpha="auto_po2"), bias_quantizer=quantizers.quantized_bits(8, 3), name="dense")(x) model = keras.Model(inputs=img_input, outputs=[x]) return model model = gen_model((32, 32, 3,)) model.compile(loss="mse", run_eagerly=True) model.layers[1].quantizers[0].scale = tf.constant( [[[[0.0625, 0.0625, 0.0625, 0.0625, 0.03125]]]]) model.layers[4].quantizers[0].scale = 
tf.constant([[0.5, 0.5, 1, 0.5, 0.25]]) input_quantizers = [ quantizers.quantized_bits(bits=8, integer=0, keep_negative=False) ] dtype_dict = run(model, input_quantizers) # Original multiplier has 16 bits(16=8+8) and 3 int_bits multiplier = dtype_dict["conv"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 16 assert multiplier["int_bits"] == 4 # Original accumulator has 16+log2(4*4*3)+1 bits, # and 4+log2(4*4*3)+1 int_bits accumulator = dtype_dict["conv"]["accumulator"] assert accumulator["quantizer_type"] == "quantized_bits" assert accumulator["bits"] == 23 assert accumulator["int_bits"] == 11 # adjusting multiplier with auto_po2: # bits = max_fractional_bits + max_int_bits = bits + max_shift - min_shift # max_shift = log2(0.0625) = -4 # min_shift=log2(0.03125) = -5 # So adjusted multiplier bits=17, 1 bit bigger than original multiplier. # Modified multiplier int_bits = int_bits + max_shift = 3 - 4 = -1 # Because in datatype map we add int_bits with 1 extra sign bit, # adjusted multiplier int_bits = 0, 4 bit smaller than original multiplier. 
# When we pass the adjusted multiplier to fused_accumulator, we # get bits = 23+1=24, and int_bits = 11-4=7 fused_accumulator = dtype_dict["conv"]["fused_accumulator"] assert fused_accumulator["quantizer_type"] == "quantized_bits" assert fused_accumulator["bits"] == 24 assert fused_accumulator["int_bits"] == 7 multiplier = dtype_dict["dense"]["multiplier"] assert multiplier["quantizer_type"] == "quantized_bits" assert multiplier["bits"] == 12 assert multiplier["int_bits"] == 1 def test_big_bias_quantizer(): q1 = quantizer_impl.QuantizedBits() q1.convert_qkeras_quantizer(quantizers.quantized_bits(8, 3)) q2 = quantizer_impl.QuantizedBits() q2.convert_qkeras_quantizer(quantizers.quantized_bits(16, 4)) r = adder_impl.FixedPointAdder(q1, q2) # int_bits = max(q1.int_bits, q2.int_bits) + 1 # bits = int_bits + sign_bit + max(q1_fraction_bit, q2_fraction bit) assert r.output.bits == 17 assert r.output.int_bits == 5 def test_qdepthwiseconv2d(): x = x_in = keras.layers.Input((64, 64, 3), name="input") x = QDepthwiseConv2D( kernel_size=(1, 7), depthwise_quantizer=quantizers.quantized_bits(8, 0, 1, alpha=1.0), bias_quantizer=quantizers.quantized_bits(12, 6, 1, alpha=1.0), name="dw_conv")(x) x = QConv2D( filters=16, kernel_size=(1, 1), bias_quantizer=quantizers.quantized_bits(12, 4, 1, alpha=1.0), kernel_quantizer=quantizers.quantized_bits(4,0, 1, alpha=1.0), name="pw_conv")(x) model = keras.Model(inputs=[x_in], outputs=[x]) input_quantizers = [quantizers.quantized_bits(8, 0, 1)] dtype_dict = run(model, input_quantizers) # multiplier_int_bits = 0(x_int_bits) + 0(w_int_bits) = 0 (excluding sign_bit) # multiplier_fractional_bits = 7(x_fractional) + 7(w_fractional) = 14 # multiplier_bits = 0 + 14 + sign_bit = 15 assert dtype_dict["dw_conv"]["multiplier"]["bits"] == 15 assert dtype_dict["dw_conv"]["multiplier"]["int_bits"] == 1 # accumulator_int_bits = max(bias_int_bits, log7 + 0) + 1 = 7 # accumulator_fractional_bits = max(bias_fractional, 14) = 14 # accumulator_bits = int_bits + 
fractional_bits + sign_bit = 22 assert dtype_dict["dw_conv"]["accumulator"]["bits"] == 22 assert dtype_dict["dw_conv"]["accumulator"]["int_bits"] == 8 assert dtype_dict["pw_conv"]["multiplier"]["bits"] == 25 assert dtype_dict["pw_conv"]["multiplier"]["int_bits"] == 8 assert dtype_dict["pw_conv"]["accumulator"]["bits"] == 28 assert dtype_dict["pw_conv"]["accumulator"]["int_bits"] == 11 def test_divide_and_conquer_sequential_conv2d(): # These following values are verified manually to be globally optimal. # The test has two purposes: # 1) check if the code runs ok; # 2) for a simple conv2d model, the output is as expected. # We will need to add more tests with more complex graph architecture # in the future as our solution grows. xin = x = tf.keras.layers.Input(shape=(16, 16, 1), name="input_layer") x = QConv2D( kernel_size=3, filters=3, use_bias=False, kernel_quantizer=quantizers.quantized_bits(4, 0, alpha=1.0), name="conv_1", )(x) x = QConv2D( kernel_size=3, filters=5, use_bias=False, kernel_quantizer=quantizers.quantized_bits(4, 0, alpha=1.0), name="conv_2", )(x) # Create a model model = tf.keras.Model(inputs=xin, outputs=x) # Test if the flow works perperly. In the future we will construct more # detailed tests regarding cost once the cost design matures. assert divide_and_conquer.estimate_model_cost( model, input_quantizer_bits=8, target_OutElementPerClk=10, target_out_throughput=1.0, compute_to_memory_max_ratio=1, memory_to_unroll_max_ratio=1, mode=divide_and_conquer.CostMode.ACE, ) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/qtools_util_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for qtools_util module.""" import json import numpy as np import pytest import tensorflow.keras as keras import tensorflow as tf from qkeras import quantizers from qkeras.qtools import qtools_util from qkeras.qtools import quantized_operators from qkeras.qtools.quantized_operators import quantizer_factory as quantizer_factory_module @pytest.mark.parametrize( "w_bits, w_int_bits, weight_quantizer_scale_type, " "expected_bits_before_adjustment, expected_int_bits_before_adjustment, " "expected_bits_after_adjustment, expected_int_bits_after_adjustment", [ (8, 0, "1.0", 11, 2, 11, 2), (4, 2, "auto_po2", 7, 4, 10, 5), (4, 0, "post_training_scale", 7, 2, 10, 5), ], ) def test_adjust_multiplier_for_auto_po2( w_bits, w_int_bits, weight_quantizer_scale_type, expected_bits_before_adjustment, expected_int_bits_before_adjustment, expected_bits_after_adjustment, expected_int_bits_after_adjustment): """Test adjust_multiplier_for_auto_po2 with auto_po2 weight quantizer.""" multiplier_factory = quantized_operators.MultiplierFactory() quantizer_factory = quantizer_factory_module.QuantizerFactory() qkeras_input_quantizer = quantizers.quantized_bits(4, 2, 1) # Generate the weight quantizer. if weight_quantizer_scale_type in ["auto_po2", "post_training_scale"]: # Compute the scale for auto_po2 quantizer. 
qkeras_weight_quantizer = quantizers.quantized_bits( bits=w_bits, integer=w_int_bits, keep_negative=True, symmetric=True, alpha="auto_po2") weight_arr = np.array([1.07, -1.7, 3.06, 1.93, 0.37, -2.43, 6.3, -2.9] ).reshape((2, 4)) qkeras_weight_quantizer(weight_arr) if weight_quantizer_scale_type == "post_training_scale": # Set the post_training_scale as fixed scale. auto_po2_scale = qkeras_weight_quantizer.scale.numpy() qkeras_weight_quantizer = quantizers.quantized_bits( bits=w_bits, integer=w_int_bits, alpha="auto_po2", post_training_scale=auto_po2_scale) else: qkeras_weight_quantizer = quantizers.quantized_bits(w_bits, w_int_bits) input_quantizer = quantizer_factory.make_quantizer( qkeras_input_quantizer) weight_quantizer = quantizer_factory.make_quantizer( qkeras_weight_quantizer) multiplier = multiplier_factory.make_multiplier( weight_quantizer, input_quantizer) np.testing.assert_equal(multiplier.output.bits, expected_bits_before_adjustment) np.testing.assert_equal(multiplier.output.int_bits, expected_int_bits_before_adjustment) qtools_util.adjust_multiplier_for_auto_po2( multiplier, qkeras_weight_quantizer) print(f"after adjustment: {multiplier.output.bits}, {multiplier.output.int_bits}") np.testing.assert_equal(multiplier.output.bits, expected_bits_after_adjustment) np.testing.assert_equal(multiplier.output.int_bits, expected_int_bits_after_adjustment) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/quantizer_impl_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for methods in quantizer_impl.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pytest
import tensorflow as tf

from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from qkeras import *
from qkeras.qtools.quantized_operators import quantizer_impl
from qkeras import quantizers
from numpy.testing import assert_equal

# pylint: disable=invalid-name


def test_QuantizedBits():
  """Round-trips quantized_bits through the qtools QuantizedBits wrapper."""
  qkeras_quantizer = quantizers.quantized_bits()
  qtools_quantizer = quantizer_impl.QuantizedBits()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      symmetric=qkeras_quantizer.symmetric,
      alpha=qkeras_quantizer.alpha,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      scale_axis=qkeras_quantizer.scale_axis,
      qnoise_factor=qkeras_quantizer.qnoise_factor)

  # The converted-back quantizer must match the original field by field.
  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_QuantizedBits_ElementsPerScale():
  """Tests quantized_bits with elements_per_scale."""

  def _get_min_max_po2_exponent(x):
    """Get min and max po2 exponent of x."""
    po2_x = K.log(x)/np.log(2.0)
    return (tf.math.reduce_min(po2_x).numpy(),
            tf.math.reduce_max(po2_x).numpy())

  qkeras_quantizer = quantizers.quantized_bits(
      alpha="auto_po2", elements_per_scale=[1, 1], scale_axis=[1, 2],
      min_po2_exponent=-3, max_po2_exponent=3)
  qtools_quantizer = quantizer_impl.QuantizedBits()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      symmetric=qkeras_quantizer.symmetric,
      alpha=qkeras_quantizer.alpha,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      scale_axis=qkeras_quantizer.scale_axis,
      qnoise_factor=qkeras_quantizer.qnoise_factor,
      elements_per_scale=qkeras_quantizer.elements_per_scale,
      min_po2_exponent=qkeras_quantizer.min_po2_exponent,
      max_po2_exponent=qkeras_quantizer.max_po2_exponent,
  )

  # for quantized_bits the scale is multiplied by the integer scale as well
  # the integer scale depends on the sign bit
  integer_po2_scale = new_quantizer.bits - new_quantizer.keep_negative

  # Test for input tensors of rank 3
  x_r3 = tf.random.uniform([1, 8, 8])
  new_quantizer(x_r3)
  x_r3_scale = new_quantizer.scale
  xq_r3_min_exp, xq_r3_max_exp = _get_min_max_po2_exponent(x_r3_scale)

  assert_equal(new_quantizer.scale.shape, [1, 8, 8])
  # Scale exponents must stay inside [min_po2_exponent, max_po2_exponent]
  # widened by the integer scale.
  assert xq_r3_min_exp >= -3*integer_po2_scale
  assert xq_r3_max_exp <= 3*integer_po2_scale

  # Test for input tensors of rank 4
  x_r4 = tf.random.uniform([1, 2, 4, 8])
  new_quantizer(x_r4)
  x_r4_scale = new_quantizer.scale
  xq_r4_min_exp, xq_r4_max_exp = _get_min_max_po2_exponent(x_r4_scale)

  assert_equal(new_quantizer.scale.shape, [1, 2, 4, 1])
  assert xq_r4_min_exp >= -3*integer_po2_scale
  assert xq_r4_max_exp <= 3*integer_po2_scale


def test_QuantizedTanh():
  """Round-trips quantized_tanh through the qtools QuantizedTanh wrapper."""
  qkeras_quantizer = quantizers.quantized_tanh()
  qtools_quantizer = quantizer_impl.QuantizedTanh()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      symmetric=qkeras_quantizer.symmetric)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_QuantizedUlaw():
  """Round-trips quantized_ulaw through the qtools QuantizedUlaw wrapper."""
  qkeras_quantizer = quantizers.quantized_ulaw()
  qtools_quantizer = quantizer_impl.QuantizedUlaw()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      symmetric=qkeras_quantizer.symmetric,
      u=qkeras_quantizer.u)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_Binary():
  """Round-trips binary through the qtools Binary wrapper."""
  qkeras_quantizer = quantizers.binary()
  qtools_quantizer = quantizer_impl.Binary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_StochasticBinary():
  """Round-trips stochastic_binary through the qtools StochasticBinary."""
  qkeras_quantizer = quantizers.stochastic_binary()
  qtools_quantizer = quantizer_impl.StochasticBinary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      temperature=qkeras_quantizer.temperature,
      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_Bernoulli():
  """Round-trips bernoulli through the qtools Bernoulli wrapper."""
  qkeras_quantizer = quantizers.bernoulli()
  qtools_quantizer = quantizer_impl.Bernoulli()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      temperature=qkeras_quantizer.temperature,
      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_QuantizedRelu():
  """Round-trips quantized_relu through the qtools QuantizedRelu wrapper."""
  qkeras_quantizer = quantizers.quantized_relu()
  qtools_quantizer = quantizer_impl.QuantizedRelu()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      use_sigmoid=qkeras_quantizer.use_sigmoid,
      negative_slope=qkeras_quantizer.negative_slope,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      relu_upper_bound=qkeras_quantizer.relu_upper_bound,
      is_quantized_clip=qkeras_quantizer.is_quantized_clip,
      qnoise_factor=qkeras_quantizer.qnoise_factor)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_Ternary():
  """Round-trips ternary through the qtools Ternary wrapper."""
  qkeras_quantizer = quantizers.ternary()
  qtools_quantizer = quantizer_impl.Ternary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      threshold=qkeras_quantizer.threshold,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      number_of_unrolls=qkeras_quantizer.number_of_unrolls)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_StochasticTernary():
  """Round-trips stochastic_ternary through the qtools StochasticTernary."""
  qkeras_quantizer = quantizers.stochastic_ternary()
  qtools_quantizer = quantizer_impl.StochasticTernary()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      alpha=qkeras_quantizer.alpha,
      threshold=qkeras_quantizer.threshold,
      temperature=qkeras_quantizer.temperature,
      use_real_sigmoid=qkeras_quantizer.use_real_sigmoid,
      number_of_unrolls=qkeras_quantizer.number_of_unrolls)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_PowerOfTwo():
  """Round-trips quantized_po2 through the qtools PowerOfTwo wrapper."""
  qkeras_quantizer = quantizers.quantized_po2()
  qtools_quantizer = quantizer_impl.PowerOfTwo(is_signed=True)
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      negative_slope=None,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      quadratic_approximation=qkeras_quantizer.quadratic_approximation)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_ReluPowerOfTwo():
  """Round-trips quantized_relu_po2 through the qtools ReluPowerOfTwo."""
  qkeras_quantizer = quantizers.quantized_relu_po2()
  qtools_quantizer = quantizer_impl.ReluPowerOfTwo()
  qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
  new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
      negative_slope=qkeras_quantizer.negative_slope,
      use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
      quadratic_approximation=qkeras_quantizer.quadratic_approximation)

  result = new_quantizer.__dict__
  for (key, val) in result.items():
    assert_equal(val, qkeras_quantizer.__dict__[key])


def test_GetScale_PerChannelScale():
  """Checks _get_scale output shape with per_channel_scale on/off, rank 1-4."""
  # Rank1 tensors
  x_r1 = tf.ones([4])
  q_r1 = tf.ones([4])
  scale_r1_pcs_true = quantizers._get_scale(
      "auto", x_r1, q_r1, scale_axis=None, per_channel_scale=True)
  scale_r1_pcs_false = quantizers._get_scale(
      "auto", x_r1, q_r1, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r1_pcs_true).numpy(), [4])
  assert_equal(tf.shape(scale_r1_pcs_false).numpy(), [1])

  # Rank2 tensors
  x_r2 = tf.ones([2, 4])
  q_r2 = tf.ones([2, 4])
  scale_r2_pcs_true = quantizers._get_scale(
      "auto", x_r2, q_r2, scale_axis=None, per_channel_scale=True)
  scale_r2_pcs_false = quantizers._get_scale(
      "auto", x_r2, q_r2, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r2_pcs_true).numpy(), [1, 4])
  assert_equal(tf.shape(scale_r2_pcs_false).numpy(), [1, 1])

  # Rank3 tensors
  x_r3 = tf.ones([3, 3, 4])
  q_r3 = tf.ones([3, 3, 4])
  scale_r3_pcs_true = quantizers._get_scale(
      "auto", x_r3, q_r3, scale_axis=None, per_channel_scale=True)
  scale_r3_pcs_false = quantizers._get_scale(
      "auto", x_r3, q_r3, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r3_pcs_true).numpy(), [1, 1, 4])
  assert_equal(tf.shape(scale_r3_pcs_false).numpy(), [1, 1, 1])

  # Rank4 tensors
  x_r4 = tf.ones([1, 1, 3, 4])
  q_r4 = tf.ones([1, 1, 3, 4])
  scale_r4_pcs_true = quantizers._get_scale(
      "auto", x_r4, q_r4, scale_axis=None, per_channel_scale=True)
  scale_r4_pcs_false = quantizers._get_scale(
      "auto", x_r4, q_r4, scale_axis=None, per_channel_scale=False)
  assert_equal(tf.shape(scale_r4_pcs_true).numpy(), [1, 1, 1, 4])
  assert_equal(tf.shape(scale_r4_pcs_false).numpy(), [1, 1, 1, 1])


def _get_num_unique_elements(input_tensor):
  # Number of distinct scalar values in the tensor.
  return len(np.unique(input_tensor.numpy()))


def test_GetScale_ElementsPerScale_Scalar_ScaleAxis_EPS():
  """Checks _get_scale with scalar elements_per_scale/scale_axis, rank 2-4."""
  # Test get_scale function when elements_per_scale and scale_axis have scalar
  # values and the input x and q tensors have rank 2
  x_r2 = tf.random.uniform([4, 8])
  q_r2 = tf.random.uniform([4, 8])

  scale_r2_eps_none_ua_none = quantizers._get_scale(
      "auto", x_r2, q_r2, elements_per_scale=None, scale_axis=None)
  scale_r2_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r2, q_r2, elements_per_scale=2, scale_axis=0)
  scale_r2_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r2, q_r2, elements_per_scale=2, scale_axis=1)

  assert_equal(tf.shape(scale_r2_eps_none_ua_none).numpy(), [1, 8])
  assert_equal(_get_num_unique_elements(scale_r2_eps_none_ua_none), 8)

  assert_equal(tf.shape(scale_r2_eps_2_ua_0).numpy(), [4, 1])
  assert_equal(_get_num_unique_elements(scale_r2_eps_2_ua_0), 2)

  assert_equal(tf.shape(scale_r2_eps_2_ua_1).numpy(), [1, 8])
  assert_equal(_get_num_unique_elements(scale_r2_eps_2_ua_1), 4)

  # Test get_scale function when elements_per_scale and scale_axis have scalar
  # values and the input x and q tensors have rank 3
  x_r3 = tf.random.uniform([2, 4, 8])
  q_r3 = tf.random.uniform([2, 4, 8])

  scale_r3_eps_none_ua_none = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=None, scale_axis=None)
  scale_r3_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=2, scale_axis=0)
  scale_r3_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=2, scale_axis=1)
  scale_r3_eps_2_ua_2 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=2, scale_axis=2)

  assert_equal(tf.shape(scale_r3_eps_none_ua_none).numpy(), [1, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_none), 8)

  assert_equal(tf.shape(scale_r3_eps_2_ua_0).numpy(), [2, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_0), 1)

  assert_equal(tf.shape(scale_r3_eps_2_ua_1).numpy(), [1, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_1), 2)

  assert_equal(tf.shape(scale_r3_eps_2_ua_2).numpy(), [1, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_2), 4)

  # Test get_scale function when elements_per_scale and scale_axis have scalar
  # values and the input x and q tensors have rank 4
  x_r4 = tf.random.uniform([2, 4, 8, 16])
  q_r4 = tf.random.uniform([2, 4, 8, 16])

  scale_r4_eps_none_ua_none = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=None, scale_axis=None)
  scale_r4_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=0)
  scale_r4_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=1)
  scale_r4_eps_2_ua_2 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=2)
  scale_r4_eps_2_ua_3 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=2, scale_axis=3)

  assert_equal(tf.shape(scale_r4_eps_none_ua_none).numpy(), [1, 1, 1, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_none_ua_none), 16)

  assert_equal(tf.shape(scale_r4_eps_2_ua_0).numpy(), [2, 1, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_0), 1)

  assert_equal(tf.shape(scale_r4_eps_2_ua_1).numpy(), [1, 4, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_1), 2)

  assert_equal(tf.shape(scale_r4_eps_2_ua_2).numpy(), [1, 1, 8, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_2), 4)

  assert_equal(tf.shape(scale_r4_eps_2_ua_3).numpy(), [1, 1, 1, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_2_ua_3), 8)


def test_GetScale_ElementsPerScale_List_ScaleAxis_EPS():
  """Checks _get_scale with list-valued elements_per_scale/scale_axis."""
  # Test get_scale function when elements_per_scale and scale_axis are lists of
  # rank 1 and the input x and q tensors have rank 3
  x_r3 = tf.random.uniform([2, 4, 8])
  q_r3 = tf.random.uniform([2, 4, 8])

  scale_r3_eps_none_ua_0 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=None, scale_axis=[0])
  scale_r3_eps_2_ua_0 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2], scale_axis=[0])
  scale_r3_eps_2_ua_1 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2], scale_axis=[1])
  scale_r3_eps_2_ua_2 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2], scale_axis=[2])

  assert_equal(tf.shape(scale_r3_eps_none_ua_0).numpy(), [2, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_0), 2)

  assert_equal(tf.shape(scale_r3_eps_2_ua_0).numpy(), [2, 1, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_0), 1)

  assert_equal(tf.shape(scale_r3_eps_2_ua_1).numpy(), [1, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_1), 2)

  assert_equal(tf.shape(scale_r3_eps_2_ua_2).numpy(), [1, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_2_ua_2), 4)

  # Test get_scale function when elements_per_scale and scale_axis are lists of
  # rank 2 and the input x and q tensors have rank 3
  x_r3 = tf.random.uniform([2, 4, 8])
  q_r3 = tf.random.uniform([2, 4, 8])

  scale_r3_eps_none_ua_01 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=None, scale_axis=[0, 1])
  scale_r3_eps_22_ua_01 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2, 2], scale_axis=[0, 1])
  # NOTE(review): variable name says eps_11 but elements_per_scale=[2, 2]
  # here -- confirm whether the name or the argument is intended.
  scale_r3_eps_11_ua_12 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[2, 2], scale_axis=[1, 2])
  scale_r3_eps_11_ua_02 = quantizers._get_scale(
      "auto", x_r3, q_r3, elements_per_scale=[1, 1], scale_axis=[0, 2])

  assert_equal(tf.shape(scale_r3_eps_none_ua_01).numpy(), [2, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_none_ua_01), 8)

  assert_equal(tf.shape(scale_r3_eps_22_ua_01).numpy(), [2, 4, 1])
  assert_equal(_get_num_unique_elements(scale_r3_eps_22_ua_01), 2)

  assert_equal(tf.shape(scale_r3_eps_11_ua_12).numpy(), [1, 4, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_11_ua_12), 8)

  assert_equal(tf.shape(scale_r3_eps_11_ua_02).numpy(), [2, 1, 8])
  assert_equal(_get_num_unique_elements(scale_r3_eps_11_ua_02), 16)

  # Test get_scale function when elements_per_scale and scale_axis are lists of
  # rank 3 and the input x and q tensors have rank 4
  x_r4 = tf.random.uniform([2, 4, 8, 16])
  q_r4 = tf.random.uniform([2, 4, 8, 16])

  scale_r4_eps_none_ua_012 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=None, scale_axis=[0, 1, 2])
  scale_r4_eps_221_ua_012 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[0, 1, 2])
  scale_r4_eps_221_ua_123 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[1, 2, 3])
  scale_r4_eps_221_ua_013 = quantizers._get_scale(
      "auto", x_r4, q_r4, elements_per_scale=[2, 2, 1], scale_axis=[0, 1, 3])

  assert_equal(tf.shape(scale_r4_eps_none_ua_012).numpy(), [2, 4, 8, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_none_ua_012), 64)

  assert_equal(tf.shape(scale_r4_eps_221_ua_012).numpy(), [2, 4, 8, 1])
  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_012), 16)

  assert_equal(tf.shape(scale_r4_eps_221_ua_123).numpy(), [1, 4, 8, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_123), 128)

  assert_equal(tf.shape(scale_r4_eps_221_ua_013).numpy(), [2, 4, 1, 16])
  assert_equal(_get_num_unique_elements(scale_r4_eps_221_ua_013), 32)


def test_GetScale_MinPO2Exponent_MaxPO2Exponent():
  """Verify get_scale function with min and max po2_exponent clipping."""

  def _get_min_max_po2_exponent(x):
    """Get min and max po2 exponent of x."""
    po2_x = K.log(x)/np.log(2.0)
    return (tf.math.reduce_min(po2_x).numpy(),
            tf.math.reduce_max(po2_x).numpy())

  # generate small decimal numbers to verify that po2 clipping works properly
  x = 2**tf.random.uniform(shape=[2, 4, 8], minval=-50, maxval=0)
  q = 2**tf.random.uniform(shape=[2, 4, 8], minval=-50, maxval=0)

  # set various min and max po2 exponents for the scale
  scale_min_neg3_max_1 = quantizers._get_scale(
      "auto_po2", x, q, elements_per_scale=4, scale_axis=2,
      min_po2_exponent=-3, max_po2_exponent=1)
  scale_min_neg8_max_0 = quantizers._get_scale(
      "auto_po2", x, q, elements_per_scale=4, scale_axis=2,
      min_po2_exponent=-8, max_po2_exponent=0)
  scale_min_neg10_max_1 = quantizers._get_scale(
      "auto_po2", x, q, elements_per_scale=4, scale_axis=2,
      min_po2_exponent=-10, max_po2_exponent=1)

  # verify that the output scales have the correct min and max ranges
  assert_equal(tf.shape(scale_min_neg3_max_1).numpy(), [1, 1, 8])
  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg3_max_1)
  assert min_po2_exp >= -3
  assert max_po2_exp <= 1

  assert_equal(tf.shape(scale_min_neg8_max_0).numpy(), [1, 1, 8])
  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg8_max_0)
  assert min_po2_exp >= -8
  assert max_po2_exp <= 0

  assert_equal(tf.shape(scale_min_neg10_max_1).numpy(), [1, 1, 8])
  min_po2_exp, max_po2_exp = _get_min_max_po2_exponent(scale_min_neg10_max_1)
  assert min_po2_exp >= -10
  assert max_po2_exp <= 1


def test_GetUnrolledShape_GetRolledBackShape():
  """Checks _get_unrolled_shape/_get_rolled_back_shape are inverses."""
  x_r4 = [4, 4, 8, 16]

  # Scalar unroll_factor and unroll_axis - Test _get_unrolled_shape
  unrolled_x_r4_uf_2_ua_0 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=0)
  unrolled_x_r4_uf_2_ua_1 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=1)
  unrolled_x_r4_uf_2_ua_2 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=2)
  unrolled_x_r4_uf_2_ua_3 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=2, unroll_axis=3)

  assert_equal(unrolled_x_r4_uf_2_ua_0, ([2, 2, 4, 8, 16], 0))
  assert_equal(unrolled_x_r4_uf_2_ua_1, ([4, 2, 2, 8, 16], 1))
  assert_equal(unrolled_x_r4_uf_2_ua_2, ([4, 4, 4, 2, 16], 2))
  assert_equal(unrolled_x_r4_uf_2_ua_3, ([4, 4, 8, 8, 2], 3))

  # Scalar unroll_factor and unroll_axis - Test _get_rolled_back_shape
  rolled_back_x_r4_uf_2_ua_0 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_0[0], roll_axis=unrolled_x_r4_uf_2_ua_0[1])
  rolled_back_x_r4_uf_2_ua_1 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_1[0], roll_axis=unrolled_x_r4_uf_2_ua_1[1])
  rolled_back_x_r4_uf_2_ua_2 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_2[0], roll_axis=unrolled_x_r4_uf_2_ua_2[1])
  rolled_back_x_r4_uf_2_ua_3 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_2_ua_3[0], roll_axis=unrolled_x_r4_uf_2_ua_3[1])

  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_0)
  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_1)
  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_2)
  assert_equal(x_r4, rolled_back_x_r4_uf_2_ua_3)

  # List[2] unroll_factor and unroll_axis - Test _get_unrolled_shape
  unrolled_x_r4_uf_24_ua_01 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[0, 1])
  unrolled_x_r4_uf_24_ua_12 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[1, 2])
  unrolled_x_r4_uf_24_ua_13 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[1, 3])
  unrolled_x_r4_uf_24_ua_34 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4], unroll_axis=[2, 3])

  assert_equal(unrolled_x_r4_uf_24_ua_01, ([2, 2, 1, 4, 8, 16], [0, 2]))
  assert_equal(unrolled_x_r4_uf_24_ua_12, ([4, 2, 2, 2, 4, 16], [1, 3]))
  assert_equal(unrolled_x_r4_uf_24_ua_13, ([4, 2, 2, 8, 4, 4], [1, 4]))
  assert_equal(unrolled_x_r4_uf_24_ua_34, ([4, 4, 4, 2, 4, 4], [2, 4]))

  # List[2] unroll_factor and unroll_axis - Test _get_rolled_back_shape
  rolled_back_x_r4_uf_24_ua_01 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_01[0], roll_axis=unrolled_x_r4_uf_24_ua_01[1])
  rolled_back_x_r4_uf_24_ua_12 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_12[0], roll_axis=unrolled_x_r4_uf_24_ua_12[1])
  rolled_back_x_r4_uf_24_ua_13 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_13[0], roll_axis=unrolled_x_r4_uf_24_ua_13[1])
  rolled_back_x_r4_uf_24_ua_34 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_24_ua_34[0], roll_axis=unrolled_x_r4_uf_24_ua_34[1])

  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_01)
  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_12)
  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_13)
  assert_equal(x_r4, rolled_back_x_r4_uf_24_ua_34)

  # List[3] unroll_factor and unroll_axis - Test _get_unrolled_shape
  unrolled_x_r4_uf_242_ua_012 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4, 2], unroll_axis=[0, 1, 2])
  unrolled_x_r4_uf_242_ua_023 = quantizers._get_unrolled_shape(
      x_r4, unroll_factor=[2, 4, 2], unroll_axis=[0, 2, 3])

  assert_equal(unrolled_x_r4_uf_242_ua_012,
               ([2, 2, 1, 4, 4, 2, 16], [0, 2, 4]))
  assert_equal(unrolled_x_r4_uf_242_ua_023,
               ([2, 2, 4, 2, 4, 8, 2], [0, 3, 5]))

  # List[3] unroll_factor and unroll_axis - Test _get_rolled_back_shape
  rolled_back_x_r4_uf_242_ua_012 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_242_ua_012[0], roll_axis=unrolled_x_r4_uf_242_ua_012[1])
  rolled_back_x_r4_uf_242_ua_023 = quantizers._get_rolled_back_shape(
      unrolled_x_r4_uf_242_ua_023[0], roll_axis=unrolled_x_r4_uf_242_ua_023[1])

  assert_equal(x_r4, rolled_back_x_r4_uf_242_ua_012)
  assert_equal(x_r4, rolled_back_x_r4_uf_242_ua_023)


if __name__ == "__main__":
  pytest.main([__file__])


================================================ FILE: tests/quantizer_registry_test.py ================================================
# Copyright 2024 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Unit tests for QKeras quantizer registry.""" import numpy as np import pytest from qkeras import quantizer_registry from qkeras import quantizers @pytest.mark.parametrize( "quantizer_name", [ "quantized_linear", "quantized_bits", "bernoulli", "ternary", "stochastic_ternary", "binary", "stochastic_binary", "quantized_relu", "quantized_ulaw", "quantized_tanh", "quantized_sigmoid", "quantized_po2", "quantized_relu_po2", "quantized_hswish", ], ) def test_lookup(quantizer_name): quantizer = quantizer_registry.lookup_quantizer(quantizer_name) is_class_instance = isinstance(quantizer, type) np.testing.assert_equal(is_class_instance, True) if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/range_test.py ================================================ # Copyright 2020 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Test range values that are used for codebook computation"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose
import pytest

from tensorflow.keras import backend as K

from qkeras import quantized_relu
from qkeras import quantized_bits


@pytest.mark.parametrize(
    'bits, integer, expected_values',
    [
        (3, 0, np.array([0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875])),
        (3, 1, np.array([0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75])),
        (3, 2, np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5])),
        (3, 3, np.array([0, 1, 2, 3, 4, 5, 6, 7])),
        (6, 1, np.array(
            [0.0, 0.03125, 0.0625, 0.09375, 0.125, 0.15625, 0.1875, 0.21875,
             0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,
             0.5, 0.53125, 0.5625, 0.59375, 0.625, 0.65625, 0.6875, 0.71875,
             0.75, 0.78125, 0.8125, 0.84375, 0.875, 0.90625, 0.9375, 0.96875,
             1.0, 1.03125, 1.0625, 1.09375, 1.125, 1.15625, 1.1875, 1.21875,
             1.25, 1.28125, 1.3125, 1.34375, 1.375, 1.40625, 1.4375, 1.46875,
             1.5, 1.53125, 1.5625, 1.59375, 1.625, 1.65625, 1.6875, 1.71875,
             1.75, 1.78125, 1.8125, 1.84375, 1.875, 1.90625, 1.9375, 1.96875]))
    ])
def test_quantized_relu_range(bits, integer, expected_values):
  """Test quantized_relu range function."""
  q = quantized_relu(bits, integer)
  result = q.range()
  assert_allclose(result, expected_values, rtol=1e-05)


@pytest.mark.parametrize(
    'bits, integer, expected_values',
    [
        # Note: negative representable values follow the positive ones
        # (two's-complement ordering of the codebook).
        (3, 0, np.array([0.0, 0.25, 0.5, 0.75, -1.0, -0.75, -0.5, -0.25])),
        (3, 1, np.array([0.0, 0.5, 1.0, 1.5, -2.0, -1.5, -1.0, -0.5])),
        (3, 2, np.array([0.0, 1.0, 2.0, 3.0, -4.0, -3.0, -2.0, -1.0])),
        (3, 3, np.array([0.0, 2.0, 4.0, 6.0, -8.0, -6.0, -4.0, -2.0])),
        (6, 1, np.array(
            [0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
             0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375,
             1.0, 1.0625, 1.125, 1.1875, 1.25, 1.3125, 1.375, 1.4375,
             1.5, 1.5625, 1.625, 1.6875, 1.75, 1.8125, 1.875, 1.9375,
             -2.0, -1.9375, -1.875, -1.8125, -1.75, -1.6875, -1.625, -1.5625,
             -1.5, -1.4375, -1.375, -1.3125, -1.25, -1.1875, -1.125, -1.0625,
             -1.0, -0.9375, -0.875, -0.8125, -0.75, -0.6875, -0.625, -0.5625,
             -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625]))
    ])
def test_quantized_bits_range(bits, integer, expected_values):
  """Test quantized_bits range function."""
  q = quantized_bits(bits, integer)
  result = q.range()
  assert_allclose(result, expected_values, rtol=1e-05)


if __name__ == "__main__":
  pytest.main([__file__])


================================================ FILE: tests/registry_test.py ================================================
# Copyright 2024 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Unit tests for registry."""

from numpy.testing import assert_equal
from numpy.testing import assert_raises
import pytest

from qkeras import registry


def sample_function(arg):
  """Sample function for testing."""
  return arg


class SampleClass(object):
  """Sample class for testing."""

  def __init__(self, arg):
    self._arg = arg

  def get_arg(self):
    # Accessor used to validate registered instances.
    return self._arg


def test_register_function():
  """A function registers under its own __name__ and can be looked up."""
  reg = registry.Registry()
  reg.register(sample_function)
  registered_function = reg.lookup('sample_function')
  # Lookup must return the exact function object that was registered.
  assert_equal(registered_function, sample_function)


def test_register_class():
  """A class registers under its own __name__ and can be looked up."""
  reg = registry.Registry()
  reg.register(SampleClass)
  registered_class = reg.lookup('SampleClass')
  # Lookup must return the exact class object that was registered.
  assert_equal(SampleClass, registered_class)


def test_register_with_name():
  """An explicit name overrides the default __name__ key."""
  reg = registry.Registry()
  name = 'NewSampleClass'
  reg.register(SampleClass, name=name)
  registered_class = reg.lookup(name)
  # Lookup under the explicit name must return the registered class.
  assert_equal(SampleClass, registered_class)


def test_lookup_missing_item():
  """Looking up an unregistered name raises KeyError."""
  reg = registry.Registry()
  assert_raises(KeyError, reg.lookup, 'foo')


def test_lookup_missing_name():
  """Registering an object without __name__ requires an explicit name."""
  reg = registry.Registry()
  sample_class = SampleClass(arg=1)
  # objects don't have a default __name__ attribute.
  assert_raises(AttributeError, reg.register, sample_class)
  # check that the object can be retrieved with a registered name.
  reg.register(sample_class, 'sample_class')
  assert_equal(sample_class, reg.lookup('sample_class'))


if __name__ == '__main__':
  pytest.main([__file__])


================================================ FILE: tests/safe_eval_test.py ================================================
# Copyright 2019 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Implements a safe evaluation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import operator import pytest from qkeras.safe_eval import GetParams from qkeras.safe_eval import safe_eval add = operator.add def test_get_params1(): s = "(3, 0.3, sep=5 )" args, kwargs = GetParams(s) assert args == [3, 0.3] assert kwargs == {"sep": 5} def test_get_params2(): s = "( )" args, kwargs = GetParams(s) assert not args assert not kwargs def test_get_params3(): s = ("(3, 0.3, -1.0, True, False, 'string1', num1=0.1, num2=-3.0, " "str1='string2', bool1=True, bool2=False)") args, kwargs = GetParams(s) assert args == [3, 0.3, -1.0, True, False, "string1"] assert kwargs == { "num1": 0.1, "num2": -3.0, "str1": "string2", "bool1": True, "bool2": False } def test_safe_eval1(): s = "add(3,3)" assert safe_eval(s, globals()) == 6 def i_func(s): return -s def myadd2(a, b): return i_func(a) + i_func(b) def myadd(a=32, b=10): return a + b class myaddcls(object): def __call__(self, a=32, b=10): return a + b def test_safe_eval2(): s_add = [3, 39] assert safe_eval("add", globals(), *s_add) == 42 def test_safe_eval3(): assert safe_eval("myadd()", globals()) == 42 assert safe_eval("myadd(a=39)", globals(), b=3) == 42 def test_safe_eval4(): assert safe_eval("myadd2(a=39)", globals(), b=3) == -42 assert safe_eval("myadd2(a= 39)", globals(), b=3) == -42 assert safe_eval("myadd2(a= 39, b = 3)", globals()) == -42 def test_safe_eval5(): assert safe_eval("myadd", globals())(3,39) == 42 assert safe_eval("myaddcls", globals())(3,39) == 42 assert safe_eval("myaddcls()", globals())(3,39) == 42 if __name__ == "__main__": pytest.main([__file__]) ================================================ FILE: tests/utils_test.py ================================================ # Copyright 2019 Google LLC # # # Licensed under the Apache License, Version 2.0 (the "License"); # 
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for methods in utils.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pytest
import os
import tempfile

from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from qkeras import *
from qkeras.utils import get_model_sparsity
from qkeras.utils import model_quantize
from qkeras.utils import convert_to_folded_model
from qkeras.utils import is_TFOpLambda_layer
from qkeras.utils import find_bn_fusing_layer_pair
from qkeras.utils import add_bn_fusing_weights
from qkeras.utils import clone_model_and_freeze_auto_po2_scale
from qkeras.utils import load_qmodel


def create_quantized_network():
  """Creates a simple quantized conv net model.

  Builds a plain Keras conv net, then runs model_quantize over it so the
  Conv2D/Activation layers become their QKeras equivalents.

  Returns:
    A quantized Keras model with 4-bit kernel/bias quantizers and ternary
    relu activations.
  """
  # Create a simple model
  xi = Input((28, 28, 1))
  x = Conv2D(32, (3, 3))(xi)
  x = Activation("relu")(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  model = Model(inputs=xi, outputs=x)

  # Quantize the model. Keys map layer types (and, for activations, the
  # activation name) to quantizer constructor strings.
  quantizer_config = {
      "QConv2D": {
          "kernel_quantizer": "quantized_bits(4)",
          "bias_quantizer": "quantized_bits(4)"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  activation_bits = 4
  qmodel = model_quantize(model, quantizer_config, activation_bits)
  return qmodel


def create_quantized_po2_network():
  """Creates a simple quantized conv net model with po2 quantizers.

  Returns:
    A QKeras model whose conv kernels use power-of-two quantization.
  """
  xi = Input((28, 28, 1))
  x = QConv2D(32, (3, 3), kernel_quantizer=quantized_po2(4))(xi)
  x = QActivation(quantized_bits(8))(x)
  x = QConv2D(32, (3, 3), kernel_quantizer=quantized_po2(4))(x)
  x = QActivation(quantized_bits(8))(x)
  qmodel = Model(xi, x, name='simple_po2_qmodel')
  return qmodel


def set_network_sparsity(model, sparsity):
  """Set the sparsity of the given model using random weights.

  Args:
    model: Keras model whose weights are overwritten in place.
    sparsity: Fraction (0.0-1.0) of each weight tensor to set to zero.

  Returns:
    The same model, with every weight tensor replaced by random positive
    values of which roughly `sparsity` fraction are exactly zero.
  """
  for layer in model.layers:
    new_weights = []
    for w in layer.get_weights():
      # Create weights with desired sparsity: random values offset by 0.1 so
      # none are accidentally zero, then zero out the first chunk and shuffle
      # so the zeros land in random positions.
      sparse_weights = np.random.rand(w.size) + 0.1
      sparse_weights[:int(w.size * sparsity)] = 0
      np.random.shuffle(sparse_weights)
      new_weights.append(sparse_weights.reshape(w.shape))
    layer.set_weights(new_weights)
  return model


def test_get_model_sparsity():
  """Tests if the method get_model_sparsity in utils.py works correctly."""
  qmodel = create_quantized_network()

  # Generate sparsity levels to test, always including the 0.0 and 1.0
  # boundary cases.
  sparsity_levels = np.concatenate((np.random.rand(10), [1.0, 0.0])).round(2)

  # Test various sparsity levels: the measured sparsity should track the
  # sparsity we injected to within 1%.
  for true_sparsity in sparsity_levels:
    qmodel = set_network_sparsity(qmodel, true_sparsity)
    calc_sparsity = get_model_sparsity(qmodel)
    assert np.abs(calc_sparsity - true_sparsity) < 0.01


def test_get_po2_model_sparsity():
  """Tests get_model_sparsity on a po2-quantized model.

  Models quantized with po2 quantizers should have a sparsity near 0 because
  if the exponent is set to 0, the value of the weight will equal
  2^0 == 1 != 0
  """
  qmodel = create_quantized_po2_network()
  # NOTE(review): presumably forces the legacy quantizer config path in
  # get_model_sparsity — confirm against qkeras.utils.
  qmodel.use_legacy_config = True

  # Generate sparsity levels to test
  sparsity_levels = np.concatenate((np.random.rand(10), [1.0, 0.0])).round(2)

  # Test various sparsity levels: regardless of how many raw weights are
  # zeroed, the po2-quantized values should be (near) all non-zero.
  for set_sparsity in sparsity_levels:
    qmodel = set_network_sparsity(qmodel, set_sparsity)
    calc_sparsity = get_model_sparsity(qmodel)
    assert np.abs(calc_sparsity - 0) < 0.01


def test_convert_to_folded_model():
  """Test convert_to_folded_model to work properly on non-sequential model."""

  def get_add_model():
    # Two parallel conv+bn+relu branches joined by an Add layer, so the
    # model graph is non-sequential.
    x = x_in = Input(shape=(4, 4, 1), name="input")
    x1 = Conv2D(4, kernel_size=(2, 2), padding="valid", strides=(1, 1),
                name="conv2d_1")(x)
    x1 = BatchNormalization(name="bn_1")(x1)
    x1 = Activation("relu", name="relu_1")(x1)
    x2 = Conv2D(4, kernel_size=(2, 2), padding="valid", strides=(1, 1),
                name="conv2d_2")(x)
    x2 = BatchNormalization(name="bn_2")(x2)
    x2 = Activation("relu", name="relu_2")(x2)
    x = Add(name="add")([x1, x2])
    x = Softmax()(x)
    return Model(inputs=[x_in], outputs=[x])

  model = get_add_model()
  fmodel, _ = convert_to_folded_model(model)
  # After folding bn into conv, the add layer should sit at index 5.
  assert fmodel.layers[5].name == "add"

  # test if convert_to_folded_model work with TFOpLambda layers
  def hard_sigmoid(x):
    # `x + 3.` and the `* (1. / 6.)` scale become TFOpLambda layers in the
    # Keras graph.
    return ReLU(6.)(x + 3.) * (1. / 6.)

  def hard_swish(x):
    return Multiply()([hard_sigmoid(x), x])

  def get_lambda_model():
    x = x_in = Input(shape=(4, 4, 1), name="input")
    x = Conv2D(
        4, kernel_size=(2, 2), padding="valid", strides=(1, 1),
        name="conv2d_1")(x)
    x = hard_swish(x)
    return Model(inputs=[x_in], outputs=[x])

  model = get_lambda_model()
  fmodel, _ = convert_to_folded_model(model)
  assert is_TFOpLambda_layer(model.layers[2])
  assert is_TFOpLambda_layer(model.layers[4])
  assert isinstance(fmodel.layers[5], Multiply)


def test_find_bn_fusing_layer_pair():
  """Tests detection of conv/bn pairs and the fused weights they produce."""
  # Two conv branches, each followed by its own QBatchNormalization, merged
  # by an Add layer.
  x = x_in = Input((23, 23, 1), name="input")
  x1 = QConv2D(
      2, 2, 1,
      kernel_quantizer=quantized_bits(4, 0, 1),
      bias_quantizer=quantized_bits(4, 0, 1),
      use_bias=False,
      name="conv1")(x)
  x1 = QBatchNormalization(
      mean_quantizer=quantized_bits(4, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantized_bits(4, 0, 1),
      inverse_quantizer=quantized_bits(8, 0, 1),
      name="bn1")(x1)
  x2 = QConv2D(
      2, 2, 1,
      kernel_quantizer=quantized_bits(3, 0),
      bias_quantizer=quantized_bits(3, 2),
      name="conv2")(x)
  x2 = QBatchNormalization(
      mean_quantizer=quantized_bits(4, 0, 1),
      gamma_quantizer=None,
      variance_quantizer=None,
      beta_quantizer=quantized_bits(4, 0, 1),
      inverse_quantizer=quantized_bits(8, 0, 1),
      name="bn2")(x2)
  x = Add(name="add")([x1, x2])
  model = Model(inputs=[x_in], outputs=[x])

  # Each conv layer should be paired with the bn layer that follows it.
  (conv_bn_pair_dict, _) = find_bn_fusing_layer_pair(model)
  assert conv_bn_pair_dict["conv1"] == "bn1"
  assert conv_bn_pair_dict["conv2"] == "bn2"

  conv_layer = model.layers[1]
  bn_layer = model.layers[3]

  # Fixed weights so the fused bn_inv / fused_bias values below are exact.
  conv_layer.set_weights([
      np.array([[[[0.5, 0.75]], [[1.5, -0.625]]],
                [[[-0.875, 1.25]], [[-1.25, -2.5]]]])
  ])
  # BN weights in Keras order: gamma, beta, moving_mean, moving_variance.
  bn_layer.set_weights([
      np.array([1., 0.25]),
      np.array([0.5, 1.0]),
      np.array([0.5, 2.5]),
      np.array([1.5, 1.])
  ])
  saved_weights = {}
  saved_weights[conv_layer.name] = {}
  add_bn_fusing_weights(conv_layer, bn_layer, saved_weights)

  d = saved_weights[conv_layer.name]
  assert d["enable_bn_fusing"]
  assert d["fused_bn_layer_name"] == "bn1"
  assert np.all(d["bn_inv"] == np.array([0.8125, 0.25]))
  assert np.all(d["fused_bias"] == np.array([0.09375, 0.65625]))


def create_test_model_for_scale_freezing(bias_quantizer):
  """Builds a small model with auto_po2 quantizers and deterministic weights.

  Args:
    bias_quantizer: Quantizer used for the final QDense bias; tests pass an
      auto_po2 quantizer here to trigger the multiple-auto_po2 error path.

  Returns:
    A compiled-graph Keras model with fixed weights, ready for
    clone_model_and_freeze_auto_po2_scale tests.
  """

  def _create_simple_model(bias_quantizer):
    # conv -> dw_conv -> bn -> relu -> flatten -> dense, with auto_po2
    # quantizers on the conv/dw_conv/dense kernels and the bn inverse.
    x = x_in = tf.keras.Input((4, 4, 1), name="input")
    x = QConv2D(
        filters=4, kernel_size=2, strides=2,
        kernel_quantizer=quantized_bits(4, 2, 1, alpha="auto_po2"),
        bias_quantizer=quantized_bits(4, 2, 1),
        use_bias=False,
        name="conv")(x)
    x = QDepthwiseConv2D(
        kernel_size=2, strides=1,
        depthwise_quantizer=quantized_bits(6, 3, 1, alpha="auto_po2"),
        use_bias=False,
        bias_quantizer=quantized_bits(4, 2, 1),
        name="dw_conv")(x)
    x = QBatchNormalization(
        mean_quantizer=quantized_bits(4, 2, 1),
        gamma_quantizer=None,
        variance_quantizer=None,
        beta_quantizer=quantized_bits(4, 0, 1),
        inverse_quantizer=quantized_bits(8, 0, 1, alpha="auto_po2"),
        name="bn")(x)
    x = QActivation(activation=quantized_bits(4, 0), name="relu")(x)
    x = tf.keras.layers.Flatten(name="flatten")(x)
    x = QDense(
        units=2,
        kernel_quantizer=quantized_bits(4, 2, 1, alpha="auto_po2"),
        bias_quantizer=bias_quantizer,
        name="dense")(x)
    model = tf.keras.Model(inputs=x_in, outputs=x)
    return model

  def _set_weights(model):
    # Deterministic weights so the expected quantized values and scales in
    # the tests below are exact.
    conv_w = [np.array(
        [0.23, 2.76, 0.1, 0.33, 0.53, 0.16, 0.3, 1.7,
         -0.9, 1.43, 2.31, -0.2, -1.7, 0.39, -2.03, 1.79]).reshape(
             2, 2, 1, 4)]
    dw_conv_w = [np.array([
        0.03, 3.6, 2.1, 1.2, 0.13, 1.3, -0.3, 1.2,
        -0.7, -10.3, 11.7, -0.92, -10.7, 0.59, -1.93, 2.8]).reshape(
            (2, 2, 4, 1))]
    bn_w = [np.array([0.28, 1.33, 2.27, 3.36]),
            np.array([0.31, 0.1, 0.03, 4.26]),
            np.array([0.89, -0.21, 1.97, 2.06]),
            np.array([1.2, 0.9, 13.2, 10.9])]
    dense_w = np.array(
        [0.13, 0.66, 0.21, 0.23, 1.07, -0.79, 1.83, 1.81])
    dense_w = [dense_w.reshape((4, 2)), np.array([-1.3, 0.7])]
    model.get_layer("conv").set_weights(conv_w)
    model.get_layer("dw_conv").set_weights(dw_conv_w)
    model.get_layer("bn").set_weights(bn_w)
    model.get_layer("dense").set_weights(dense_w)

  orig_model = _create_simple_model(bias_quantizer)
  _set_weights(orig_model)
  return orig_model


def test_clone_model_and_freeze_auto_po2_scale():
  """Test clone_model_and_freeze_auto_po2_scale to work properly."""
  orig_model = create_test_model_for_scale_freezing(quantized_bits(4, 2, 1))
  _, new_hw = clone_model_and_freeze_auto_po2_scale(
      orig_model, quantize_model_weights=True)

  # Check if the new model's weights and scales are derived properly.
  np.testing.assert_array_equal(
      new_hw["conv"]["weights"][0],
      np.array([[[[0.5, 6, 0, 0.5]], [[1, 0, 0.5, 3.5]]],
                [[[-2., 3., 3.5, -0.5]], [[-3.5, 1., -3.5, 3.5]]]]))
  np.testing.assert_array_equal(
      new_hw["conv"]["scales"][0],
      np.array([[[[0.25, 0.5, 0.25, 0.25]]]]))

  np.testing.assert_array_equal(
      new_hw["dw_conv"]["weights"][0].numpy().flatten(),
      np.array([
          0., 14, 8, 4, 0, 6, -2, 4, -2, -42, 46, -4, -42, 2, -8, 12]))

  np.testing.assert_array_equal(
      new_hw["dense"]["scales"][0], np.array([[0.25, 0.25]]))


def test_clone_model_and_freeze_auto_po2_scale_serialization():
  """Tests that the cloned model can be saved and loaded properly."""
  # Test if the cloned model can be saved and loaded properly.
  orig_model = create_test_model_for_scale_freezing(quantized_bits(4, 2, 1))
  new_model, _ = clone_model_and_freeze_auto_po2_scale(
      orig_model, quantize_model_weights=True)
  fd, fname = tempfile.mkstemp(".hdf5")
  new_model.save(fname)
  _ = load_qmodel(fname)
  # mkstemp opens the file; close our descriptor before deleting.
  os.close(fd)
  os.remove(fname)


def test_clone_model_and_freeze_auto_po2_scale_error():
  """Tests the error path for layers with multiple auto_po2 quantizers."""
  orig_model = create_test_model_for_scale_freezing(
      quantized_bits(4, 2, 1, alpha="auto_po2"))

  # Test if the function raises an error when there are more than one
  # auto_po2 quantizers in a layer.
  with pytest.raises(ValueError):
    clone_model_and_freeze_auto_po2_scale(
        orig_model, quantize_model_weights=False)


if __name__ == "__main__":
  pytest.main([__file__])