Repository: mit-han-lab/deepcompressor Branch: main Commit: 69f3473f5e1c Files: 229 Total size: 3.7 MB Directory structure: gitextract_0u4zluxv/ ├── .gitignore ├── LICENSE ├── README.md ├── assets/ │ ├── diffusion/ │ │ └── .gitkeep │ └── llm/ │ └── .gitkeep ├── deepcompressor/ │ ├── __init__.py │ ├── app/ │ │ ├── __init__.py │ │ ├── diffusion/ │ │ │ ├── __init__.py │ │ │ ├── cache/ │ │ │ │ ├── __init__.py │ │ │ │ └── config.py │ │ │ ├── config.py │ │ │ ├── dataset/ │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── calib.py │ │ │ │ ├── collect/ │ │ │ │ │ ├── calib.py │ │ │ │ │ └── utils.py │ │ │ │ └── data/ │ │ │ │ ├── COCO/ │ │ │ │ │ ├── COCO.py │ │ │ │ │ └── __init__.py │ │ │ │ ├── DCI/ │ │ │ │ │ ├── DCI.py │ │ │ │ │ └── __init__.py │ │ │ │ ├── MJHQ/ │ │ │ │ │ ├── MJHQ.py │ │ │ │ │ └── __init__.py │ │ │ │ ├── __init__.py │ │ │ │ └── dump.py │ │ │ ├── eval/ │ │ │ │ ├── __init__.py │ │ │ │ ├── config.py │ │ │ │ └── metrics/ │ │ │ │ ├── __init__.py │ │ │ │ ├── fid.py │ │ │ │ ├── image_reward.py │ │ │ │ ├── multimodal.py │ │ │ │ ├── run.py │ │ │ │ └── similarity.py │ │ │ ├── nn/ │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── patch.py │ │ │ │ └── struct.py │ │ │ ├── pipeline/ │ │ │ │ ├── __init__.py │ │ │ │ └── config.py │ │ │ ├── ptq.py │ │ │ ├── quant/ │ │ │ │ ├── __init__.py │ │ │ │ ├── activation.py │ │ │ │ ├── config.py │ │ │ │ ├── quantizer/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── config.py │ │ │ │ │ └── quantizer.py │ │ │ │ ├── rotate.py │ │ │ │ ├── smooth.py │ │ │ │ ├── utils.py │ │ │ │ └── weight.py │ │ │ └── utils.py │ │ └── llm/ │ │ ├── __init__.py │ │ ├── cache/ │ │ │ ├── __init__.py │ │ │ └── config.py │ │ ├── config.py │ │ ├── eval/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── config.py │ │ │ ├── custom.py │ │ │ ├── lm_eval.py │ │ │ └── longbench/ │ │ │ ├── __init__.py │ │ │ ├── eval.py │ │ │ ├── metrics.py │ │ │ └── task2prompt.json │ │ ├── model/ │ │ │ ├── __init__.py │ │ │ └── config.py │ │ ├── nn/ │ │ │ ├── __init__.py │ │ │ 
├── patch.py │ │ │ └── struct.py │ │ ├── ptq.py │ │ └── quant/ │ │ ├── __init__.py │ │ ├── activation.py │ │ ├── config.py │ │ ├── dataset.py │ │ ├── quantizer/ │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ └── quantizer.py │ │ ├── reorder.py │ │ ├── rotate.py │ │ ├── smooth.py │ │ ├── utils.py │ │ └── weight.py │ ├── backend/ │ │ ├── __init__.py │ │ ├── nunchaku/ │ │ │ ├── __init__.py │ │ │ ├── convert.py │ │ │ ├── convert_lora.py │ │ │ └── utils.py │ │ ├── qserve/ │ │ │ ├── __init__.py │ │ │ ├── convert.py │ │ │ └── utils.py │ │ ├── tinychat/ │ │ │ ├── __init__.py │ │ │ ├── convert.py │ │ │ ├── csrc/ │ │ │ │ ├── load.py │ │ │ │ ├── pybind.cpp │ │ │ │ ├── quantization/ │ │ │ │ │ ├── dequantize.cuh │ │ │ │ │ ├── gemm/ │ │ │ │ │ │ ├── gemm_cuda.cu │ │ │ │ │ │ ├── gemm_cuda.h │ │ │ │ │ │ └── semaphore.h │ │ │ │ │ └── gemv/ │ │ │ │ │ ├── gemv_cuda.cu │ │ │ │ │ └── gemv_cuda.h │ │ │ │ └── utils.cuh │ │ │ ├── linear.py │ │ │ └── utils.py │ │ └── utils.py │ ├── calib/ │ │ ├── __init__.py │ │ ├── config/ │ │ │ ├── __init__.py │ │ │ ├── lowrank.py │ │ │ ├── range.py │ │ │ ├── reorder.py │ │ │ ├── rotation.py │ │ │ ├── search.py │ │ │ └── smooth.py │ │ ├── lowrank.py │ │ ├── metric.py │ │ ├── range.py │ │ ├── reorder.py │ │ ├── rotate.py │ │ ├── search.py │ │ └── smooth.py │ ├── csrc/ │ │ ├── load.py │ │ ├── pybind.cpp │ │ └── quantize/ │ │ ├── quantize.cu │ │ └── quantize.h │ ├── data/ │ │ ├── __init__.py │ │ ├── cache.py │ │ ├── codebook.py │ │ ├── common.py │ │ ├── dtype.py │ │ ├── range.py │ │ ├── scale.py │ │ ├── tensor.py │ │ ├── utils/ │ │ │ ├── __init__.py │ │ │ ├── dtype.py │ │ │ ├── reshape.py │ │ │ ├── scale.py │ │ │ └── shape.py │ │ └── zero.py │ ├── dataset/ │ │ ├── __init__.py │ │ ├── action.py │ │ ├── cache.py │ │ └── config.py │ ├── nn/ │ │ ├── __init__.py │ │ ├── patch/ │ │ │ ├── __init__.py │ │ │ ├── conv.py │ │ │ ├── linear.py │ │ │ ├── lowrank.py │ │ │ └── sdpa.py │ │ └── struct/ │ │ ├── __init__.py │ │ ├── attn.py │ │ └── base.py │ ├── quantizer/ │ │ 
├── __init__.py │ │ ├── config/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── kernel.py │ │ │ └── lowrank.py │ │ ├── impl/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── info.py │ │ │ ├── scale.py │ │ │ ├── simple.py │ │ │ └── ste.py │ │ ├── kernel/ │ │ │ ├── __init__.py │ │ │ ├── gptq.py │ │ │ └── rtn.py │ │ └── processor.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── common.py │ │ ├── config/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── model.py │ │ │ ├── output.py │ │ │ └── path.py │ │ ├── dataclass.py │ │ ├── hooks/ │ │ │ ├── __init__.py │ │ │ ├── branch.py │ │ │ ├── hook.py │ │ │ ├── packager.py │ │ │ └── processor.py │ │ ├── math/ │ │ │ ├── __init__.py │ │ │ ├── functional.py │ │ │ └── hadamard.py │ │ ├── patch.py │ │ └── tools/ │ │ ├── __init__.py │ │ ├── logging.py │ │ └── sys.py │ └── version.py ├── environment.yml ├── examples/ │ ├── diffusion/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── configs/ │ │ │ ├── __default__.yaml │ │ │ ├── collect/ │ │ │ │ └── qdiff.yaml │ │ │ ├── lora/ │ │ │ │ ├── __default__.yaml │ │ │ │ └── flux.1-dev/ │ │ │ │ ├── anime.yaml │ │ │ │ ├── ghibsky.yaml │ │ │ │ ├── realism.yaml │ │ │ │ ├── sketch.yaml │ │ │ │ └── yarn.yaml │ │ │ ├── model/ │ │ │ │ ├── flux.1-dev.yaml │ │ │ │ ├── flux.1-schnell.yaml │ │ │ │ ├── pixart-sigma.yaml │ │ │ │ └── sana-1.6b.yaml │ │ │ ├── svdquant/ │ │ │ │ ├── __default__.yaml │ │ │ │ ├── fast.yaml │ │ │ │ ├── gptq.yaml │ │ │ │ ├── int4.yaml │ │ │ │ └── nvfp4.yaml │ │ │ └── text/ │ │ │ ├── __default__.yaml │ │ │ └── awq.yaml │ │ ├── prompts/ │ │ │ ├── lora/ │ │ │ │ ├── anime.yaml │ │ │ │ ├── ghibsky.yaml │ │ │ │ ├── realism.yaml │ │ │ │ ├── sketch.yaml │ │ │ │ └── yarn.yaml │ │ │ └── qdiff.yaml │ │ └── scripts/ │ │ └── svdquant.sh │ └── llm/ │ ├── .gitignore │ ├── README.md │ ├── configs/ │ │ ├── __default__.yaml │ │ ├── awq.yaml │ │ ├── gptq.yaml │ │ ├── ooo.yaml │ │ ├── qoq-g128.yaml │ │ ├── qoq-gchn.yaml │ │ ├── smoothquant-dynamic.yaml │ │ └── smoothquant-static.yaml │ └── scripts/ │ ├── 
awq.sh │ ├── gptq.sh │ ├── qoq.sh │ └── smoothquant.sh └── pyproject.toml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
# This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ # VS Code .vscode/ !.vscode/settings.json .DS_Store *.log *.pt .tmp/ runs exps runs/ exps/ wandb wandb/ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [2024] Yujun Lin, Muyang Li, Zhekai Zhang, Haotian Tang, Shang Yang, Song Han Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================

DeepCompressor Logo

Model Compression Toolbox for Large Language Models and Diffusion Models

Apache License

## News - [2025/02] 🎉 [**QServe**](https://arxiv.org/abs/2405.04532) has been accepted to MLSys 2025! - [2025/01] 🎉 [**SVDQuant**](https://arxiv.org/abs/2411.05007) has been accepted to ICLR 2025 (Spotlight)! - [2024/12] 🎉 [**QServe**](https://github.com/mit-han-lab/qserve) has been integrated into NVIDIA [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/llama)! - [2024/11] 🔥 Our latest **W4A4** diffusion model quantization work [**SVDQuant**](https://arxiv.org/abs/2411.05007) algorithm and [**Nunchaku**](https://github.com/mit-han-lab/nunchaku) system is publicly released! Check our [paper](http://arxiv.org/abs/2411.05007)! - [2024/05] 🔥 Our latest **W4A8KV4** LLM quantization work **QoQ** algorithm and **QServe** system is publicly released! **QoQ** is short for *quattuor-octō-quattuor* which is 4-8-4 in Latin. Check our [paper](https://arxiv.org/abs/2405.04532)! ## Key Features ***DeepCompressor*** is an open-source model compression toolbox for large language models and diffusion models based on PyTorch. DeepCompressor currently supports fake quantization with any integer and floating-point data type within 8 bits, e.g., INT8, INT4 and FP4_E2M1. Here are examples that implement the following algorithms. + [Post-training quantization for large language models](/examples/llm/): + Weight-only Quantization + [AWQ (W4A16)](/examples/llm/configs/awq.yaml) + [GPTQ (W4A16)](/examples/llm/configs/gptq.yaml) + Weight-Activation Quantization + [SmoothQuant (W8A8)](/examples/llm/configs/smoothquant-static.yaml) + Weight-Activation and KV-Cache Quantization + [QoQ (W4A8KV4)](/examples/llm/) + [Post-training quantization for diffusion models](/examples/diffusion/): + Weight-Activation Quantization + [SVDQuant (W4A4)](/examples/diffusion/) DeepCompressor also contains examples that integrate with other inference libraries. 
+ [Deploy weight-only quantized LLMs with TinyChat](/examples/llm/) + [Deploy quantized LLMs with QServe](/examples/llm/) + [Deploy quantized diffusion models with Nunchaku](/examples/diffusion/) ## Installation ### Install from Source 1. Clone this repository and navigate to deepcompressor folder ``` git clone https://github.com/mit-han-lab/deepcompressor cd deepcompressor ``` 2. Install Package ``` conda env create -f environment.yml poetry install ``` ## Highlights ### SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models [[Website](https://hanlab.mit.edu/projects/svdquant)][[Paper](http://arxiv.org/abs/2411.05007)][[Nunchaku Inference System](https://github.com/mit-han-lab/nunchaku)] Diffusion models have been proven highly effective at generating high-quality images. However, as these models grow larger, they require significantly more memory and suffer from higher latency, posing substantial challenges for deployment. In this work, we aim to accelerate diffusion models by quantizing their weights and activations to 4 bits. At such an aggressive level, both weights and activations are highly sensitive, where conventional post-training quantization methods for large language models like smoothing become insufficient. To overcome this limitation, we propose **SVDQuant**, a new 4-bit quantization paradigm. Different from smoothing which redistributes outliers between weights and activations, our approach absorbs these outliers using a low-rank branch. We first consolidate the outliers by shifting them from activations to weights, then employ a high-precision low-rank branch to take in the weight outliers with Singular Value Decomposition (SVD). This process eases the quantization on both sides. However, naïvely running the low-rank branch independently incurs significant overhead due to extra data movement of activations, negating the quantization speedup. 
To address this, we co-design an inference engine **Nunchaku** that fuses the kernels of the low-rank branch into those of the low-bit branch to cut off redundant memory access. It can also seamlessly support off-the-shelf low-rank adapters (LoRAs) without the need for re-quantization. Extensive experiments on SDXL, PixArt-Σ, and FLUX.1 validate the effectiveness of SVDQuant in preserving image quality. We reduce the memory usage for the 12B FLUX.1 models by 3.5×, achieving 3.0× speedup over the 4-bit weight-only quantized baseline on the 16GB laptop 4090 GPU, paving the way for more interactive applications on PCs. ![Teaser](/assets/diffusion/svdquant/teaser.jpg) ![SVDQuant](/assets/diffusion/svdquant/svdquant.gif) #### Quality Evaluation Below is the quality and similarity evaluated with 5000 samples from the MJHQ-30K dataset. IR means ImageReward. Our 4-bit results outperform other 4-bit baselines, effectively preserving the visual quality of 16-bit models. | Model | Precision | Method | FID ($\downarrow$) | IR ($\uparrow$) | LPIPS ($\downarrow$) | PSNR ($\uparrow$) | |----------------------------|-----------|-----------|--------------------|-----------------|----------------------|-------------------| | FLUX.1-dev (50 Steps) | BF16 | -- | 20.3 | 0.953 | -- | -- | | | W4A16 | NF4 | 20.6 | 0.910 | 0.272 | 19.5 | | | INT W4A4 | | 20.2 | 0.908 | 0.322 | 18.5 | | | INT W4A4 | SVDQuant | 19.9 | 0.935 | 0.223 | 21.0 | | | NVFP4 | | 20.3 | 0.961 | 0.345 | 16.3 | | | NVFP4 | SVDQuant | 20.3 | 0.945 | 0.205 | 21.5 | | FLUX.1-schnell (4 Steps) | BF16 | -- | 19.2 | 0.938 | -- | -- | | | W4A16 | NF4 | 18.9 | 0.943 | 0.257 | 18.2 | | | INT W4A4 | | 18.1 | 0.962 | 0.345 | 16.3 | | | INT W4A4 | SVDQuant | 18.3 | 0.951 | 0.257 | 18.3 | | | NVFP4 | | 19.0 | 0.952 | 0.276 | 17.6 | | | NVFP4 | SVDQuant | 18.9 | 0.966 | 0.228 | 19.0 | | SANA-1.6b (20 Steps) | BF16 | -- | 20.6 | 0.952 | -- | -- | | | INT W4A4 | | 20.5 | 0.894 | 0.339 | 15.3 | | | INT W4A4 | SVDQuant | 19.3 | 0.935 | 
0.220 | 17.8 | | | NVFP4 | | 19.7 | 0.929 | 0.236 | 17.4 | | | NVFP4 | SVDQuant | 20.2 | 0.941 | 0.176 | 19.0 | | PixArt-Sigma (20 Steps) | FP16 | -- | 16.6 | 0.944 | -- | -- | | | INT W4A8 | ViDiT-Q | 37.3 | 0.573 | 0.611 | 12.0 | | | INT W4A4 | SVDQuant | 19.2 | 0.878 | 0.323 | 17.6 | | | NVFP4 | | 31.8 | 0.660 | 0.517 | 14.8 | | | NVFP4 | SVDQuant | 16.6 | 0.940 | 0.271 | 18.5 | ### QServe: W4A8KV4 Quantization for Efficient LLM Serving [[Website](https://hanlab.mit.edu/projects/qserve)][[Paper](https://arxiv.org/abs/2405.04532)][[QoQ Algorithm Code](/examples/llm)][[QServe GPU System](https://github.com/mit-han-lab/qserve)] Quantization can accelerate large language model (LLM) inference. Going beyond INT8 quantization, the research community is actively exploring even lower precision, such as INT4. Nonetheless, state-of-the-art INT4 quantization techniques only accelerate low-batch, edge LLM inference, failing to deliver performance gains in large-batch, cloud-based LLM serving. We uncover a critical issue: existing INT4 quantization methods suffer from significant runtime overhead (20-90%) when **dequantizing either weights or partial sums** on GPUs. To address this challenge, we introduce **QoQ**, a W4A8KV4 quantization algorithm with 4-bit weight, 8-bit activation, and 4-bit KV cache. QoQ stands for **quattuor-octo-quattuor**, which represents 4-8-4 in Latin. QoQ is implemented by the **QServe** inference library that achieves measured speedup. The key insight driving QServe is that the efficiency of LLM serving on GPUs is critically influenced by **operations on low-throughput CUDA cores**. Building upon this insight, in QoQ algorithm, we introduce progressive quantization that can allow low dequantization overhead in W4A8 GEMM. Additionally, we develop SmoothAttention to effectively mitigate the accuracy degradation incurred by 4-bit KV quantization. 
In the QServe system, we perform compute-aware weight reordering and take advantage of register-level parallelism to reduce dequantization latency. We also make fused attention memory-bound, harnessing the performance gain brought by KV4 quantization. As a result, QServe improves the maximum achievable serving throughput of Llama-3-8B by **1.2×** on A100, **1.4×** on L40S; and Qwen1.5-72B by **2.4×** on A100, **3.5×** on L40S, compared to TensorRT-LLM. ![QoQ-QServe](/assets/llm/qoq/qoq-qserve.png) ![QoQ](/assets/llm/qoq/qoq.png) #### Perplexity Evaluation Below is the WikiText2 perplexity evaluated with 2048 sequence length. The lower is the better. | Methods | Precision | Llama-3.1 70B | Llama-3.1 8B | Llama-3 70B | Llama-3 8B | Llama-2 7B | Llama-2 13B | Llama-2 70B | Llama 7B | Llama 13B | Llama 30B | Mistral 7B | Yi 34B | |-------------|--------------|---------------|--------------|-------------| ------------|------------|-------------|-------------|----------|-----------|-----------|------------|--------| | FP16 | | 2.81 | 6.24 | 2.85 | 6.14 | 5.47 | 4.88 | 3.32 | 5.68 | 5.09 | 4.10 | 5.25 | 4.60 | | SmoothQuant | W8A8 | 3.23 | 6.38 | 3.14 | 6.28 | 5.54 | 4.95 | 3.36 | 5.73 | 5.13 | 4.23 | 5.29 | 4.69 | | GPTQ-R | W4A16 g128 | 3.46 | 6.64 | 3.42 | 6.56 | 5.63 | 4.99 | 3.43 | 5.83 | 5.20 | 4.22 | 5.39 | 4.68 | | AWQ | W4A16 g128 | 3.22 | 6.60 | 3.20 | 6.54 | 5.60 | 4.97 | 3.41 | 5.78 | 5.19 | 4.21 | 5.37 | 4.67 | | QuaRot | W4A4 | 5.97 | 8.32 | 6.75 | 8.33 | 6.19 | 5.45 | 3.83 | 6.34 | 5.58 | 4.64 | 5.77 | - | | SpinQuant | W4A4 | 4.80 | 7.42 | 6.27 | 7.37 | 5.96 | 5.24 | 3.71 | 6.14 | 5.39 | 4.56 | - | - | | Atom | W4A4 g128 | - | - | 4.33 | 7.78 | 6.12 | 5.31 | 3.73 | 6.25 | 5.52 | 4.61 | 5.76 | 4.97 | | QoQ | W4A8KV4 | 3.68 | 6.87 | 3.65 | 6.81 | 5.75 | 5.11 | 3.50 | 5.92 | 5.27 | 4.31 | 5.44 | 4.73 | | QoQ | W4A8KV4 g128 | 3.51 | 6.77 | 3.50 | 6.70 | 5.67 | 5.06 | 3.46 | 5.88 | 5.23 | 4.27 | 5.41 | 4.73 | \* SmoothQuant is evaluated with per-tensor static 
KV cache quantization. #### Efficiency Benchmarks When serving the large language models Llama-3-8B and Qwen1.5-72B on L40S and A100 GPUs, QServe demonstrates superior performance, achieving **1.2x-1.4x higher throughput** compared to the leading industry solution, TensorRT-LLM, for Llama-3-8B, and a **2.4x-3.5x higher throughput** for Qwen1.5-72B. See more about benchmarking setting in [QServe GPU Inference System](https://github.com/mit-han-lab/qserve). | L40S (48G) | Llama-3-8B | Llama-2-7B | Mistral-7B | Llama-2-13B | Llama-30B | Yi-34B | Llama-2-70B | Qwen-1.5-72B | |----------------------|------------|------------|------------|-------------|-----------|-----------|-------------|--------------| | TRT-LLM-FP16 | 1326 | 444 | 1566 | 92 | OOM | OOM | OOM | OOM | | TRT-LLM-W4A16 | 1431 | 681 | 1457 | 368 | 148 | 313 | 119 | 17 | | TRT-LLM-W8A8 | 2634 | 1271 | 2569 | 440 | 123 | 364 | OOM | OOM | | Atom-W4A4 | -- | 2120 | -- | -- | -- | -- | -- | -- | | QuaRot-W4A4 | -- | 805 | -- | 413 | 133 | -- | -- | 15 | | QServe-W4A8KV4 | **3656** | **2394** | **3774** | **1327** | **504** | **869** | **286** | **59** | | Throughput Increase* | **1.39x** | **1.13x** | **1.47x** | **3.02x** | **3.41x** | **2.39x** | **2.40x** | **3.47x** | | A100 (80G) | Llama-3-8B | Llama-2-7B | Mistral-7B | Llama-2-13B | Llama-30B | Yi-34B | Llama-2-70B | Qwen-1.5-72B | |----------------------|------------| -----------|------------|-------------|-----------|-----------|-------------|--------------| | TRT-LLM-FP16 | 2503 | 1549 | 2371 | 488 | 80 | 145 | OOM | OOM | | TRT-LLM-W4A16 | 2370 | 1549 | 2403 | 871 | 352 | 569 | 358 | 143 | | TRT-LLM-W8A8 | 2396 | 2334 | 2427 | 1277 | 361 | 649 | 235 | 53 | | Atom-W4A4 | -- | 1160 | -- | -- | -- | -- | -- | -- | | QuaRot-W4A4 | -- | 1370 | -- | 289 | 267 | -- | -- | 68 | | QServe-W4A8KV4 | **3005** | **2908** | **2970** | **1741** | **749** | **803** | **419** | **340** | | Throughput Increase* | **1.20x** | **1.25x** | **1.22x** | **1.36x** | 
**2.07x** | **1.23x** | **1.17x** | **2.38x** | The absolute token generation throughputs of QServe and baseline systems (Unit: tokens/second. `--` means unsupported). All experiments were conducted under the same device memory budget. Throughput increase of QServe is calculated with regard to the best baseline in each column. ## Reference If you find `deepcompressor` useful or relevant to your research, please kindly cite our paper: ```bibtex @article{lin2024qserve, title={QServe: W4A8KV4 Quantization and System Co-design for Efficient LLM Serving}, author={Lin*, Yujun and Tang*, Haotian and Yang*, Shang and Zhang, Zhekai and Xiao, Guangxuan and Gan, Chuang and Han, Song}, journal={arXiv preprint arXiv:2405.04532}, year={2024} } @article{ li2024svdquant, title={SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models}, author={Li*, Muyang and Lin*, Yujun and Zhang*, Zhekai and Cai, Tianle and Li, Xiuyu and Guo, Junxian and Xie, Enze and Meng, Chenlin and Zhu, Jun-Yan and Han, Song}, journal={arXiv preprint arXiv:2411.05007}, year={2024} } ``` ## Related Projects The following projects are highly related to QServe. Our group has developed full-stack application-algorithm-system-hardware support for efficient large models, receiving **9k+ GitHub stars** and **over 1M Huggingface community downloads**. You are also welcome to check out [MIT HAN Lab](https://hanlab.mit.edu) for other exciting projects on **Efficient Generative AI**! 
- [**System**] [QServe: W4A8KV4 Quantization for Efficient LLM Serving](https://github.com/mit-han-lab/qserve) - [**System**] [TinyChat: Efficient and Lightweight Chatbot with AWQ](https://github.com/mit-han-lab/llm-awq/tree/main/tinychat) - [**Application**] [VILA: On Pretraining of Visual-Language Models](https://github.com/Efficient-Large-Model/VILA) - [**Algorithm**] [AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration](https://github.com/mit-han-lab/llm-awq) - [**Algorithm**] [SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models](https://github.com/mit-han-lab/smoothquant) - [**Algorithm**] [DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models](https://github.com/mit-han-lab/distrifuser) - [**Hardware**] [SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning](https://arxiv.org/abs/2012.09852) ## Acknowledgments DeepCompressor is inspired by many open-source libraries, including (but not limited to) [GPTQ](https://arxiv.org/abs/2210.17323), [QuaRot](https://arxiv.org/abs/2404.00456) and [Atom](https://arxiv.org/abs/2310.19102). 
@dataclass
class DiffusionQuantCacheConfig(BasePathConfig):
    """Cache file paths used while quantizing a denoising diffusion model.

    Args:
        smooth (`str`, *optional*, default=`""`):
            Cache path of the smoothing scales.
        branch (`str`, *optional*, default=`""`):
            Cache path of the low-rank branches.
        wgts (`str`, *optional*, default=`""`):
            Cache path of the weight quantizers state dict.
        acts (`str`, *optional*, default=`""`):
            Cache path of the activation quantizers state dict.
    """

    smooth: str = ""
    branch: str = ""
    wgts: str = ""
    acts: str = ""

    @staticmethod
    def simplify_path(path: str, key_map: dict[str, set[str]]) -> str:
        """Shorten every ``skip.[...]`` / ``include.[...]`` segment of a cache path.

        The ``+``-separated keys inside the brackets are first reduced with
        ``DiffusionModelStruct._simplify_keys`` and each resulting key is then
        abbreviated to the initials of its ``_``-separated words.

        Args:
            path (`str`): The cache path to shorten.
            key_map (`dict[str, set[str]]`): Key map forwarded to ``_simplify_keys``.

        Returns:
            `str`: The path with all matching segments replaced.
        """
        replacements: dict[str, str] = {}
        # collect every segment shaped like "(skip|include).[key+key+...]"
        for match in re.finditer(r"(skip|include)\.\[[a-zA-Z0-9_\+]+\]", path):
            segment = match.group(0)
            # the key characters never contain ".", so the first "." ends the prefix
            prefix, _, bracketed = segment.partition(".")
            joined_keys = bracketed[1:-1]
            simplified_keys = DiffusionModelStruct._simplify_keys(joined_keys.split("+"), key_map=key_map)
            initials = ["".join(word[0] for word in key.split("_")) for key in simplified_keys]
            replacements[segment] = f"{prefix}.[{'+'.join(initials)}]"
        # substitute only after scanning so the scan always sees the original path
        for old_segment, new_segment in replacements.items():
            path = path.replace(old_segment, new_segment)
        return path

    def simplify(self, key_map: dict[str, set[str]]) -> tp.Self:
        """Apply :meth:`simplify_path` through ``self.apply`` and return its result."""
        shorten = functools.partial(self.simplify_path, key_map=key_map)
        return self.apply(shorten)


@configclass
@dataclass
class DiffusionPtqCacheConfig:
    """Root directory of the quantization cache plus the derived cache paths.

    ``dirpath`` and ``path`` are excluded from ``__init__`` and are expected to
    be assigned after construction.
    """

    root: str
    dirpath: DiffusionQuantCacheConfig = field(init=False)
    path: DiffusionQuantCacheConfig = field(init=False)
@configclass
@dataclass
class DiffusionPtqRunConfig:
    """Top-level config of post-training quantization for a diffusion model.

    Args:
        cache (`DiffusionPtqCacheConfig` or `None`):
            The cache configuration. When `None`, no quantization cache paths are generated.
        output (`OutputConfig`):
            The output directory configuration.
        pipeline (`DiffusionPipelineConfig`):
            The diffusion pipeline configuration.
        eval (`DiffusionEvalConfig`):
            The evaluation configuration.
        quant (`DiffusionQuantConfig`):
            The post-training quantization configuration.
        text (`LlmQuantConfig` or `None`, *optional*, defaults to `None`):
            The quantization configuration of the text encoder.
        text_cache (`LlmCacheConfig`, *optional*, defaults to `LlmCacheConfig()`):
            The cache configuration used together with `text`.
        seed (`int`, *optional*, defaults to `12345`):
            The seed for reproducibility.
        skip_gen (`bool`, *optional*, defaults to `False`):
            Whether to skip generation.
        skip_eval (`bool`, *optional*, defaults to `False`):
            Whether to skip evaluation.
        load_from (`str`, *optional*, defaults to `""`):
            Directory path to load the model checkpoint.
        save_model (`str`, *optional*, defaults to `""`):
            Directory path to save the model checkpoint.
        copy_on_save (`bool`, *optional*, defaults to `False`):
            Whether to copy the quantization cache on save.
    """

    cache: DiffusionPtqCacheConfig | None
    output: OutputConfig
    pipeline: DiffusionPipelineConfig
    eval: DiffusionEvalConfig
    quant: DiffusionQuantConfig = field(metadata={omniconfig.ARGPARSE_KWARGS: {"prefix": ""}})
    text: LlmQuantConfig | None = None
    text_cache: LlmCacheConfig = field(default_factory=LlmCacheConfig)
    seed: int = 12345
    skip_gen: bool = False
    skip_eval: bool = False
    load_from: str = ""
    save_model: str = ""
    copy_on_save: bool = False

    def __post_init__(self) -> None:
        """Resolve derived settings once all sub-configs are available.

        In order: fill in the text-encoder scale dtypes, clamp the number of
        evaluation GPUs and reconcile batch sizes, expand the calibration and
        reference paths, generate cache paths, pick the output directory, and
        finally seed the random number generators.

        NOTE(review): the block nesting in this method was reconstructed from a
        whitespace-collapsed listing — confirm against the repository.
        """
        # region set text encoder quantization scale default dtype
        if self.text is not None and self.text.enabled_wgts:
            self.text.wgts.scale_dtypes = tuple(
                ScaleUtils.infer_scale_dtypes(self.text.wgts.scale_dtypes, default_dtype=self.pipeline.dtype)
            )
        if self.text is not None and self.text.enabled_ipts:
            self.text.ipts.scale_dtypes = tuple(
                ScaleUtils.infer_scale_dtypes(self.text.ipts.scale_dtypes, default_dtype=self.pipeline.dtype)
            )
        if self.text is not None and self.text.enabled_opts:
            self.text.opts.scale_dtypes = tuple(
                ScaleUtils.infer_scale_dtypes(self.text.opts.scale_dtypes, default_dtype=self.pipeline.dtype)
            )
        # endregion
        # never request more GPUs than are visible to torch
        self.eval.num_gpus = min(torch.cuda.device_count(), self.eval.num_gpus)
        if self.eval.batch_size_per_gpu is None:
            # derive the per-GPU batch size, then round the global batch size to a multiple of it
            self.eval.batch_size_per_gpu = max(1, self.eval.batch_size // self.eval.num_gpus)
            self.eval.batch_size = self.eval.batch_size_per_gpu * self.eval.num_gpus
        else:
            # an explicit per-GPU batch size overrides the global batch size
            self.eval.batch_size = self.eval.batch_size_per_gpu * self.eval.num_gpus
        # region setup calib dataset path
        self.quant.calib.path = self.quant.calib.path.format(
            dtype=self.pipeline.dtype,
            family=self.pipeline.family,
            model=self.pipeline.name,
            protocol=self.eval.protocol,
            data=self.quant.calib.data,
        )
        if self.quant.calib.path:
            self.quant.calib.path = os.path.abspath(os.path.expanduser(self.quant.calib.path))
        # endregion
        # region setup eval reference root
        self.eval.ref_root = self.eval.ref_root.format(
            dtype=self.pipeline.dtype,
            family=self.pipeline.family,
            model=self.pipeline.name,
            protocol=self.eval.protocol,
        )
        if self.eval.ref_root:
            self.eval.ref_root = os.path.abspath(os.path.expanduser(self.eval.ref_root))
        # endregion
        # region setup cache directory
        if self.cache is not None:
            if self.quant.enabled_wgts or self.quant.enabled_ipts or self.quant.enabled_opts:
                self.cache.dirpath = self.quant.generate_cache_dirpath(
                    root=self.cache.root, shift=self.pipeline.shift_activations, default_dtype=self.pipeline.dtype
                )
                self.cache.path = self.cache.dirpath.clone().add_children(f"{self.pipeline.name}.pt")
            else:
                self.cache.dirpath = self.cache.path = None
        # NOTE(review): this block reads `self.cache.root`; whether it sits inside the
        # `self.cache is not None` branch above cannot be told from the listing — confirm.
        if self.text is not None and self.text.is_enabled():
            if not self.text_cache.root:
                self.text_cache.root = os.path.join(self.cache.root, "diffusion")
            self.text_cache.dirpath = self.text.generate_cache_dirpath(root=self.text_cache.root, seed=self.seed)
            self.text_cache.path = self.text_cache.dirpath.clone().add_children(f"{self.pipeline.name}.pt")
        # endregion
        # region setup output directory
        if self.output.dirname == "reference":
            # reference runs write straight into the reference root
            assert self.eval.ref_root
            self.output.job = f"run-{self.eval.num_samples}"
            self.output.dirpath = self.eval.ref_root
            self.eval.ref_root = ""
            self.eval.gen_root = "{output}"
        else:
            if self.output.dirname == "default":
                self.output.dirname = self.generate_default_dirname()
            calib_dirname = self.quant.generate_calib_dirname() or "-"
            self.output.dirpath = os.path.join(
                self.output.root,
                "diffusion",
                self.pipeline.family,
                self.pipeline.name,
                *self.quant.generate_dirnames(default_dtype=self.pipeline.dtype)[:-1],
                calib_dirname,
                self.output.dirname,
            )
        # tag the job name with the chunk when only part of the evaluation set is processed
        if (self.eval.chunk_start > 0 or self.eval.chunk_step > 1) and not self.eval.chunk_only:
            self.output.job += f".c{self.eval.chunk_start}.{self.eval.chunk_step}"
        # endregion
        diffusers.training_utils.set_seed(self.seed)

    def generate_default_dirname(self) -> str:
        """Build the default output directory name from the active settings.

        Each enabled setting contributes a `-`-prefixed component; the leading
        `-` is stripped from the result. The assertion fails (or an
        `IndexError` is raised on an empty name) if no component was added.

        Returns:
            `str`: The default directory name.
        """
        name = "-shift" if self.pipeline.shift_activations else ""
        if self.quant.is_enabled():
            name += f"-{self.quant.generate_default_dirname()}"
        if self.text is not None and self.text.is_enabled():
            name += f"-text-{self.text.generate_default_dirname()}"
        size_name = ""
        if self.eval.height:
            size_name += f".h{self.eval.height}"
        if self.eval.width:
            size_name += f".w{self.eval.width}"
        if size_name:
            # drop the leading "." of the first size component
            name += f"-{size_name[1:]}"
        sampling_name = ""
        if self.eval.num_steps is not None:
            sampling_name += f".t{self.eval.num_steps}"
        if self.eval.guidance_scale is not None:
            sampling_name += f".g{self.eval.guidance_scale}"
        if sampling_name:
            name += f"-{sampling_name[1:]}"
        if self.eval.num_samples != -1:
            name += f"-s{self.eval.num_samples}"
            # NOTE(review): nesting of the chunk suffix under the sample-count branch is
            # reconstructed from a collapsed listing — confirm.
            if self.eval.chunk_only:
                name += f".c{self.eval.chunk_start}.{self.eval.chunk_step}"
        assert name[0] == "-"
        return name[1:]

    @classmethod
    def get_parser(cls) -> ConfigParser:
        """Get a parser for post-training quantization of a diffusion model.

        Returns:
            `ConfigParser`:
                A parser for post-training quantization of a diffusion model.
        """
        parser = ConfigParser("Diffusion Run configuration")
        # the key map must be registered before this config class is added to the parser
        DiffusionQuantConfig.set_key_map(DiffusionModelStruct._get_default_key_map())
        parser.add_config(cls)
        return parser
if "caches" in os.listdir(path): path = os.path.join(path, "caches") filenames = [f for f in sorted(os.listdir(path)) if f.endswith(ext)] if num_samples > 0 and num_samples < len(filenames): random.Random(seed).shuffle(filenames) filenames = filenames[:num_samples] filenames = sorted(filenames) self.filenames = filenames self.filepaths = [os.path.join(path, f) for f in filenames] else: raise ValueError(f"Invalid data path: {path}") def __len__(self) -> int: return len(self.filepaths) def __getitem__(self, idx) -> dict[str, tp.Any]: data = np.load(self.filepaths[idx], allow_pickle=True).item() if isinstance(data["input_args"][0], str): name = data["input_args"][0] latent = np.load(os.path.join(self.path, "latents", name)) data["input_args"][0] = latent if isinstance(data["input_kwargs"]["encoder_hidden_states"], str): name = data["input_kwargs"]["encoder_hidden_states"] text_emb = np.load(os.path.join(self.path, "text_embs", name)) data["input_kwargs"]["encoder_hidden_states"] = text_emb data = tree_map(lambda x: torch.from_numpy(x), data) # Pad encoder_hidden_states to 300 for pixart if "encoder_attention_mask" in data["input_kwargs"]: encoder_attention_mask = data["input_kwargs"]["encoder_attention_mask"] encoder_hidden_states = data["input_kwargs"]["encoder_hidden_states"] encoder_hidden_states = F.pad( encoder_hidden_states, (0, 0, 0, encoder_attention_mask.shape[1] - encoder_hidden_states.shape[1]), ) data["input_kwargs"]["encoder_hidden_states"] = encoder_hidden_states return data def build_loader(self, **kwargs) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader(self, collate_fn=tree_collate, **kwargs) ================================================ FILE: deepcompressor/app/diffusion/dataset/calib.py ================================================ # -*- coding: utf-8 -*- """Calibration dataset for diffusion models.""" import random import typing as tp from collections import OrderedDict from dataclasses import MISSING, dataclass import torch 
@configclass
@dataclass(kw_only=True)
class DiffusionCalibCacheLoaderConfig(BaseDataLoaderConfig):
    """Settings for loading the cached calibration dataset used by quantization.

    Args:
        data (`str`):
            Dataset name.
        num_samples (`int`):
            Number of dataset samples.
        batch_size (`int`):
            Batch size when loading dataset.
        path (`str`):
            Path to the dataset directory.
        num_workers (`int`):
            Number of workers for data loading.
    """

    path: str
    num_workers: int = 8

    def build_dataset(self) -> "DiffusionCalibDataset":
        """Create the calibration dataset rooted at ``self.path``."""
        dataset = DiffusionCalibDataset(self.path, num_samples=self.num_samples)
        return dataset

    def build_loader(self) -> "DiffusionCalibCacheLoader":
        """Create the calibration cache loader driven by this config."""
        loader = DiffusionCalibCacheLoader(self)
        return loader


class DiffusionCalibDataset(DiffusionDataset):
    """Calibration dataset that loads every ``.pt`` sample into memory up front."""

    data: list[dict[str, tp.Any]]

    def __init__(self, path: str, num_samples: int = -1, seed: int = 0) -> None:
        super().__init__(path, num_samples=num_samples, seed=seed, ext=".pt")
        # load all samples now, then shuffle them with a dedicated seeded generator
        samples = list(map(torch.load, self.filepaths))
        random.Random(seed).shuffle(samples)
        self.data = samples

    def __len__(self) -> int:
        """Return the number of loaded samples."""
        return len(self.data)

    def __getitem__(self, idx) -> dict[str, tp.Any]:
        """Return the already-loaded sample at ``idx``."""
        return self.data[idx]
""" if isinstance(module, Attention): encoder_hidden_states = tensors.get("encoder_hidden_states", None) if encoder_hidden_states is None: tensors.pop("encoder_hidden_states", None) cache.tensors.pop("encoder_hidden_states", None) else: encoder_hidden_states_cache = cache.tensors["encoder_hidden_states"] encoder_channels_dim = 1 if encoder_hidden_states.dim() == 4 else -1 if encoder_hidden_states_cache.channels_dim is None: encoder_hidden_states_cache.channels_dim = encoder_channels_dim if encoder_channels_dim == -1: encoder_hidden_states_cache.reshape = LinearReshapeFn() else: encoder_hidden_states_cache.reshape = AttentionInputReshapeFn(encoder_channels_dim) else: assert encoder_hidden_states_cache.channels_dim == encoder_channels_dim hidden_states, hidden_states_cache = tensors["hidden_states"], cache.tensors["hidden_states"] channels_dim = 1 if hidden_states.dim() == 4 else -1 if hidden_states_cache.channels_dim is None: hidden_states_cache.channels_dim = channels_dim if channels_dim == -1: hidden_states_cache.reshape = LinearReshapeFn() else: hidden_states_cache.reshape = AttentionInputReshapeFn(channels_dim) else: assert hidden_states_cache.channels_dim == channels_dim return super().info(name, module, tensors, cache) class DiffusionCalibCacheLoader(BaseCalibCacheLoader): config: DiffusionCalibCacheLoaderConfig dataset: DiffusionCalibDataset def __init__(self, config: DiffusionCalibCacheLoaderConfig) -> None: """Initialize the cache for the diffusion calibration dataset. Args: config (`DiffusionCalibCacheLoaderConfig`): Configuration for the calibration cache loader. """ super().__init__(dataset=config.build_dataset(), batch_size=config.batch_size) self.batch_size = min(config.batch_size, len(self.dataset)) self.config = config def _init_cache(self, name: str, module: nn.Module) -> IOTensorsCache: """Initialize cache. Args: name (`str`): Module name. module (`nn.Module`): Module. Returns: `IOTensorsCache`: Cache for inputs and outputs. 
""" if isinstance(module, FluxSingleTransformerBlock): return IOTensorsCache( inputs=TensorsCache( OrderedDict( hidden_states=TensorCache(channels_dim=-1, reshape=LinearReshapeFn()), temb=TensorCache(channels_dim=1, reshape=LinearReshapeFn()), ) ), outputs=TensorCache(channels_dim=-1, reshape=LinearReshapeFn()), ) elif isinstance(module, Attention): return IOTensorsCache( inputs=TensorsCache( OrderedDict( hidden_states=TensorCache(channels_dim=None, reshape=None), encoder_hidden_states=TensorCache(channels_dim=None, reshape=None), ), ), outputs=TensorCache(channels_dim=None, reshape=None), ) else: return super()._init_cache(name, module) def iter_samples(self) -> tp.Generator[ModuleForwardInput, None, None]: dataloader = self.dataset.build_loader( batch_size=self.batch_size, shuffle=False, drop_last=True, num_workers=self.config.num_workers ) for data in dataloader: yield ModuleForwardInput(args=data["input_args"], kwargs=data["input_kwargs"]) def _convert_layer_inputs( self, m: nn.Module, args: tuple[tp.Any, ...], kwargs: dict[str, tp.Any], save_all: bool = False ) -> ModuleForwardInput: """Convert layer inputs to module forward input. Args: m (`nn.Module`): Layer. args (`tuple[Any, ...]`): Layer input arguments. kwargs (`dict[str, Any]`): Layer input keyword arguments. save_all (`bool`, *optional*, defaults to `False`): Whether to save all inputs. Returns: `ModuleForwardInput`: Module forward input. 
""" kwargs = {k: v for k, v in kwargs.items()} # noqa: C416 if "res_hidden_states_tuple" in kwargs: kwargs["res_hidden_states_tuple"] = None if "hidden_states" in kwargs: hidden_states = kwargs.pop("hidden_states") assert len(args) == 0, f"Invalid args: {args}" else: hidden_states = args[0] if isinstance(m, (FluxTransformerBlock, JointTransformerBlock)): if "encoder_hidden_states" in kwargs: encoder_hidden_states = kwargs.pop("encoder_hidden_states") else: encoder_hidden_states = args[1] return ModuleForwardInput( args=[ hidden_states.detach().cpu() if save_all else MISSING, encoder_hidden_states.detach().cpu() if save_all else MISSING, ], kwargs=kwargs, ) else: return ModuleForwardInput( args=[hidden_states.detach().cpu() if save_all else MISSING, *args[1:]], kwargs=kwargs ) def _convert_layer_outputs(self, m: nn.Module, outputs: tp.Any) -> dict[str | int, tp.Any]: """Convert layer outputs to dictionary for updating the next layer inputs. Args: m (`nn.Module`): Layer. outputs (`Any`): Layer outputs. Returns: `dict[str | int, Any]`: Dictionary for updating the next layer inputs. """ if isinstance(m, (FluxTransformerBlock, JointTransformerBlock)): assert isinstance(outputs, tuple) and len(outputs) == 2 encoder_hidden_states, hidden_states = outputs return {0: hidden_states.detach().cpu(), 1: encoder_hidden_states.detach().cpu()} else: return super()._convert_layer_outputs(m, outputs) def iter_layer_activations( # noqa: C901 self, model: nn.Module | DiffusionModelStruct, *args, needs_inputs_fn: tp.Callable[[str, nn.Module], bool], needs_outputs_fn: tp.Callable[[str, nn.Module], bool] | None = None, action: CacheAction | None = None, skip_pre_modules: bool = True, skip_post_modules: bool = True, **kwargs, ) -> tp.Generator[ tuple[ str, tuple[ DiffusionBlockStruct | nn.Module, dict[str, IOTensorsCache], dict[str, tp.Any], ], ], None, None, ]: """Iterate over model activations in layers. Args: model (`nn.Module`): Model. 
action (`CacheAction`): Action for caching activations. needs_inputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `True`): Function for determining whether to cache inputs for a module given its name and itself. needs_outputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `None`): Function for determining whether to cache outputs for a module given its name and itself. *args: Arguments for ``iter_samples``. **kwargs: Keyword arguments for ``iter_samples``. Yields: Generator[ tuple[str, tuple[DiffusionBlockStruct | nn.Module, dict[str, IOTensorsCache], dict[str, tp.Any]]], None, None ]: Generator of tuple of - layer name - a tuple of - layer itself - inputs and outputs cache of each module in the layer - layer input arguments """ if not isinstance(model, DiffusionModelStruct): model_struct = DiffusionModelStruct.construct(model) else: model_struct = model model = model_struct.module assert isinstance(model_struct, DiffusionModelStruct) assert isinstance(model, nn.Module) action = DiffusionConcatCacheAction("cpu") if action is None else action layers, layer_structs, recomputes, use_prev_layer_outputs = model_struct.get_iter_layer_activations_args( skip_pre_modules=skip_pre_modules, skip_post_modules=skip_post_modules, **self.dataset[0]["input_kwargs"], ) for layer_idx, (layer_name, (layer, layer_cache, layer_inputs)) in enumerate( self._iter_layer_activations( model, *args, action=action, layers=layers, needs_inputs_fn=needs_inputs_fn, needs_outputs_fn=needs_outputs_fn, recomputes=recomputes, use_prev_layer_outputs=use_prev_layer_outputs, **kwargs, ) ): layer_kwargs = {k: v for k, v in layer_inputs[0].kwargs.items()} # noqa: C416 layer_kwargs.pop("hidden_states", None) layer_kwargs.pop("encoder_hidden_states", None) layer_kwargs.pop("temb", None) layer_struct = layer_structs[layer_idx] if isinstance(layer_struct, DiffusionBlockStruct): assert layer_struct.name == layer_name assert layer is 
layer_struct.module for transformer_block_struct in layer_struct.iter_transformer_block_structs(): for attn_struct in transformer_block_struct.iter_attention_structs(): if attn_struct.q_proj_name in layer_cache: if not attn_struct.is_cross_attn(): cache = layer_cache[attn_struct.q_proj_name] layer_cache[attn_struct.k_proj_name] = cache layer_cache[attn_struct.v_proj_name] = cache if attn_struct.add_k_proj_name in layer_cache: assert not attn_struct.is_self_attn() cache = layer_cache[attn_struct.add_k_proj_name] layer_cache[attn_struct.add_v_proj_name] = cache if attn_struct.is_joint_attn(): layer_cache[attn_struct.add_q_proj_name] = cache ffn_struct = transformer_block_struct.ffn_struct num_experts = ffn_struct.config.num_experts if ffn_struct is not None and num_experts > 1: for expert_idx in range(num_experts): if ffn_struct.up_proj_names[expert_idx] in layer_cache: cache = layer_cache[ffn_struct.up_proj_names[expert_idx]] for up_proj_name in ffn_struct.up_proj_names[expert_idx::num_experts]: layer_cache[up_proj_name] = cache if ffn_struct.down_proj_names[expert_idx] in layer_cache: cache = layer_cache[ffn_struct.down_proj_names[expert_idx]] for down_proj_name in ffn_struct.down_proj_names[expert_idx::num_experts]: layer_cache[down_proj_name] = cache yield layer_name, (layer_struct, layer_cache, layer_kwargs) ================================================ FILE: deepcompressor/app/diffusion/dataset/collect/calib.py ================================================ # -*- coding: utf-8 -*- """Collect calibration dataset.""" import os from dataclasses import dataclass import datasets import torch from omniconfig import configclass from torch import nn from tqdm import tqdm from deepcompressor.app.diffusion.config import DiffusionPtqRunConfig from deepcompressor.utils.common import hash_str_to_int, tree_map from ...utils import get_control from ..data import get_dataset from .utils import CollectHook def process(x: torch.Tensor) -> torch.Tensor: dtype = x.dtype 
def collect(config: DiffusionPtqRunConfig, dataset: datasets.Dataset, dataset_name: str | None = None):
    """Run the pipeline over ``dataset`` and dump the model inputs of every step.

    Generated images are written to ``<config.output.root>/samples`` and one
    cache file per (sample, denoising step, guidance branch) is written to
    ``<config.output.root>/caches``.

    Args:
        config (`DiffusionPtqRunConfig`):
            Run configuration providing the pipeline, evaluation and output settings.
        dataset (`datasets.Dataset`):
            Prompt dataset; each batch must provide ``filename`` and ``prompt``
            (and ``image`` for control tasks).
        dataset_name (`str` or `None`, *optional*, defaults to `None`):
            Name of the collected dataset, used to locate control images. Required
            for the ``canny-to-image``, ``depth-to-image`` and ``inpainting`` tasks.

    Raises:
        ValueError: If the task needs control images but ``dataset_name`` is not given.
    """
    samples_dirpath = os.path.join(config.output.root, "samples")
    caches_dirpath = os.path.join(config.output.root, "caches")
    os.makedirs(samples_dirpath, exist_ok=True)
    os.makedirs(caches_dirpath, exist_ok=True)
    caches = []

    pipeline = config.pipeline.build()
    model = pipeline.unet if hasattr(pipeline, "unet") else pipeline.transformer
    assert isinstance(model, nn.Module)
    # the hook appends one cache entry per sample for every forward call of the model
    model.register_forward_hook(CollectHook(caches=caches), with_kwargs=True)

    batch_size = config.eval.batch_size
    task = config.pipeline.task
    control_root = config.eval.control_root
    needs_control = task in ["canny-to-image", "depth-to-image", "inpainting"]
    if needs_control and dataset_name is None:
        # this used to be read from a global that only exists when the module runs as a script
        raise ValueError(f"`dataset_name` is required to locate control images for task {task!r}")
    print(f"In total {len(dataset)} samples")
    print(f"Evaluating with batch size {batch_size}")
    pipeline.set_progress_bar_config(desc="Sampling", leave=False, dynamic_ncols=True, position=1)
    for batch in tqdm(
        dataset.iter(batch_size=batch_size, drop_last_batch=False),
        desc="Data",
        leave=False,
        dynamic_ncols=True,
        total=(len(dataset) + batch_size - 1) // batch_size,
    ):
        filenames = batch["filename"]
        prompts = batch["prompt"]
        seeds = [hash_str_to_int(name) for name in filenames]
        generators = [torch.Generator(device=pipeline.device).manual_seed(seed) for seed in seeds]
        pipeline_kwargs = config.eval.get_pipeline_kwargs()

        if needs_control:
            controls = get_control(
                task,
                batch["image"],
                names=batch["filename"],
                data_root=os.path.join(
                    control_root, dataset_name, f"{dataset.config_name}-{config.eval.num_samples}"
                ),
            )
            if task == "inpainting":
                pipeline_kwargs["image"] = controls[0]
                pipeline_kwargs["mask_image"] = controls[1]
            else:
                pipeline_kwargs["control_image"] = controls

        result_images = pipeline(prompts, generator=generators, **pipeline_kwargs).images
        # the last batch may be smaller than `batch_size` because `drop_last_batch=False`,
        # so all bookkeeping below uses the actual number of samples in this batch
        num_in_batch = len(filenames)
        num_guidances = (len(caches) // num_in_batch) // config.eval.num_steps
        assert num_guidances > 0, (
            f"Too few caches collected: {len(caches)} < {num_in_batch} * {config.eval.num_steps}"
        )
        num_steps = len(caches) // (num_in_batch * num_guidances)
        assert (
            len(caches) == num_in_batch * num_steps * num_guidances
        ), f"Unexpected number of caches: {len(caches)} != {num_in_batch} * {config.eval.num_steps} * {num_guidances}"
        for j, (filename, image) in enumerate(zip(filenames, result_images, strict=True)):
            image.save(os.path.join(samples_dirpath, f"{filename}.png"))
            for s in range(num_steps):
                for g in range(num_guidances):
                    # caches are ordered by step, then guidance branch, then sample
                    c = caches[s * num_in_batch * num_guidances + g * num_in_batch + j]
                    c["filename"] = filename
                    c["step"] = s
                    c["guidance"] = g
                    c = tree_map(process, c)
                    torch.save(c, os.path.join(caches_dirpath, f"{filename}-{s:05d}-{g}.pt"))
        caches.clear()


@configclass
@dataclass
class CollectConfig:
    """Configuration for collecting calibration dataset.

    Args:
        root (`str`, *optional*, defaults to `"datasets"`):
            Root directory to save the collected dataset.
        dataset_name (`str`, *optional*, defaults to `"qdiff"`):
            Name of the collected dataset.
        data_path (`str`, *optional*, defaults to `"prompts/qdiff.yaml"`):
            Path to the prompt file.
        num_samples (`int`, *optional*, defaults to `128`):
            Number of samples to collect.
    """

    root: str = "datasets"
    dataset_name: str = "qdiff"
    data_path: str = "prompts/qdiff.yaml"
    num_samples: int = 128


if __name__ == "__main__":
    parser = DiffusionPtqRunConfig.get_parser()
    parser.add_config(CollectConfig, scope="collect", prefix="collect")
    configs, _, unused_cfgs, unused_args, unknown_args = parser.parse_known_args()
    ptq_config, collect_config = configs[""], configs["collect"]
    assert isinstance(ptq_config, DiffusionPtqRunConfig)
    assert isinstance(collect_config, CollectConfig)
    if len(unused_cfgs) > 0:
        print(f"Warning: unused configurations {unused_cfgs}")
    if unused_args is not None:
        print(f"Warning: unused arguments {unused_args}")
    assert len(unknown_args) == 0, f"Unknown arguments: {unknown_args}"
    collect_dirpath = os.path.join(
        collect_config.root,
        str(ptq_config.pipeline.dtype),
        ptq_config.pipeline.name,
        ptq_config.eval.protocol,
        collect_config.dataset_name,
        f"s{collect_config.num_samples}",
    )
    print(f"Saving caches to {collect_dirpath}")
    # NOTE(review): ground-truth images are requested only for "canny-to-image", yet
    # `collect` also reads batch["image"] for "depth-to-image" and "inpainting" — confirm.
    dataset = get_dataset(
        collect_config.data_path,
        max_dataset_size=collect_config.num_samples,
        return_gt=ptq_config.pipeline.task in ["canny-to-image"],
        repeat=1,
    )
    ptq_config.output.root = collect_dirpath
    os.makedirs(ptq_config.output.root, exist_ok=True)
    collect(ptq_config, dataset=dataset, dataset_name=collect_config.dataset_name)
def __call__(
    self,
    module: nn.Module,
    input_args: tuple[torch.Tensor, ...],
    input_kwargs: dict[str, tp.Any],
    output: tuple[torch.Tensor, ...],
) -> tp.Any:
    """Record the inputs and outputs of one forward call, split per sample.

    Positional inputs are first converted to keyword form using the signature
    of ``module.forward``. The main latent input (``sample`` for UNets,
    ``hidden_states`` for the supported transformers) is stored as the single
    positional argument of the cache entry; everything else is stored as
    keyword arguments. The hook works on a copy of ``input_kwargs`` so the
    caller's dictionary is left untouched.

    Args:
        module (`nn.Module`): The model the hook is registered on.
        input_args (`tuple[torch.Tensor, ...]`): Positional forward arguments.
        input_kwargs (`dict[str, Any]`): Keyword forward arguments.
        output (`tuple[torch.Tensor, ...]`): Forward outputs.

    Raises:
        ValueError: If ``module`` is not one of the supported model classes.
    """
    signature = inspect.signature(module.forward)
    bound_arguments = signature.bind(*input_args, **input_kwargs)
    # merge into a fresh dict instead of updating/popping the dict owned by the caller
    kwargs = dict(input_kwargs)
    kwargs.update({k: v for k, v in bound_arguments.arguments.items() if k not in input_kwargs})
    new_args = []
    if isinstance(module, UNet2DConditionModel):
        sample = kwargs.pop("sample")
        new_args.append(sample)
        timestep = kwargs["timestep"]
        timesteps = timestep
        if not torch.is_tensor(timesteps):
            is_mps = sample.device.type == "mps"
            if isinstance(timestep, float):
                dtype = torch.float32 if is_mps else torch.float64
            else:
                dtype = torch.int32 if is_mps else torch.int64
            timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device)
        elif len(timesteps.shape) == 0:
            timesteps = timesteps[None].to(sample.device)
        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
        timesteps = timesteps.expand(sample.shape[0])
        kwargs["timestep"] = timesteps
    elif isinstance(module, (PixArtTransformer2DModel, SanaTransformer2DModel, FluxTransformer2DModel)):
        new_args.append(kwargs.pop("hidden_states"))
    else:
        raise ValueError(f"Unknown model: {module}")
    cache = tree_map(lambda x: x.cpu(), {"input_args": new_args, "input_kwargs": kwargs, "outputs": output})
    split_cache = tree_split(cache)
    if isinstance(module, PixArtTransformer2DModel) and self.zero_redundancy:
        for sample_cache in split_cache:
            cache_kwargs = sample_cache["input_kwargs"]
            encoder_hidden_states = cache_kwargs.pop("encoder_hidden_states")
            assert encoder_hidden_states.shape[0] == 1
            encoder_attention_mask = cache_kwargs.get("encoder_attention_mask", None)
            if encoder_attention_mask is not None:
                # keep only as many leading positions as the mask sums to (at least one)
                encoder_hidden_states = encoder_hidden_states[:, : max(encoder_attention_mask.sum(), 1)]
            cache_kwargs["encoder_hidden_states"] = encoder_hidden_states
    self.caches.extend(split_cache)
class COCO(datasets.GeneratorBasedBuilder):
    """Dataset builder for COCO images paired with one caption per image.

    Captions and split assignments are read from the ``dataset_coco.json`` file
    inside the archive at ``_KARPATHY_FILES_URL``; images come from the archives
    in ``_IMAGES_URLS``. All examples are exposed under a single ``train`` split.
    """

    VERSION = datasets.Version("0.0.0")

    BUILDER_CONFIG_CLASS = COCOConfig
    BUILDER_CONFIGS = [
        COCOConfig(name="COCO_val", version=VERSION, description="COCO validation prompt set"),
        COCOConfig(name="COCO_train", version=VERSION, description="COCO train prompt set"),
        COCOConfig(name="COCO_full", version=VERSION, description="COCO full prompt set"),
    ]
    DEFAULT_CONFIG_NAME = "COCO_val"

    def _info(self):
        """Return the dataset metadata built from the module-level constants."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=_FEATURES,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager: datasets.download.DownloadManager):
        """Download annotations and images and declare the single ``train`` split."""
        annotation_file = os.path.join(dl_manager.download_and_extract(_KARPATHY_FILES_URL), "dataset_coco.json")
        image_folders = {k: Path(v) for k, v in dl_manager.download_and_extract(_IMAGES_URLS).items()}

        if self.config.name == "COCO_full":
            # NOTE(review): "validation" matches none of the branches in
            # `_generate_examples`, so every image is kept for that key and the
            # train/restval images are appended a second time for "train".
            # Confirm whether "val" was intended before changing it.
            split_keys = ["validation", "train"]
        else:
            split_keys = [self.config.name.split("_")[-1]]

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "annotation_file": annotation_file,
                    "image_folders": image_folders,
                    "split_keys": split_keys,
                },
            ),
        ]

    def _generate_examples(
        self, annotation_file: str, image_folders: dict[str, str], split_keys: list[str] | tuple[str, ...]
    ):
        """Yield ``(index, example)`` pairs whose keys match ``_FEATURES``.

        Args:
            annotation_file: Path to the annotation JSON file.
            image_folders: Mapping with ``"train"`` and ``"validation"`` entries
                pointing at the extracted image archives.
            split_keys: Split names used to filter the annotated images.
        """
        with open(annotation_file, "r", encoding="utf-8") as fi:
            annotations = json.load(fi)

        metas = []
        for split_key in split_keys:
            for image_metadata in annotations["images"]:
                if split_key == "train":
                    if image_metadata["split"] != "train" and image_metadata["split"] != "restval":
                        continue
                elif split_key == "val":
                    if image_metadata["split"] != "val":
                        continue
                elif split_key == "test":
                    if image_metadata["split"] != "test":
                        continue
                metas.append(image_metadata)

        if self.config.max_dataset_size > 0:
            # Fixed seed: the same subset is selected on every run.
            random.Random(0).shuffle(metas)
            metas = metas[: self.config.max_dataset_size]
            metas = sorted(metas, key=lambda x: x["filename"])

        for i, meta in enumerate(metas):
            if "val2014" in meta["filename"]:
                image_root = os.path.join(image_folders["validation"], "val2014")
            else:
                image_root = os.path.join(image_folders["train"], "train2014")
            filename = meta["filename"].replace(".jpg", "").replace(".png", "")
            image_path = os.path.join(image_root, filename + ".jpg")

            sentences_raw = [caption["raw"] for caption in meta["sentences"]]
            # Pick one caption per image deterministically from the file name.
            prompt_id = hash_string_to_int(filename) % len(sentences_raw)
            prompt = sentences_raw[prompt_id]

            yield (
                i,
                {
                    # `_FEATURES` declares "filepath"; fill it when the annotation has it.
                    "filepath": meta.get("filepath"),
                    "filename": filename,
                    "image": Image.open(image_path) if self.config.return_gt else None,
                    "image_path": image_path,
                    "image_root": image_root,
                    "prompt": prompt,
                    "prompt_id": prompt_id,
                    "imgid": meta["imgid"],
                    "split": self.config.name,
                    # Key must be "cocoid" to match `_FEATURES` (was "coco_id").
                    "cocoid": meta["cocoid"],
                    "sentences_raw": sentences_raw,
                    "sentids": meta["sentids"],
                    "sentences_sentid": [caption["sentid"] for caption in meta["sentences"]],
                    "sentences_tokens": [caption["tokens"] for caption in meta["sentences"]],
                },
            )
class DCI(datasets.GeneratorBasedBuilder):
    """Dataset builder for the sDCI prompt set and its images.

    Prompts are read from the YAML file listed in ``PROMPT_URLS`` and images from
    the archive at ``IMAGE_URL``. All examples are exposed under a single
    ``train`` split.
    """

    VERSION = datasets.Version("0.0.0")

    BUILDER_CONFIG_CLASS = DCIConfig
    BUILDER_CONFIGS = [DCIConfig(name="sDCI", version=VERSION, description="sDCI full prompt set")]
    DEFAULT_CONFIG_NAME = "sDCI"

    def _info(self):
        """Return the dataset metadata, including the example schema."""
        features = datasets.Features(
            {
                "filename": datasets.Value("string"),
                "image": datasets.Image(),
                "prompt": datasets.Value("string"),
                "meta_path": datasets.Value("string"),
                "image_root": datasets.Value("string"),
                "image_path": datasets.Value("string"),
                "split": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
        )

    def _split_generators(self, dl_manager: datasets.download.DownloadManager):
        """Download the prompt file and image archive and declare the ``train`` split."""
        image_url = IMAGE_URL
        meta_url = PROMPT_URLS[self.config.name]

        meta_path = dl_manager.download(meta_url)
        image_root = dl_manager.download_and_extract(image_url)

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN, gen_kwargs={"meta_path": meta_path, "image_root": image_root}
            )
        ]

    def _generate_examples(self, meta_path: str, image_root: str):
        """Yield ``(index, example)`` pairs, one per entry of the prompt file.

        Args:
            meta_path: Path to the YAML file mapping image names to prompts.
            image_root: Directory containing ``<name>.jpg`` images.
        """
        # Close the file deterministically instead of leaking the handle.
        with open(meta_path, "r") as f:
            meta = yaml.safe_load(f)
        names = list(meta.keys())
        if self.config.max_dataset_size > 0:
            # Fixed seed: the same subset is selected on every run.
            random.Random(0).shuffle(names)
            names = names[: self.config.max_dataset_size]
            names = sorted(names)

        for i, name in enumerate(names):
            prompt = meta[name]
            image_path = os.path.join(image_root, f"{name}.jpg")
            yield (
                i,
                {
                    "filename": name,
                    "image": Image.open(image_path) if self.config.return_gt else None,
                    "prompt": prompt,
                    "meta_path": meta_path,
                    "image_root": image_root,
                    "image_path": image_path,
                    "split": self.config.name,
                },
            )
class DCI(datasets.GeneratorBasedBuilder):
    """Dataset builder for the MJHQ-30K prompts and images.

    Metadata is read from the JSON file at ``META_URL`` and images from the
    archive at ``IMAGE_URL``. All examples are exposed under a single ``train``
    split.
    """

    # NOTE(review): this builder lives in MJHQ.py but is named `DCI`; the name is
    # kept unchanged here because other code may refer to it.

    VERSION = datasets.Version("0.0.0")

    BUILDER_CONFIG_CLASS = MJHQConfig
    BUILDER_CONFIGS = [MJHQConfig(name="MJHQ", version=VERSION, description="MJHQ-30K full dataset")]
    DEFAULT_CONFIG_NAME = "MJHQ"

    def _info(self):
        """Return the dataset metadata, including the example schema."""
        features = datasets.Features(
            {
                "filename": datasets.Value("string"),
                "category": datasets.Value("string"),
                "image": datasets.Image(),
                "prompt": datasets.Value("string"),
                "prompt_path": datasets.Value("string"),
                "image_root": datasets.Value("string"),
                "image_path": datasets.Value("string"),
                "split": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
        )

    def _split_generators(self, dl_manager: datasets.download.DownloadManager):
        """Download the metadata file and image archive and declare the ``train`` split."""
        meta_path = dl_manager.download(META_URL)
        image_root = dl_manager.download_and_extract(IMAGE_URL)
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN, gen_kwargs={"meta_path": meta_path, "image_root": image_root}
            ),
        ]

    def _generate_examples(self, meta_path: str, image_root: str):
        """Yield ``(index, example)`` pairs, one per entry of the metadata file.

        Args:
            meta_path: Path to the JSON file mapping image names to a dict with
                ``"category"`` and ``"prompt"`` entries.
            image_root: Directory containing ``<category>/<name>.jpg`` images.
        """
        with open(meta_path, "r") as f:
            meta = json.load(f)

        names = list(meta.keys())
        if self.config.max_dataset_size > 0:
            # Fixed seed: the same subset is selected on every run.
            random.Random(0).shuffle(names)
            names = names[: self.config.max_dataset_size]
            names = sorted(names)

        for i, name in enumerate(names):
            category = meta[name]["category"]
            prompt = meta[name]["prompt"]
            image_path = os.path.join(image_root, category, f"{name}.jpg")
            yield (
                i,
                {
                    "filename": name,
                    "category": category,
                    "image": Image.open(image_path) if self.config.return_gt else None,
                    "prompt": prompt,
                    # The schema in `_info` names this column "prompt_path"; the
                    # example previously used "meta_path", which the schema lacks.
                    "prompt_path": meta_path,
                    "image_root": image_root,
                    "image_path": image_path,
                    "split": self.config.name,
                },
            )
name.endswith((".yaml", ".yml")): dataset = datasets.Dataset.from_dict( load_dataset_yaml(name, max_dataset_size=max_dataset_size, repeat=repeat), features=datasets.Features( { "filename": datasets.Value("string"), "prompt": datasets.Value("string"), "meta_path": datasets.Value("string"), } ), ) else: path = os.path.join(prefix, f"{name}") if name == "COCO": dataset = datasets.load_dataset(path, return_gt=return_gt, **kwargs) elif name == "DCI": dataset = datasets.load_dataset(path, return_gt=return_gt, **kwargs) elif name == "MJHQ": dataset = datasets.load_dataset(path, return_gt=return_gt, **kwargs) else: raise ValueError(f"Unknown dataset name: {name}") assert not hasattr(dataset, "_unchunk_size") assert not hasattr(dataset, "_chunk_start") assert not hasattr(dataset, "_chunk_step") unchunk_size = len(dataset) if chunk_step > 1 or chunk_start > 0: assert 0 <= chunk_start < chunk_step dataset = dataset.select(range(chunk_start, len(dataset), chunk_step)) else: chunk_start, chunk_step = 0, 1 dataset._unchunk_size = unchunk_size dataset._chunk_start = chunk_start dataset._chunk_step = chunk_step return dataset ================================================ FILE: deepcompressor/app/diffusion/dataset/data/dump.py ================================================ import argparse import os import yaml from tqdm import tqdm from ...utils import get_control from . 
import get_dataset if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--benchmarks", type=str, nargs="*", default=["COCO", "DCI", "MJHQ"]) parser.add_argument("--max-dataset-size", type=int, default=-1) parser.add_argument("--dump-root", type=str, default="benchmarks") parser.add_argument("--copy-images", action="store_true") parser.add_argument("--prompts-only", action="store_true") parser.add_argument("--controls", type=str, nargs="*", default=["canny-to-image", "depth-to-image", "inpainting"]) parser.add_argument("--chunk-start", type=int, default=0) parser.add_argument("--chunk-step", type=int, default=1) args = parser.parse_args() if "depth-to-image" in args.controls: from image_gen_aux import DepthPreprocessor processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf").to("cuda") for benchmark in args.benchmarks: dataset = get_dataset( benchmark, max_dataset_size=args.max_dataset_size, return_gt=True, chunk_start=args.chunk_start, chunk_step=args.chunk_step, ) prompts = {} benchmark_root = os.path.join(args.dump_root, benchmark, f"{dataset.config_name}-{dataset._unchunk_size}") for row in tqdm(dataset, desc=f"Dumping {dataset.config_name}"): prompts[row["filename"]] = row["prompt"] if not args.prompts_only: image = row.get("image", None) if image is not None: image_root = os.path.join(benchmark_root, "images") os.makedirs(image_root, exist_ok=True) if args.copy_images: image.save(os.path.join(image_root, row["filename"] + ".png")) else: ext = os.path.basename(row["image_path"]).split(".")[-1] os.symlink( os.path.abspath(os.path.expanduser(row["image_path"])), os.path.abspath(os.path.expanduser(os.path.join(image_root, row["filename"] + f".{ext}"))), ) if "canny-to-image" in args.controls: canny_root = os.path.join(benchmark_root, "canny_images") os.makedirs(canny_root, exist_ok=True) canny = get_control("canny-to-image", image) canny.save(os.path.join(canny_root, row["filename"] + ".png")) if 
@configclass
@dataclass
class DiffusionEvalConfig:
    """Diffusion model evaluation configuration.

    Args:
        protocol (`str`):
            The protocol of the evaluation pipeline. It is lower-cased and formatted with
            `num_steps` and `guidance_scale` in `__post_init__`.
        num_gpus (`int`, *optional*, defaults to `1`):
            The number of GPUs to use.
        batch_size (`int`, *optional*, defaults to `1`):
            The batch size used for inference.
        batch_size_per_gpu (`int`, *optional*, defaults to `None`):
            The batch size per GPU.
        height (`int`, *optional*, defaults to `None`):
            The height of the generated images.
        width (`int`, *optional*, defaults to `None`):
            The width of the generated images.
        clean_caption (`bool`, *optional*, defaults to `None`):
            Whether to clean the caption.
        num_steps (`int`, *optional*, defaults to `None`):
            The number of inference steps.
        guidance_scale (`float`, *optional*, defaults to `None`):
            The guidance scale.
        num_samples (`int`, *optional*, defaults to `1024`):
            The number of samples to generate.
        benchmarks (`list[str]`, *optional*, defaults to `["COCO", "DCI", "MJHQ", "GenEval"]`):
            The benchmark datasets to evaluate on.
        gt_metrics (`list[str]`, *optional*, defaults to `["clip_iqa", "clip_score", "image_reward", "fid"]`):
            The ground truth metrics to compute.
        ref_metrics (`list[str]`, *optional*, defaults to `["psnr", "lpips", "ssim"]`):
            The reference metrics to compute.
        gen_root (`str`, *optional*, defaults to `""`):
            The root directory path for generated images, used when `generate` / `evaluate`
            are called without an explicit `gen_root`.
        ref_root (`str`, *optional*, defaults to `""`):
            The root directory path to the reference images.
        gt_stats_root (`str`, *optional*, defaults to `""`):
            The root directory path to the ground truth statistics.
        control_root (`str`, *optional*, defaults to `None`):
            The root directory joined with the benchmark name and passed to `get_control` as
            part of `data_root` for the control tasks.
        chunk_start (`int`, *optional*, defaults to `0`):
            The starting chunk index.
        chunk_step (`int`, *optional*, defaults to `1`):
            The chunk step size.
        chunk_only (`bool`, *optional*, defaults to `False`):
            If set, images are written to a chunk-specific directory and `evaluate` skips the
            metric computation. Forced to `False` when `chunk_start == 0` and `chunk_step == 1`.
    """

    protocol: str
    num_gpus: int = field(default=1, metadata={omniconfig.ARGPARSE_ARGS: ("--num-gpus", "-n")})
    batch_size: int = 1
    batch_size_per_gpu: int | None = None
    height: int | None = None
    width: int | None = None
    clean_caption: bool | None = None
    num_steps: int | None = None
    guidance_scale: float | None = None
    num_samples: int = 1024
    benchmarks: list[str] = field(
        default_factory=lambda: ["COCO", "DCI", "MJHQ", "GenEval"],
        metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": str}},
    )
    gt_metrics: list[str] = field(
        default_factory=lambda: ["clip_iqa", "clip_score", "image_reward", "fid"],
        metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": str}},
    )
    ref_metrics: list[str] = field(
        default_factory=lambda: ["psnr", "lpips", "ssim"],
        metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": str}},
    )
    gen_root: str = ""
    ref_root: str = ""
    gt_stats_root: str = ""
    control_root: str | None = None
    chunk_start: int = 0
    chunk_step: int = 1
    chunk_only: bool = False

    def __post_init__(self):
        assert self.protocol
        self.protocol = self.protocol.lower().format(num_steps=self.num_steps, guidance_scale=self.guidance_scale)
        assert 0 <= self.chunk_start < self.chunk_step
        # A single chunk covering everything is not a "chunk-only" run.
        if self.chunk_start == 0 and self.chunk_step == 1:
            self.chunk_only = False

    def get_pipeline_kwargs(self) -> dict[str, tp.Any]:
        """Return the pipeline call keyword arguments for every option that is not `None`."""
        kwargs = {}
        if self.height is not None:
            kwargs["height"] = self.height
        if self.width is not None:
            kwargs["width"] = self.width
        if self.clean_caption is not None:
            kwargs["clean_caption"] = self.clean_caption
        if self.num_steps is not None:
            kwargs["num_inference_steps"] = self.num_steps
        if self.guidance_scale is not None:
            kwargs["guidance_scale"] = self.guidance_scale
        return kwargs

    def _generate(
        self,
        rank: int,
        dataset: datasets.Dataset,
        pipeline: DiffusionPipeline,
        dirpath: str,
        logger: logging.Logger,
        dataset_name: str | None = None,
        task: str = "text-to-image",
        control_root: str | None = None,
    ) -> None:
        """Generate and save the images assigned to one rank.

        Every batch is split across ranks with a stride of `num_gpus`; samples whose
        output file already exists for the whole rank-slice are skipped.

        Args:
            rank: Index of this worker in `[0, num_gpus)`.
            dataset: The dataset providing `filename` and `prompt` columns.
            pipeline: The diffusion pipeline to sample from.
            dirpath: Directory the `<filename>.png` images are written to.
            logger: Logger used by rank 0 to report the dataset size.
            dataset_name: Display name; defaults to `dataset.config_name`.
            task: The generation task.
            control_root: Root directory of the control data; required for control tasks.

        Raises:
            ValueError: If `task` is a control task and `control_root` is `None`.
        """
        if self.num_gpus > 1:
            pipeline = pipeline.to(rank)
        if rank == 0:
            logger.info(
                f"  {dataset.config_name} has {len(dataset)} samples "
                f"(chunk_start={dataset._chunk_start}, chunk_step={dataset._chunk_step},"
                f" unchunk_size={dataset._unchunk_size})"
            )
        pipeline.set_progress_bar_config(
            desc="Sampling",
            leave=False,
            dynamic_ncols=True,
            position=1,
            disable=self.num_gpus > 1,
        )
        if dataset_name is None:
            dataset_name = dataset.config_name
        for batch in tqdm(
            dataset.iter(batch_size=self.batch_size, drop_last_batch=False),
            desc=dataset_name if self.num_gpus == 1 else f"{dataset_name} (GPU {rank})",
            leave=False,
            dynamic_ncols=True,
            position=rank,
            total=(len(dataset) + self.batch_size - 1) // self.batch_size,
        ):
            filenames = batch["filename"][rank :: self.num_gpus]
            if len(filenames) == 0:
                continue
            if all(os.path.exists(os.path.join(dirpath, f"{filename}.png")) for filename in filenames):
                continue
            prompts = batch["prompt"][rank :: self.num_gpus]
            # Seeds are derived from the file names so results do not depend on batching.
            seeds = [hash_str_to_int(name) for name in filenames]
            diffusers.training_utils.set_seed(seeds[0])
            generators = [torch.Generator().manual_seed(seed) for seed in seeds]

            pipeline_kwargs = self.get_pipeline_kwargs()

            if task in ["canny-to-image", "depth-to-image", "inpainting"]:
                if control_root is None:
                    raise ValueError(f"`control_root` must be set for task {task!r}")
                # Use the same rank-slice as the prompts; the full batch would not line
                # up with `prompts` when more than one GPU is used.
                controls = get_control(
                    task,
                    batch["image"][rank :: self.num_gpus],
                    names=filenames,
                    data_root=os.path.join(control_root, f"{dataset_name}-{dataset._unchunk_size}"),
                )
                if task == "inpainting":
                    pipeline_kwargs["image"] = controls[0]
                    pipeline_kwargs["mask_image"] = controls[1]
                else:
                    pipeline_kwargs["control_image"] = controls

            output = pipeline(prompts, generator=generators, **pipeline_kwargs)
            images = output.images
            for filename, image in zip(filenames, images, strict=True):
                image.save(os.path.join(dirpath, f"{filename}.png"))

    def generate(
        self,
        pipeline: DiffusionPipeline,
        gen_root: str = "",
        task: str = "text-to-image",
    ) -> None:
        """Generate images for every configured benchmark.

        Args:
            pipeline: The diffusion pipeline to sample from.
            gen_root: Output root; falls back to `self.gen_root` when empty.
            task: The generation task.
        """
        logger = logging.getLogger(f"{__name__}.DiffusionEval")
        gen_root = gen_root or self.gen_root
        for benchmark in self.benchmarks:
            dataset = get_dataset(
                benchmark,
                max_dataset_size=self.num_samples,
                chunk_start=self.chunk_start,
                chunk_step=self.chunk_step,
                return_gt=task in ["canny-to-image"],
                repeat=1,
            )
            if benchmark.endswith(".yaml") or benchmark.endswith(".yml"):
                dataset_name = os.path.splitext(os.path.basename(benchmark))[0]
                dirpath = os.path.join(
                    gen_root,
                    "samples",
                    "YAML",
                    f"{dataset_name}-{dataset._unchunk_size}",
                )
            else:
                dataset_name = dataset.config_name
                dirpath = os.path.join(
                    gen_root,
                    "samples",
                    benchmark,
                    f"{dataset.config_name}-{dataset._unchunk_size}",
                )
            if self.chunk_only:
                dirpath += f".{dataset._chunk_start}.{dataset._chunk_step}"
            os.makedirs(dirpath, exist_ok=True)
            # `control_root` defaults to None; joining None with a path would raise a
            # TypeError even for tasks that never read the control directory.
            control_root = os.path.join(self.control_root, benchmark) if self.control_root is not None else None
            args = (dataset, pipeline, dirpath, logger, dataset_name, task, control_root)
            if self.num_gpus == 1:
                self._generate(0, *args)
            else:
                mp.spawn(self._generate, args=args, nprocs=self.num_gpus, join=True)

    def evaluate(
        self, pipeline: DiffusionPipeline, gen_root: str = "", skip_gen: bool = False, task: str = "text-to-image"
    ) -> dict[str, tp.Any] | None:
        """Generate images (unless skipped) and compute the configured metrics.

        Args:
            pipeline: The diffusion pipeline to sample from.
            gen_root: Output root; falls back to `self.gen_root` when empty.
            skip_gen: If `True`, reuse the images already present under `gen_root`.
            task: The generation task.

        Returns:
            The metric results, or an empty dict when `chunk_only` is set.
        """
        gen_root = gen_root or self.gen_root
        if not skip_gen:
            self.generate(pipeline, gen_root=gen_root, task=task)
        if not self.chunk_only:
            return compute_image_metrics(
                gen_root=gen_root,
                benchmarks=self.benchmarks,
                max_dataset_size=self.num_samples,
                chunk_start=self.chunk_start,
                chunk_step=self.chunk_step,
                ref_root=self.ref_root,
                gt_stats_root=self.gt_stats_root,
                gt_metrics=self.gt_metrics,
                ref_metrics=self.ref_metrics,
            )
        else:
            return {}
| tuple[str, ...] = ("DCI", "GenAIBench", "GenEval", "MJHQ", "T2ICompBench"), max_dataset_size: int = -1, chunk_start: int = 0, chunk_step: int = 1, chunk_only: bool = False, ref_root: str = "", gt_stats_root: str = "", gt_metrics: tuple[str, ...] = ("clip_iqa", "clip_score", "image_reward", "fid"), ref_metrics: tuple[str, ...] = ("psnr", "lpips", "ssim", "fid"), ) -> dict: if chunk_start == 0 and chunk_step == 1: chunk_only = False assert chunk_start == 0 and chunk_step == 1, "Chunking is not supported for image data." os.environ["TOKENIZERS_PARALLELISM"] = "false" if isinstance(benchmarks, str): benchmarks = (benchmarks,) gt_multimodal_metrics, gt_similarity_metrics, gt_other_metrics = categorize_metrics(gt_metrics) _, ref_similarity_metrics, ref_other_metrics = categorize_metrics(ref_metrics) results = {} for benchmark in benchmarks: benchmark_results = {} dataset = get_dataset(benchmark, max_dataset_size=max_dataset_size, return_gt=True) dirname = f"{dataset.config_name}-{dataset._unchunk_size}" if dataset._chunk_start == 0 and dataset._chunk_step == 1: filename = f"{dirname}.npz" else: filename = os.path.join(dirname, f"{dataset._chunk_start}-{dataset._chunk_step}.npz") if chunk_only: dirname += f".{dataset._chunk_start}.{dataset._chunk_step}" gen_dirpath = os.path.join(gen_root, "samples", benchmark, dirname) if gt_metrics: gt_results = compute_image_multimodal_metrics(dataset, gen_dirpath, metrics=gt_multimodal_metrics) if "image_reward" in gt_other_metrics: gt_results.update(compute_image_reward(dataset, gen_dirpath)) if benchmark in ("COCO", "DCI", "MJHQ"): gt_results.update(compute_image_similarity_metrics(dataset, gen_dirpath, metrics=gt_similarity_metrics)) if "fid" in gt_other_metrics: gt_results["fid"] = compute_fid( dataset, gen_dirpath, ref_cache_path=(os.path.join(gt_stats_root, benchmark, filename) if gt_stats_root else None), gen_cache_path=os.path.join(gen_root, "fid_stats", benchmark, filename), ) benchmark_results["with_gt"] = gt_results if 
def categorize_metrics(metrics: tuple[str, ...]) -> tuple[list[str], list[str], list[str]]:
    """
    Categorize metrics into multimodal, similarity, and other metrics.

    Duplicates are dropped and the first-seen order of ``metrics`` is preserved,
    so the result is the same on every run.

    Args:
        metrics (tuple[str, ...]): List of metrics.

    Returns:
        tuple[list[str], list[str], list[str]]: Tuple of multimodal, similarity, and other metrics.
    """
    multimodal_metrics, similarity_metrics, other_metrics = [], [], []
    # dict.fromkeys de-duplicates while keeping insertion order; a set would make
    # the output order depend on string hashing.
    for metric in dict.fromkeys(metrics):
        if metric in ("clip_iqa", "clip_score"):
            multimodal_metrics.append(metric)
        elif metric in ("psnr", "lpips", "ssim"):
            similarity_metrics.append(metric)
        else:
            other_metrics.append(metric)
    return multimodal_metrics, similarity_metrics, other_metrics
def compute_fid(
    ref_dirpath_or_dataset: str | Dataset,
    gen_dirpath: str,
    ref_cache_path: str | None = None,
    gen_cache_path: str | None = None,
    use_symlink: bool = True,
    timestamp: str | None = None,
) -> float:
    """Compute the FID between a reference image source and a folder of generated images.

    Args:
        ref_dirpath_or_dataset: Reference image folder, or a dataset object that is
            handed to `get_fid_features` unchanged.
        gen_dirpath: Folder containing the generated images.
        ref_cache_path: Forwarded to `get_fid_features` as the cache path of the
            reference statistics.
        gen_cache_path: Forwarded to `get_fid_features` as the cache path of the
            generated-image statistics.
        use_symlink: If True, folders are accessed through temporary symlinks that
            are created under `.tmp` in the current working directory.
        timestamp: Suffix used in the symlink names. Defaults to the current time
            formatted as `%y%m%d.%H%M%S`.

    Returns:
        The Frechet distance between the two feature distributions as a Python float.
    """
    # Every symlink created here is recorded so that the `finally` clause can remove
    # it even when symlink creation or feature extraction fails part-way through.
    created_symlinks: list[str] = []
    try:
        if use_symlink:
            if timestamp is None:
                timestamp = datetime.now().strftime("%y%m%d.%H%M%S")
            os.makedirs(".tmp", exist_ok=True)
            if isinstance(ref_dirpath_or_dataset, str):
                sym_ref_dirpath = os.path.join(".tmp", f"ref-{hash(str(ref_dirpath_or_dataset))}-{timestamp}")
                os.symlink(os.path.abspath(ref_dirpath_or_dataset), os.path.abspath(sym_ref_dirpath))
                created_symlinks.append(sym_ref_dirpath)
                ref_dirpath_or_dataset = sym_ref_dirpath
            sym_gen_dirpath = os.path.join(".tmp", f"gen-{hash(str(gen_dirpath))}-{timestamp}")
            os.symlink(os.path.abspath(gen_dirpath), os.path.abspath(sym_gen_dirpath))
            created_symlinks.append(sym_gen_dirpath)
            gen_dirpath = sym_gen_dirpath
        mu1, sigma1 = get_fid_features(dataset_or_folder=ref_dirpath_or_dataset, cache_path=ref_cache_path)
        mu2, sigma2 = get_fid_features(dataset_or_folder=gen_dirpath, cache_path=gen_cache_path)
        return float(fid.frechet_distance(mu1, sigma1, mu2, sigma2))
    finally:
        # os.remove() on a symlink unlinks the link only; the target folder is untouched.
        for sym_path in created_symlinks:
            os.remove(sym_path)
def compute_image_multimodal_metrics(
    ref_dataset: datasets.Dataset,
    gen_dirpath: str,
    metrics: tuple[str, ...] = ("clip_iqa", "clip_score"),
    batch_size: int = 64,
    num_workers: int = 8,
    device: str | torch.device = "cuda",
) -> dict[str, float]:
    """Score generated images with CLIP-based metrics.

    Args:
        ref_dataset: Dataset supplying the prompts and file names; it is wrapped in a
            `PromptImageDataset` together with `gen_dirpath`.
        gen_dirpath: Folder holding the generated images.
        metrics: Metric names to compute; `"clip_iqa"` and `"clip_score"` are supported.
        batch_size: Batch size of the data loader.
        num_workers: Number of data loader workers.
        device: Device the metric modules and image batches are moved to.

    Returns:
        Mapping from metric name to the mean of the computed metric values.
        Empty when `metrics` is empty.

    Raises:
        NotImplementedError: If an unsupported metric name is requested.
    """
    if len(metrics) == 0:
        return {}

    clip_model = "openai/clip-vit-large-patch14"
    # Constructors are wrapped in lambdas so a model is only instantiated on request.
    builders = {
        "clip_iqa": lambda: CLIPImageQualityAssessment(model_name_or_path=clip_model),
        "clip_score": lambda: CLIPScore(model_name_or_path=clip_model),
    }
    evaluators: dict[str, torchmetrics.Metric] = {}
    for requested in metrics:
        if requested not in builders:
            raise NotImplementedError(f"Metric {requested} is not implemented")
        evaluators[requested] = builders[requested]().to(device)

    loader = data.DataLoader(
        PromptImageDataset(ref_dataset, gen_dirpath),
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        drop_last=False,
    )
    with torch.no_grad():
        for images, prompts in tqdm(loader, desc=f"{ref_dataset.config_name} multimodal metrics"):
            images = images.to(device)
            for name, evaluator in evaluators.items():
                if name == "clip_iqa":
                    # NOTE(review): the images are only cast to float32 here, not rescaled;
                    # confirm that their value range matches what the metric's default
                    # construction expects.
                    evaluator.update(images.to(torch.float32))
                else:
                    evaluator.update(images, list(prompts))

    return {name: evaluator.compute().mean().item() for name, evaluator in evaluators.items()}
class MultiImageDataset(data.Dataset):
    """Dataset yielding `[generated, reference]` image tensor pairs.

    Generated images are read from `gen_dirpath`. Reference images come either from
    a second folder (paired with the generated ones by sorted file name) or from a
    `datasets.Dataset` whose rows provide an `"image"` and a `"filename"` entry.
    """

    # File name suffixes that are treated as images when listing a folder.
    _IMAGE_SUFFIXES = (".png", ".jpg")

    def __init__(self, gen_dirpath: str, ref_dirpath_or_dataset: str | datasets.Dataset):
        """Index the generated images and check that the reference source has the same size.

        Args:
            gen_dirpath: Folder containing the generated images.
            ref_dirpath_or_dataset: Reference image folder or reference dataset.
        """
        super().__init__()
        self.gen_names = self._list_image_names(gen_dirpath)
        self.gen_dirpath, self.ref_dirpath_or_dataset = gen_dirpath, ref_dirpath_or_dataset
        if isinstance(ref_dirpath_or_dataset, str):
            self.ref_names = self._list_image_names(ref_dirpath_or_dataset)
            assert len(self.ref_names) == len(self.gen_names), (
                f"Found {len(self.ref_names)} reference images in {ref_dirpath_or_dataset} "
                f"but {len(self.gen_names)} generated images in {gen_dirpath}."
            )
        else:
            assert isinstance(ref_dirpath_or_dataset, datasets.Dataset), (
                f"Unsupported reference type: {type(ref_dirpath_or_dataset)}"
            )
            # In dataset mode the names are only used for `__len__`; the actual file
            # name of each pair is derived from the dataset row in `__getitem__`.
            self.ref_names = self.gen_names
            assert len(ref_dirpath_or_dataset) == len(self.gen_names), (
                f"Reference dataset has {len(ref_dirpath_or_dataset)} rows "
                f"but {gen_dirpath} holds {len(self.gen_names)} generated images."
            )
        self.transform = torchvision.transforms.ToTensor()

    @classmethod
    def _list_image_names(cls, dirpath: str) -> list[str]:
        """Return the sorted names of the `.png`/`.jpg` files directly inside `dirpath`."""
        return sorted(name for name in os.listdir(dirpath) if name.endswith(cls._IMAGE_SUFFIXES))

    def __len__(self):
        return len(self.ref_names)

    def __getitem__(self, idx: int):
        """Load the `idx`-th pair and return `[gen_tensor, ref_tensor]`."""
        if isinstance(self.ref_dirpath_or_dataset, str):
            name = self.ref_names[idx]
            # Pairing is positional, so both folders must use identical file names.
            assert name == self.gen_names[idx], (
                f"File name mismatch at index {idx}: reference {name} vs generated {self.gen_names[idx]}"
            )
            ref_image = Image.open(os.path.join(self.ref_dirpath_or_dataset, name)).convert("RGB")
        else:
            row = self.ref_dirpath_or_dataset[idx]
            ref_image = row["image"].convert("RGB")
            name = row["filename"] + ".png"
        gen_image = Image.open(os.path.join(self.gen_dirpath, name)).convert("RGB")
        gen_size = gen_image.size
        ref_size = ref_image.size
        if ref_size != gen_size:
            # The reference is resized to the generated image's size, never the reverse.
            ref_image = ref_image.resize(gen_size, Image.Resampling.BICUBIC)
        gen_tensor = self.transform(gen_image)
        ref_tensor = self.transform(ref_image)
        return [gen_tensor, ref_tensor]
class DiffusionAttentionProcessor(nn.Module):
    """Attention processor that computes attention through a `ScaleDotProductAttention` module.

    It is built from an existing diffusers processor, which is kept in `self.orig`.
    Rotary embeddings are applied only when that processor's class name starts with
    "Flux"; for `AttnProcessor2_0` / `JointAttnProcessor2_0` no rotary function is set.
    """

    def __init__(
        self,
        orig: AttnProcessor2_0 | FluxAttnProcessor2_0 | JointAttnProcessor2_0,
        sdpa: ScaleDotProductAttention | None = None,
    ) -> None:
        """Wrap `orig`.

        Args:
            orig: The processor being replaced.
            sdpa: Attention module to use; a fresh `ScaleDotProductAttention` is
                created when omitted.

        Raises:
            NotImplementedError: If `orig` is neither a "Flux*" processor nor an
                `AttnProcessor2_0` / `JointAttnProcessor2_0`.
        """
        super().__init__()
        self.orig = orig
        # Flux processors are recognized by class name rather than by isinstance.
        if orig.__class__.__name__.startswith("Flux"):
            self.rope = apply_flux_rope
        elif isinstance(orig, (AttnProcessor2_0, JointAttnProcessor2_0)):
            self.rope = None
        else:
            raise NotImplementedError(f"Unsupported AttentionProcessor: {orig}")
        self.sdpa = sdpa or ScaleDotProductAttention()

    def __call__(  # noqa: C901
        self,
        attn: Attention,
        hidden_states: torch.Tensor,
        encoder_hidden_states: tp.Optional[torch.Tensor] = None,
        attention_mask: tp.Optional[torch.Tensor] = None,
        image_rotary_emb: tp.Optional[torch.Tensor] = None,
        *args,
        **kwargs,
    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
        """Run attention for `attn` using its projection layers and `self.sdpa`.

        Args:
            attn: The attention module whose projections and norms are used.
            hidden_states: Input of the query projection.
            encoder_hidden_states: Optional context input.
            attention_mask: Optional mask, prepared via `attn.prepare_attention_mask`.
            image_rotary_emb: Optional rotary embedding passed to `self.rope`.
            *args: Must be empty.
            **kwargs: A `scale` entry, if present, must be None.

        Returns:
            `hidden_states` alone when no context output was produced, otherwise the
            tuple `(hidden_states, encoder_hidden_states)`.
        """
        # Only the plain configuration is supported: no extra positional arguments, no
        # `scale` override, and none of the optional norm / residual / rescale features.
        assert len(args) == 0 and kwargs.get("scale", None) is None
        assert attn.spatial_norm is None
        assert attn.group_norm is None
        assert attn.norm_cross is None
        assert not attn.residual_connection
        assert attn.rescale_output_factor == 1.0
        heads = attn.heads
        head_dim = attn.inner_dim // heads
        # May be smaller than `heads`; key/value heads are repeated further below.
        kv_heads = attn.inner_kv_dim // head_dim
        assert attn.scale == head_dim**-0.5
        input_ndim, input_shape = hidden_states.dim(), hidden_states.size()
        if input_ndim > 3:
            # Merge every dim after the second into one and swap it with dim 1;
            # the original shape is restored right before returning.
            hidden_states = hidden_states.view(input_shape[0], input_shape[1], -1).transpose(1, 2)
        batch_size, input_length, _ = hidden_states.shape
        context_ndim, context_shape, context_length = None, None, None
        if encoder_hidden_states is not None:
            context_ndim, context_shape = encoder_hidden_states.ndim, encoder_hidden_states.shape
            assert context_shape[0] == batch_size
            if context_ndim > 3:
                # Same flattening as for `hidden_states`.
                encoder_hidden_states = encoder_hidden_states.view(batch_size, context_shape[1], -1).transpose(1, 2)
            context_length = encoder_hidden_states.shape[1]
        if attention_mask is not None:
            # The mask is sized for the context length when a context is given,
            # otherwise for the input length, then split into a per-head layout.
            attention_mask = attn.prepare_attention_mask(attention_mask, context_length or input_length, batch_size)
            attention_mask = attention_mask.view(batch_size, heads, -1, attention_mask.shape[-1])
        query = attn.to_q(hidden_states)
        key, value, add_query, add_key, add_value = None, None, None, None, None
        if hasattr(attn, "add_k_proj"):
            # Modules with add_* projections: the context goes through the add_* layers,
            # while `to_k` / `to_v` are applied to `hidden_states` only if `to_k` is set.
            if attn.to_k is not None:
                key = attn.to_k(hidden_states)
                value = attn.to_v(hidden_states)
            add_key = attn.add_k_proj(encoder_hidden_states)
            add_value = attn.add_v_proj(encoder_hidden_states)
            if hasattr(attn, "add_q_proj"):
                add_query = attn.add_q_proj(encoder_hidden_states)
        else:
            if attn.is_cross_attention:
                key = attn.to_k(encoder_hidden_states)
                value = attn.to_v(encoder_hidden_states)
            else:
                assert encoder_hidden_states is None
                key = attn.to_k(hidden_states)
                value = attn.to_v(hidden_states)
        # The inputs are not needed past this point; `hidden_states` is rebound to the
        # attention output below (presumably released early to save memory).
        hidden_states, encoder_hidden_states = None, None
        # Split the projected features into heads: (batch, heads, length, head_dim).
        query = query.view(batch_size, -1, heads, head_dim).transpose(1, 2)
        if key is not None:
            key = key.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2)
            value = value.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2)
        if add_query is not None:
            add_query = add_query.view(batch_size, -1, heads, head_dim).transpose(1, 2)
        if add_key is not None:
            add_key = add_key.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2)
            add_value = add_value.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2)
        if kv_heads != heads:
            # Repeat each key/value head so that their count matches the query heads.
            heads_per_kv_head = heads // kv_heads
            if key is not None:
                key = torch.repeat_interleave(key, heads_per_kv_head, dim=1)
                value = torch.repeat_interleave(value, heads_per_kv_head, dim=1)
            if add_key is not None:
                add_key = torch.repeat_interleave(add_key, heads_per_kv_head, dim=1)
                add_value = torch.repeat_interleave(add_value, heads_per_kv_head, dim=1)
        # `norm_k` is applied whenever `norm_q` is set (likewise for the added pair).
        if attn.norm_q is not None:
            query = attn.norm_q(query)
            key = attn.norm_k(key)
        if attn.norm_added_q is not None:
            add_query = attn.norm_added_q(add_query)
            add_key = attn.norm_added_k(add_key)
        # Added (context) tokens are placed in front of the regular tokens along the
        # length dim; the output is split in the same order after attention.
        if add_query is not None:
            query = torch.cat([add_query, query], dim=2)
        if add_key is not None:
            if key is None:
                key, value = add_key, add_value
            else:
                key = torch.cat([add_key, key], dim=2)
                value = torch.cat([add_value, value], dim=2)
        del add_query, add_key, add_value
        if image_rotary_emb is not None:
            # NOTE(review): `self.rope` is None for non-Flux processors, so this call
            # relies on rotary embeddings only being passed alongside Flux processors.
            query, key = self.rope(query, key, image_rotary_emb)
        hidden_states = self.sdpa(query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False)
        # Merge the heads back: (batch, length, inner_dim).
        hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.inner_dim)
        hidden_states = hidden_states.to(query.dtype)
        if hidden_states.shape[1] > input_length:
            # The output is longer than the input, i.e. context tokens were prepended
            # to the query: the leading `context_length` positions form the context output.
            encoder_hidden_states = hidden_states[:, :context_length]
            hidden_states = hidden_states[:, context_length:]
        if hasattr(attn, "to_out"):
            # linear proj
            hidden_states = attn.to_out[0](hidden_states)
            # dropout
            hidden_states = attn.to_out[1](hidden_states)
        if hasattr(attn, "to_add_out"):
            encoder_hidden_states = attn.to_add_out(encoder_hidden_states)
        if input_ndim > 3:
            # Undo the flattening applied to inputs with more than 3 dims.
            hidden_states = hidden_states.transpose(-1, -2).reshape(input_shape)
        if encoder_hidden_states is not None and context_ndim > 3:
            assert encoder_hidden_states.ndim == 3
            encoder_hidden_states = encoder_hidden_states.transpose(-1, -2).reshape(context_shape)
        if encoder_hidden_states is None:
            return hidden_states
        return hidden_states, encoder_hidden_states
def replace_fused_linear_with_concat_linear(model: nn.Module) -> None:
    """Swap the `proj_out` layer of every FluxSingleTransformerBlock for a ConcatLinear.

    Args:
        model (nn.Module): model whose single transformer blocks are patched in place.
    """
    log = tools.logging.getLogger(__name__)
    formatter = tools.logging.Formatter
    log.info("Replacing fused Linear with ConcatLinear.")
    formatter.indent_inc()
    for block_name, block in model.named_modules():
        if not isinstance(block, FluxSingleTransformerBlock):
            continue
        fused = block.proj_out
        log.info(f"+ Replacing fused Linear in {block_name} with ConcatLinear.")
        formatter.indent_inc()
        log.info(f"- in_features = {fused.out_features}/{fused.in_features}")
        log.info(f"- out_features = {fused.out_features}")
        formatter.indent_dec()
        # The layer's own `out_features` is passed as the single entry of the
        # list handed to `ConcatLinear.from_linear`.
        block.proj_out = ConcatLinear.from_linear(fused, [fused.out_features])
    formatter.indent_dec()
def replace_attn_processor(model: nn.Module) -> None:
    """Install a DiffusionAttentionProcessor on every Attention module of `model`.

    Each new processor is constructed from the processor the module currently holds.

    Args:
        model (nn.Module): model whose attention processors are replaced in place.
    """
    log = tools.logging.getLogger(__name__)
    log.info("Replacing Attention processors.")
    tools.logging.Formatter.indent_inc()
    # Lazy filter: modules are still visited one by one while processors are swapped.
    attention_layers = (
        (layer_name, layer) for layer_name, layer in model.named_modules() if isinstance(layer, Attention)
    )
    for layer_name, layer in attention_layers:
        log.info(f"+ Replacing {layer_name} processor with DiffusionAttentionProcessor.")
        wrapped = DiffusionAttentionProcessor(layer.processor)
        layer.set_processor(wrapped)
    tools.logging.Formatter.indent_dec()
================================================ # -*- coding: utf-8 -*- """Utility functions for Diffusion Models.""" import enum import typing as tp from abc import abstractmethod from collections import OrderedDict, defaultdict from dataclasses import dataclass, field # region imports import torch.nn as nn from diffusers.models.activations import GEGLU, GELU, ApproximateGELU, SwiGLU from diffusers.models.attention import BasicTransformerBlock, FeedForward, JointTransformerBlock from diffusers.models.attention_processor import Attention, SanaLinearAttnProcessor2_0 from diffusers.models.embeddings import ( CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, ImageHintTimeEmbedding, ImageProjection, ImageTimeEmbedding, PatchEmbed, PixArtAlphaTextProjection, TextImageProjection, TextImageTimeEmbedding, TextTimeEmbedding, TimestepEmbedding, ) from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormSingle, AdaLayerNormZero from diffusers.models.resnet import Downsample2D, ResnetBlock2D, Upsample2D from diffusers.models.transformers.pixart_transformer_2d import PixArtTransformer2DModel from diffusers.models.transformers.sana_transformer import GLUMBConv, SanaTransformer2DModel, SanaTransformerBlock from diffusers.models.transformers.transformer_2d import Transformer2DModel from diffusers.models.transformers.transformer_flux import ( FluxSingleTransformerBlock, FluxTransformer2DModel, FluxTransformerBlock, ) from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel from diffusers.models.unets.unet_2d import UNet2DModel from diffusers.models.unets.unet_2d_blocks import ( CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UNetMidBlock2D, UNetMidBlock2DCrossAttn, UpBlock2D, ) from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel from diffusers.pipelines import ( FluxControlPipeline, FluxFillPipeline, FluxPipeline, PixArtAlphaPipeline, PixArtSigmaPipeline, SanaPipeline, 
# Names exported by a star-import of this module. Each name is listed once.
__all__ = ["DiffusionModuleStruct", "DiffusionBlockStruct", "DiffusionModelStruct"]


# Transformer block classes of the supported DiT-style models.
DIT_BLOCK_CLS = tp.Union[
    BasicTransformerBlock,
    JointTransformerBlock,
    FluxSingleTransformerBlock,
    FluxTransformerBlock,
    SanaTransformerBlock,
]
# Down / mid / up block classes of the supported UNet models.
UNET_BLOCK_CLS = tp.Union[
    DownBlock2D,
    CrossAttnDownBlock2D,
    UNetMidBlock2D,
    UNetMidBlock2DCrossAttn,
    UpBlock2D,
    CrossAttnUpBlock2D,
]
# Top-level model classes, grouped by architecture family.
DIT_CLS = tp.Union[
    Transformer2DModel,
    PixArtTransformer2DModel,
    SD3Transformer2DModel,
    FluxTransformer2DModel,
    SanaTransformer2DModel,
]
UNET_CLS = tp.Union[UNet2DModel, UNet2DConditionModel]
MODEL_CLS = tp.Union[DIT_CLS, UNET_CLS]
# Pipeline classes, grouped by the kind of model they carry
# (`.unet` for UNet pipelines, `.transformer` for DiT pipelines).
UNET_PIPELINE_CLS = tp.Union[StableDiffusionPipeline, StableDiffusionXLPipeline]
DIT_PIPELINE_CLS = tp.Union[
    StableDiffusion3Pipeline,
    PixArtAlphaPipeline,
    PixArtSigmaPipeline,
    FluxPipeline,
    FluxControlPipeline,
    FluxFillPipeline,
    SanaPipeline,
]
PIPELINE_CLS = tp.Union[UNET_PIPELINE_CLS, DIT_PIPELINE_CLS]
@dataclass(kw_only=True)
class DiffusionBlockStruct(BaseModuleStruct):
    """Abstract base for structs that can enumerate their attention and transformer-block structs."""

    @abstractmethod
    def iter_attention_structs(self) -> tp.Generator["DiffusionAttentionStruct", None, None]:
        """Yield the attention structs of this struct; implemented by subclasses."""
        ...

    @abstractmethod
    def iter_transformer_block_structs(self) -> tp.Generator["DiffusionTransformerBlockStruct", None, None]:
        """Yield the transformer block structs of this struct; implemented by subclasses."""
        ...
def get_iter_layer_activations_args(
    self, skip_pre_modules: bool, skip_post_modules: bool, **input_kwargs
) -> tuple[list[nn.Module], list[DiffusionModuleStruct | DiffusionBlockStruct], list[bool], list[bool]]:
    """
    Assemble the arguments used to iterate over the layers and their activations.

    The result concatenates, in this order, the pre-module entries (unless skipped),
    the block entries, and the post-module entries (unless skipped).

    Args:
        skip_pre_modules (`bool`):
            Whether to leave out the pre-modules
        skip_post_modules (`bool`):
            Whether to leave out the post-modules
        **input_kwargs:
            Forwarded to `_get_iter_block_activations_args`.

    Returns:
        `tuple[list[nn.Module], list[DiffusionModuleStruct | DiffusionBlockStruct], list[bool], list[bool]]`:
            the layers, the layer structs, the recomputes, and the use_prev_layer_outputs
    """
    if skip_pre_modules:
        layers, structs, recomputes, uses = [], [], [], []
    else:
        # The lists returned for the pre-modules become the accumulators themselves.
        layers, structs, recomputes, uses = self._get_iter_pre_module_activations_args()

    def _append(chunk) -> None:
        # Extend the four accumulators with one (layers, structs, recomputes, uses) group.
        more_layers, more_structs, more_recomputes, more_uses = chunk
        layers.extend(more_layers)
        structs.extend(more_structs)
        recomputes.extend(more_recomputes)
        uses.extend(more_uses)

    _append(self._get_iter_block_activations_args(**input_kwargs))
    if not skip_post_modules:
        _append(self._get_iter_post_module_activations_args())
    return layers, structs, recomputes, uses
@staticmethod
def _simplify_keys(keys: tp.Iterable[str], *, key_map: dict[str, set[str]]) -> list[str]:
    """Simplify the keys based on the key map.

    Groups are visited from the largest member set to the smallest. A group key is
    selected when it is itself among the remaining keys, or when all of its members
    are; in both cases its members are removed from the remaining keys. Groups
    without members are never selected implicitly.

    Args:
        keys (`Iterable[str]`):
            The keys to simplify.
        key_map (`dict[str, set[str]]`):
            The key map, from group key to the set of keys it stands for.

    Returns:
        `list[str]`:
            The simplified keys, sorted.

    Raises:
        AssertionError: If some keys are not covered by any entry of `key_map`.
    """
    # Largest groups first, so that a wide group wins over the narrower ones it contains.
    groups = sorted(key_map.items(), key=lambda item: len(item[1]), reverse=True)
    remaining, selected = set(keys), set()
    for group_key, members in groups:
        if group_key in remaining:
            # The group was requested by name: it also absorbs its members.
            selected.add(group_key)
            remaining.discard(group_key)
            remaining.difference_update(members)
        elif members and remaining.issuperset(members):
            # `members` must be non-empty: every set is a superset of the empty set,
            # which would otherwise select member-less groups unconditionally.
            selected.add(group_key)
            remaining.difference_update(members)
    assert not remaining, f"Unrecognized keys: {remaining}"
    return sorted(selected)
None) q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "to_k", "to_v" add_q_proj_rname, add_k_proj_rname, add_v_proj_rname = "add_q_proj", "add_k_proj", "add_v_proj" add_o_proj_rname = "to_add_out" if getattr(module, "to_out", None) is not None: o_proj = module.to_out[0] o_proj_rname = "to_out.0" assert isinstance(o_proj, nn.Linear) elif parent is not None: assert isinstance(parent.module, FluxSingleTransformerBlock) assert isinstance(parent.module.proj_out, ConcatLinear) assert len(parent.module.proj_out.linears) == 2 o_proj = parent.module.proj_out.linears[0] o_proj_rname = ".proj_out.linears.0" else: raise RuntimeError("Cannot find the output projection.") if isinstance(module.processor, DiffusionAttentionProcessor): with_rope = module.processor.rope is not None elif module.processor.__class__.__name__.startswith("Flux"): with_rope = True else: with_rope = False # TODO: fix for other processors config = AttentionConfigStruct( hidden_size=q_proj.weight.shape[1], add_hidden_size=add_k_proj.weight.shape[1] if add_k_proj is not None else 0, inner_size=q_proj.weight.shape[0], num_query_heads=module.heads, num_key_value_heads=module.to_k.weight.shape[0] // (module.to_q.weight.shape[0] // module.heads), with_qk_norm=module.norm_q is not None, with_rope=with_rope, linear_attn=isinstance(module.processor, SanaLinearAttnProcessor2_0), ) return DiffusionAttentionStruct( module=module, parent=parent, fname=fname, idx=idx, rname=rname, rkey=rkey, config=config, q_proj=q_proj, k_proj=k_proj, v_proj=v_proj, o_proj=o_proj, add_q_proj=add_q_proj, add_k_proj=add_k_proj, add_v_proj=add_v_proj, add_o_proj=add_o_proj, q=None, # TODO: add q, k, v k=None, v=None, q_proj_rname=q_proj_rname, k_proj_rname=k_proj_rname, v_proj_rname=v_proj_rname, o_proj_rname=o_proj_rname, add_q_proj_rname=add_q_proj_rname, add_k_proj_rname=add_k_proj_rname, add_v_proj_rname=add_v_proj_rname, add_o_proj_rname=add_o_proj_rname, q_rname="", k_rname="", v_rname="", ) @dataclass(kw_only=True) class 
@dataclass(kw_only=True)
class DiffusionFeedForwardStruct(FeedForwardStruct):
    """Struct describing a feed-forward module with exactly one up and one down projection.

    ``_default_construct`` accepts a ``FeedForward``, a ``FluxSingleTransformerBlock`` (whose MLP
    path is wrapped in an ``nn.Sequential``) or a ``GLUMBConv``.
    """

    module: FeedForward = field(repr=False, kw_only=False)
    """The wrapped feed-forward module."""
    parent: tp.Optional["DiffusionTransformerBlockStruct"] = field(repr=False)
    # region modules
    moe_gate: None = field(init=False, repr=False, default=None)
    experts: list[nn.Module] = field(init=False, repr=False)
    # endregion
    # region names
    moe_gate_rname: str = field(init=False, repr=False, default="")
    experts_rname: str = field(init=False, repr=False, default="")
    # endregion

    # region aliases

    @property
    def up_proj(self) -> nn.Linear:
        """The single up projection."""
        return self.up_projs[0]

    @property
    def down_proj(self) -> nn.Linear:
        """The single down projection."""
        return self.down_projs[0]

    @property
    def up_proj_rname(self) -> str:
        """Relative name of the up projection."""
        return self.up_proj_rnames[0]

    @property
    def down_proj_rname(self) -> str:
        """Relative name of the down projection."""
        return self.down_proj_rnames[0]

    @property
    def up_proj_name(self) -> str:
        """First entry of ``up_proj_names``."""
        return self.up_proj_names[0]

    @property
    def down_proj_name(self) -> str:
        """First entry of ``down_proj_names``."""
        return self.down_proj_names[0]

    # endregion

    def __post_init__(self) -> None:
        """Check the single-projection invariant and register the module as the only expert."""
        assert len(self.up_projs) == 1 and len(self.down_projs) == 1
        assert len(self.up_proj_rnames) == 1 and len(self.down_proj_rnames) == 1
        # experts must be set before the base class post-init runs
        self.experts = [self.module]
        super().__post_init__()

    @staticmethod
    def _default_construct(
        module: FeedForward | FluxSingleTransformerBlock | GLUMBConv,
        /,
        parent: tp.Optional["DiffusionTransformerBlockStruct"] = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "DiffusionFeedForwardStruct":
        """Build the struct for ``module``.

        Raises:
            NotImplementedError: If ``module`` is none of the supported types.
        """
        if isinstance(module, FeedForward):
            first_layer, last_layer = module.net[0], module.net[2]
            assert isinstance(first_layer, (GEGLU, GELU, ApproximateGELU, SwiGLU))
            up_proj, up_proj_rname = first_layer.proj, "net.0.proj"
            assert isinstance(up_proj, nn.Linear)
            down_proj, down_proj_rname = last_layer, "net.2"
            if isinstance(first_layer, GEGLU):
                act_type = "gelu_glu"
            elif isinstance(first_layer, SwiGLU):
                act_type = "swish_glu"
            else:
                assert first_layer.__class__.__name__.lower().endswith("gelu")
                act_type = "gelu"
                if isinstance(last_layer, ShiftedLinear):
                    # unwrap the shifted wrapper and record it in the activation type
                    down_proj, down_proj_rname = last_layer.linear, "net.2.linear"
                    act_type = "gelu_shifted"
            assert isinstance(down_proj, nn.Linear)
            ffn = module
        elif isinstance(module, FluxSingleTransformerBlock):
            up_proj, up_proj_rname = module.proj_mlp, "proj_mlp"
            assert isinstance(module.proj_out, ConcatLinear)
            assert len(module.proj_out.linears) == 2
            # the second half of the concatenated output projection is the MLP down projection
            last_layer = module.proj_out.linears[1]
            if isinstance(last_layer, ShiftedLinear):
                act_type = "gelu_shifted"
                down_proj, down_proj_rname = last_layer.linear, "proj_out.linears.1.linear"
            else:
                act_type = "gelu"
                down_proj, down_proj_rname = last_layer, "proj_out.linears.1"
            ffn = nn.Sequential(up_proj, module.act_mlp, last_layer)
            assert not rname, f"Unsupported rname: {rname}"
        elif isinstance(module, GLUMBConv):
            ffn = module
            act_type = "silu_conv_silu_glu"
            up_proj, up_proj_rname = module.conv_inverted, "conv_inverted"
            down_proj, down_proj_rname = module.conv_point, "conv_point"
        else:
            raise NotImplementedError(f"Unsupported module type: {type(module)}")

        config = FeedForwardConfigStruct(
            hidden_size=up_proj.weight.shape[1],
            intermediate_size=down_proj.weight.shape[1],
            intermediate_act_type=act_type,
            num_experts=1,
        )
        return DiffusionFeedForwardStruct(
            module=ffn,  # this may be a virtual module
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            config=config,
            up_projs=[up_proj],
            down_projs=[down_proj],
            up_proj_rnames=[up_proj_rname],
            down_proj_rnames=[down_proj_rname],
        )
@dataclass(kw_only=True)
class DiffusionTransformerBlockStruct(TransformerBlockStruct, DiffusionBlockStruct):
    """Struct describing one diffusion transformer block.

    ``_default_construct`` supports ``BasicTransformerBlock``, ``SanaTransformerBlock``,
    ``JointTransformerBlock``, ``FluxSingleTransformerBlock`` and ``FluxTransformerBlock`` and
    records their pre-attention / pre-FFN norms, attentions and feed-forward modules.
    """

    # region relative keys
    norm_rkey: tp.ClassVar[str] = "transformer_norm"
    add_norm_rkey: tp.ClassVar[str] = "transformer_add_norm"
    attn_struct_cls: tp.ClassVar[type[DiffusionAttentionStruct]] = DiffusionAttentionStruct
    ffn_struct_cls: tp.ClassVar[type[DiffusionFeedForwardStruct]] = DiffusionFeedForwardStruct
    # endregion

    parent: tp.Optional["DiffusionTransformerStruct"] = field(repr=False)
    # region child modules
    post_attn_norms: list[nn.LayerNorm] = field(init=False, repr=False, default_factory=list)
    post_attn_add_norms: list[nn.LayerNorm] = field(init=False, repr=False, default_factory=list)
    post_ffn_norm: None = field(init=False, repr=False, default=None)
    post_add_ffn_norm: None = field(init=False, repr=False, default=None)
    # endregion
    # region relative names
    post_attn_norm_rnames: list[str] = field(init=False, repr=False, default_factory=list)
    post_attn_add_norm_rnames: list[str] = field(init=False, repr=False, default_factory=list)
    post_ffn_norm_rname: str = field(init=False, repr=False, default="")
    post_add_ffn_norm_rname: str = field(init=False, repr=False, default="")
    # endregion
    # region attributes
    norm_type: str
    add_norm_type: str
    # endregion
    # region absolute keys
    norm_key: str = field(init=False, repr=False)
    add_norm_key: str = field(init=False, repr=False)
    # endregion
    # region child structs
    pre_attn_norm_structs: list[DiffusionModuleStruct | None] = field(init=False, repr=False)
    pre_attn_add_norm_structs: list[DiffusionModuleStruct | None] = field(init=False, repr=False)
    pre_ffn_norm_struct: DiffusionModuleStruct | None = field(init=False, repr=False, default=None)
    pre_add_ffn_norm_struct: DiffusionModuleStruct | None = field(init=False, repr=False, default=None)
    attn_structs: list[DiffusionAttentionStruct] = field(init=False, repr=False)
    ffn_struct: DiffusionFeedForwardStruct | None = field(init=False, repr=False)
    add_ffn_struct: DiffusionFeedForwardStruct | None = field(init=False, repr=False)
    # endregion

    def __post_init__(self) -> None:
        """Derive the norm keys and wrap every pre-attention / pre-FFN norm in a module struct."""
        super().__post_init__()
        self.norm_key = join_name(self.key, self.norm_rkey, sep="_")
        self.add_norm_key = join_name(self.key, self.add_norm_rkey, sep="_")
        # Fill the declared dataclass fields; previously only the un-prefixed aliases below were
        # assigned, which left ``pre_attn_norm_structs`` / ``pre_attn_add_norm_structs`` unset.
        self.pre_attn_norm_structs = [
            DiffusionModuleStruct(norm, parent=self, fname="pre_attn_norm", rname=rname, rkey=self.norm_rkey, idx=idx)
            for idx, (norm, rname) in enumerate(zip(self.pre_attn_norms, self.pre_attn_norm_rnames, strict=True))
        ]
        self.pre_attn_add_norm_structs = [
            DiffusionModuleStruct(
                norm, parent=self, fname="pre_attn_add_norm", rname=rname, rkey=self.add_norm_rkey, idx=idx
            )
            for idx, (norm, rname) in enumerate(
                zip(self.pre_attn_add_norms, self.pre_attn_add_norm_rnames, strict=True)
            )
        ]
        # Keep the historical attribute names as aliases of the same lists for existing callers.
        self.attn_norm_structs = self.pre_attn_norm_structs
        self.add_attn_norm_structs = self.pre_attn_add_norm_structs
        if self.pre_ffn_norm is not None:
            self.pre_ffn_norm_struct = DiffusionModuleStruct(
                self.pre_ffn_norm, parent=self, fname="pre_ffn_norm", rname=self.pre_ffn_norm_rname, rkey=self.norm_rkey
            )
        if self.pre_add_ffn_norm is not None:
            self.pre_add_ffn_norm_struct = DiffusionModuleStruct(
                self.pre_add_ffn_norm,
                parent=self,
                fname="pre_add_ffn_norm",
                rname=self.pre_add_ffn_norm_rname,
                rkey=self.add_norm_rkey,
            )

    def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]:
        """Yield the key modules of the norms, attentions and feed-forwards, in that order.

        Norm structs wrapping ``None`` are skipped, and a pre-FFN norm is only yielded when it is
        a different object from the first pre-attention norm (so a shared norm is not repeated).
        """
        for attn_norm in self.pre_attn_norm_structs:
            if attn_norm.module is not None:
                yield from attn_norm.named_key_modules()
        for add_attn_norm in self.pre_attn_add_norm_structs:
            if add_attn_norm.module is not None:
                yield from add_attn_norm.named_key_modules()
        for attn_struct in self.attn_structs:
            yield from attn_struct.named_key_modules()
        if self.pre_ffn_norm_struct is not None:
            if self.pre_attn_norms and self.pre_attn_norms[0] is not self.pre_ffn_norm:
                yield from self.pre_ffn_norm_struct.named_key_modules()
        if self.ffn_struct is not None:
            yield from self.ffn_struct.named_key_modules()
        if self.pre_add_ffn_norm_struct is not None:
            if self.pre_attn_add_norms and self.pre_attn_add_norms[0] is not self.pre_add_ffn_norm:
                yield from self.pre_add_ffn_norm_struct.named_key_modules()
        if self.add_ffn_struct is not None:
            yield from self.add_ffn_struct.named_key_modules()

    @staticmethod
    def _default_construct(
        module: DIT_BLOCK_CLS,
        /,
        parent: tp.Optional["DiffusionTransformerStruct"] = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "DiffusionTransformerBlockStruct":
        """Build the struct for ``module``.

        Raises:
            NotImplementedError: If ``module`` is none of the supported block types.
        """
        if isinstance(module, (BasicTransformerBlock, SanaTransformerBlock)):
            parallel = False
            if isinstance(module, SanaTransformerBlock):
                norm_type = add_norm_type = "ada_norm_single"
            else:
                norm_type = add_norm_type = module.norm_type
            pre_attn_norms, pre_attn_norm_rnames = [], []
            attns, attn_rnames = [], []
            pre_attn_add_norms, pre_attn_add_norm_rnames = [], []
            assert module.norm1 is not None
            assert module.attn1 is not None
            pre_attn_norms.append(module.norm1)
            pre_attn_norm_rnames.append("norm1")
            attns.append(module.attn1)
            attn_rnames.append("attn1")
            pre_attn_add_norms.append(module.attn1.norm_cross)
            pre_attn_add_norm_rnames.append("attn1.norm_cross")
            if module.attn2 is not None:
                if norm_type == "ada_norm_single":
                    # in this mode norm2 is recorded as the pre-FFN norm below, not as an attn norm
                    pre_attn_norms.append(None)
                    pre_attn_norm_rnames.append("")
                else:
                    assert module.norm2 is not None
                    pre_attn_norms.append(module.norm2)
                    pre_attn_norm_rnames.append("norm2")
                attns.append(module.attn2)
                attn_rnames.append("attn2")
                pre_attn_add_norms.append(module.attn2.norm_cross)
                pre_attn_add_norm_rnames.append("attn2.norm_cross")
            if norm_type == "ada_norm_single":
                assert module.norm2 is not None
                pre_ffn_norm, pre_ffn_norm_rname = module.norm2, "norm2"
            else:
                pre_ffn_norm, pre_ffn_norm_rname = module.norm3, "" if module.norm3 is None else "norm3"
            ffn, ffn_rname = module.ff, "" if module.ff is None else "ff"
            pre_add_ffn_norm, pre_add_ffn_norm_rname, add_ffn, add_ffn_rname = None, "", None, ""
        elif isinstance(module, JointTransformerBlock):
            parallel = False
            norm_type = "ada_norm_zero"
            pre_attn_norms, pre_attn_norm_rnames = [module.norm1], ["norm1"]
            if isinstance(module.norm1_context, AdaLayerNormZero):
                add_norm_type = "ada_norm_zero"
            else:
                add_norm_type = "ada_norm_continous"
            pre_attn_add_norms, pre_attn_add_norm_rnames = [module.norm1_context], ["norm1_context"]
            attns, attn_rnames = [module.attn], ["attn"]
            pre_ffn_norm, pre_ffn_norm_rname = module.norm2, "norm2"
            ffn, ffn_rname = module.ff, "ff"
            pre_add_ffn_norm, pre_add_ffn_norm_rname = module.norm2_context, "norm2_context"
            add_ffn, add_ffn_rname = module.ff_context, "ff_context"
        elif isinstance(module, FluxSingleTransformerBlock):
            parallel = True
            norm_type = add_norm_type = "ada_norm_zero_single"
            pre_attn_norms, pre_attn_norm_rnames = [module.norm], ["norm"]
            attns, attn_rnames = [module.attn], ["attn"]
            pre_attn_add_norms, pre_attn_add_norm_rnames = [], []
            # the same norm feeds both attention and MLP; the block itself is passed as the ffn
            pre_ffn_norm, pre_ffn_norm_rname = module.norm, "norm"
            ffn, ffn_rname = module, ""
            pre_add_ffn_norm, pre_add_ffn_norm_rname, add_ffn, add_ffn_rname = None, "", None, ""
        elif isinstance(module, FluxTransformerBlock):
            parallel = False
            norm_type = add_norm_type = "ada_norm_zero"
            pre_attn_norms, pre_attn_norm_rnames = [module.norm1], ["norm1"]
            attns, attn_rnames = [module.attn], ["attn"]
            pre_attn_add_norms, pre_attn_add_norm_rnames = [module.norm1_context], ["norm1_context"]
            pre_ffn_norm, pre_ffn_norm_rname = module.norm2, "norm2"
            ffn, ffn_rname = module.ff, "ff"
            pre_add_ffn_norm, pre_add_ffn_norm_rname = module.norm2_context, "norm2_context"
            add_ffn, add_ffn_rname = module.ff_context, "ff_context"
        else:
            raise NotImplementedError(f"Unsupported module type: {type(module)}")
        return DiffusionTransformerBlockStruct(
            module=module,
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            parallel=parallel,
            pre_attn_norms=pre_attn_norms,
            pre_attn_add_norms=pre_attn_add_norms,
            attns=attns,
            pre_ffn_norm=pre_ffn_norm,
            ffn=ffn,
            pre_add_ffn_norm=pre_add_ffn_norm,
            add_ffn=add_ffn,
            pre_attn_norm_rnames=pre_attn_norm_rnames,
            pre_attn_add_norm_rnames=pre_attn_add_norm_rnames,
            attn_rnames=attn_rnames,
            pre_ffn_norm_rname=pre_ffn_norm_rname,
            ffn_rname=ffn_rname,
            pre_add_ffn_norm_rname=pre_add_ffn_norm_rname,
            add_ffn_rname=add_ffn_rname,
            norm_type=norm_type,
            add_norm_type=add_norm_type,
        )

    @classmethod
    def _get_default_key_map(cls) -> dict[str, set[str]]:
        """Get the default allowed keys.

        Returns:
            A mapping from each relative key to the set of keys it stands for; entries with an
            empty set are dropped.
        """
        key_map: dict[str, set[str]] = defaultdict(set)
        norm_rkey = norm_key = cls.norm_rkey
        add_norm_rkey = add_norm_key = cls.add_norm_rkey
        key_map[norm_rkey].add(norm_key)
        key_map[add_norm_rkey].add(add_norm_key)
        attn_cls = cls.attn_struct_cls
        attn_key = attn_rkey = cls.attn_rkey
        qkv_proj_key = qkv_proj_rkey = join_name(attn_key, attn_cls.qkv_proj_rkey, sep="_")
        out_proj_key = out_proj_rkey = join_name(attn_key, attn_cls.out_proj_rkey, sep="_")
        add_qkv_proj_key = add_qkv_proj_rkey = join_name(attn_key, attn_cls.add_qkv_proj_rkey, sep="_")
        add_out_proj_key = add_out_proj_rkey = join_name(attn_key, attn_cls.add_out_proj_rkey, sep="_")
        # the attention key groups the main qkv/out projections
        key_map[attn_rkey].add(qkv_proj_key)
        key_map[attn_rkey].add(out_proj_key)
        if attn_cls.add_qkv_proj_rkey.startswith("add_") and attn_cls.add_out_proj_rkey.startswith("add_"):
            # an extra "<attn>_add" group is only created when both add keys carry the "add_" prefix
            add_attn_rkey = join_name(attn_rkey, "add", sep="_")
            key_map[add_attn_rkey].add(add_qkv_proj_key)
            key_map[add_attn_rkey].add(add_out_proj_key)
        key_map[qkv_proj_rkey].add(qkv_proj_key)
        key_map[out_proj_rkey].add(out_proj_key)
        key_map[add_qkv_proj_rkey].add(add_qkv_proj_key)
        key_map[add_out_proj_rkey].add(add_out_proj_key)
        ffn_cls = cls.ffn_struct_cls
        ffn_key = ffn_rkey = cls.ffn_rkey
        add_ffn_key = add_ffn_rkey = cls.add_ffn_rkey
        up_proj_key = up_proj_rkey = join_name(ffn_key, ffn_cls.up_proj_rkey, sep="_")
        down_proj_key = down_proj_rkey = join_name(ffn_key, ffn_cls.down_proj_rkey, sep="_")
        add_up_proj_key = add_up_proj_rkey = join_name(add_ffn_key, ffn_cls.up_proj_rkey, sep="_")
        add_down_proj_key = add_down_proj_rkey = join_name(add_ffn_key, ffn_cls.down_proj_rkey, sep="_")
        key_map[ffn_rkey].add(up_proj_key)
        key_map[ffn_rkey].add(down_proj_key)
        key_map[add_ffn_rkey].add(add_up_proj_key)
        key_map[add_ffn_rkey].add(add_down_proj_key)
        key_map[up_proj_rkey].add(up_proj_key)
        key_map[down_proj_rkey].add(down_proj_key)
        key_map[add_up_proj_rkey].add(add_up_proj_key)
        key_map[add_down_proj_rkey].add(add_down_proj_key)
        return {k: v for k, v in key_map.items() if v}
@dataclass(kw_only=True)
class DiffusionTransformerStruct(BaseTransformerStruct, DiffusionBlockStruct):
    """Struct describing a ``Transformer2DModel``: in/out norms and projections plus its blocks."""

    # region relative keys
    proj_in_rkey: tp.ClassVar[str] = "transformer_proj_in"
    proj_out_rkey: tp.ClassVar[str] = "transformer_proj_out"
    transformer_block_rkey: tp.ClassVar[str] = ""
    transformer_block_struct_cls: tp.ClassVar[type[DiffusionTransformerBlockStruct]] = DiffusionTransformerBlockStruct
    # endregion

    module: Transformer2DModel = field(repr=False, kw_only=False)
    # region modules
    norm_in: nn.GroupNorm | None
    """Normalization applied to the input."""
    proj_in: nn.Linear | nn.Conv2d
    """Projection applied to the input."""
    norm_out: nn.GroupNorm | None
    """Normalization applied to the output."""
    proj_out: nn.Linear | nn.Conv2d
    """Projection applied to the output."""
    transformer_blocks: nn.ModuleList = field(repr=False)
    """The transformer blocks."""
    # endregion
    # region relative names
    transformer_blocks_rname: str
    # endregion
    # region absolute names
    transformer_blocks_name: str = field(init=False, repr=False)
    transformer_block_names: list[str] = field(init=False, repr=False)
    # endregion
    # region child structs
    transformer_block_structs: list[DiffusionTransformerBlockStruct] = field(init=False, repr=False)
    # endregion

    # region aliases

    @property
    def num_blocks(self) -> int:
        """Number of transformer blocks."""
        return len(self.transformer_blocks)

    @property
    def block_structs(self) -> list[DiffusionBlockStruct]:
        """Alias of ``transformer_block_structs``."""
        return self.transformer_block_structs

    @property
    def block_names(self) -> list[str]:
        """Alias of ``transformer_block_names``."""
        return self.transformer_block_names

    # endregion

    def __post_init__(self):
        """Compute the block names and construct one child struct per transformer block."""
        super().__post_init__()
        prefix = self.transformer_blocks_rname
        block_rnames = [f"{prefix}.{block_idx}" for block_idx, _ in enumerate(self.transformer_blocks)]
        self.transformer_blocks_name = join_name(self.name, prefix)
        self.transformer_block_names = [join_name(self.name, block_rname) for block_rname in block_rnames]
        # Collect into a local list first; the attribute is only bound once every child exists.
        block_structs = []
        for block_idx, (block, block_rname) in enumerate(zip(self.transformer_blocks, block_rnames, strict=True)):
            block_structs.append(
                self.transformer_block_struct_cls.construct(
                    block,
                    parent=self,
                    fname="transformer_block",
                    rname=block_rname,
                    rkey=self.transformer_block_rkey,
                    idx=block_idx,
                )
            )
        self.transformer_block_structs = block_structs

    @staticmethod
    def _default_construct(
        module: Transformer2DModel,
        /,
        parent: BaseModuleStruct = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "DiffusionTransformerStruct":
        """Build the struct for ``module``.

        Raises:
            NotImplementedError: If ``module`` is not a ``Transformer2DModel``.
        """
        if not isinstance(module, Transformer2DModel):
            raise NotImplementedError(f"Unsupported module type: {type(module)}")
        assert module.is_input_continuous, "input must be continuous"
        return DiffusionTransformerStruct(
            module=module,
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            norm_in=module.norm,
            proj_in=module.proj_in,
            transformer_blocks=module.transformer_blocks,
            proj_out=module.proj_out,
            norm_out=None,  # no output norm is recorded for this module type
            norm_in_rname="norm",
            proj_in_rname="proj_in",
            transformer_blocks_rname="transformer_blocks",
            norm_out_rname="",
            proj_out_rname="proj_out",
        )

    @classmethod
    def _get_default_key_map(cls) -> dict[str, set[str]]:
        """Get the default allowed keys.

        The projection keys map to themselves; the block struct's key map is merged in with the
        block key joined in front of every entry.
        """
        key_map: dict[str, set[str]] = defaultdict(set)
        for own_rkey in (cls.proj_in_rkey, cls.proj_out_rkey):
            key_map[own_rkey].add(own_rkey)
        block_prefix = cls.transformer_block_rkey
        nested_map = cls.transformer_block_struct_cls._get_default_key_map()
        for nested_rkey, nested_keys in nested_map.items():
            full_rkey = join_name(block_prefix, nested_rkey, sep="_")
            for nested_key in nested_keys:
                key_map[full_rkey].add(join_name(block_prefix, nested_key, sep="_"))
        return {rkey: keys for rkey, keys in key_map.items() if keys}
@dataclass(kw_only=True)
class DiffusionResnetStruct(BaseModuleStruct):
    """Struct describing a ``ResnetBlock2D``: its norms, (possibly concatenated or shifted)
    convolutions, optional shortcut convolution and optional time-embedding projection."""

    # region relative keys
    conv_rkey: tp.ClassVar[str] = "conv"
    shortcut_rkey: tp.ClassVar[str] = "shortcut"
    time_proj_rkey: tp.ClassVar[str] = "time_proj"
    # endregion

    module: ResnetBlock2D = field(repr=False, kw_only=False)
    """The wrapped resnet block."""
    config: FeedForwardConfigStruct
    # region child modules
    norms: list[nn.GroupNorm]
    convs: list[list[nn.Conv2d]]
    shortcut: nn.Conv2d | None
    time_proj: nn.Linear | None
    # endregion
    # region relative names
    norm_rnames: list[str]
    conv_rnames: list[list[str]]
    shortcut_rname: str
    time_proj_rname: str
    # endregion
    # region absolute names
    norm_names: list[str] = field(init=False, repr=False)
    conv_names: list[list[str]] = field(init=False, repr=False)
    shortcut_name: str = field(init=False, repr=False)
    time_proj_name: str = field(init=False, repr=False)
    # endregion
    # region absolute keys
    conv_key: str = field(init=False, repr=False)
    shortcut_key: str = field(init=False, repr=False)
    time_proj_key: str = field(init=False, repr=False)
    # endregion

    def __post_init__(self):
        """Derive the absolute names and keys from the relative ones."""
        super().__post_init__()
        own_name, own_key = self.name, self.key
        self.norm_names = [join_name(own_name, norm_rname) for norm_rname in self.norm_rnames]
        self.conv_names = [
            [join_name(own_name, conv_rname) for conv_rname in level_rnames] for level_rnames in self.conv_rnames
        ]
        self.shortcut_name = join_name(own_name, self.shortcut_rname)
        self.time_proj_name = join_name(own_name, self.time_proj_rname)
        self.conv_key = join_name(own_key, self.conv_rkey, sep="_")
        self.shortcut_key = join_name(own_key, self.shortcut_rkey, sep="_")
        self.time_proj_key = join_name(own_key, self.time_proj_rkey, sep="_")

    def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]:
        """Yield ``(key, name, module, struct, field name)`` for every conv, then the shortcut
        and the time projection when they exist."""
        for level_convs, level_names in zip(self.convs, self.conv_names, strict=True):
            for conv_module, conv_name in zip(level_convs, level_names, strict=True):
                yield self.conv_key, conv_name, conv_module, self, "conv"
        if self.shortcut is not None:
            yield self.shortcut_key, self.shortcut_name, self.shortcut, self, "shortcut"
        if self.time_proj is not None:
            yield self.time_proj_key, self.time_proj_name, self.time_proj, self, "time_proj"

    @staticmethod
    def construct(
        module: ResnetBlock2D,
        /,
        parent: BaseModuleStruct = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "DiffusionResnetStruct":
        """Build the struct for ``module``.

        Raises:
            NotImplementedError: If ``module`` is not a ``ResnetBlock2D``.
        """
        if not isinstance(module, ResnetBlock2D):
            raise NotImplementedError(f"Unsupported module type: {type(module)}")

        def _unwrap(conv_module: nn.Module, base_rname: str) -> tuple[list, list[str], bool]:
            """Return the plain convs behind ``conv_module``, their rnames and a "shifted" flag."""
            if isinstance(conv_module, ConcatConv2d):
                leaves, leaf_rnames, any_shifted = [], [], False
                for conv_idx, conv in enumerate(conv_module.convs):
                    if isinstance(conv, ShiftedConv2d):
                        any_shifted = True
                        leaves.append(conv.conv)
                        leaf_rnames.append(f"{base_rname}.convs.{conv_idx}.conv")
                    else:
                        assert isinstance(conv, nn.Conv2d)
                        leaves.append(conv)
                        leaf_rnames.append(f"{base_rname}.convs.{conv_idx}")
                return leaves, leaf_rnames, any_shifted
            if isinstance(conv_module, ShiftedConv2d):
                return [conv_module.conv], [f"{base_rname}.conv"], True
            assert isinstance(conv_module, nn.Conv2d)
            return [conv_module], [base_rname], False

        assert module.upsample is None, "upsample must be None"
        assert module.downsample is None, "downsample must be None"
        act_type = module.nonlinearity.__class__.__name__.lower()
        conv1_convs, conv1_names, conv1_shifted = _unwrap(module.conv1, "conv1")
        conv2_convs, conv2_names, conv2_shifted = _unwrap(module.conv2, "conv2")
        convs = [conv1_convs, conv2_convs]
        conv_rnames = [conv1_names, conv2_names]
        norms, norm_rnames = [module.norm1, module.norm2], ["norm1", "norm2"]
        shortcut = module.conv_shortcut
        shortcut_rname = "" if shortcut is None else "conv_shortcut"
        time_proj = module.time_emb_proj
        time_proj_rname = "" if time_proj is None else "time_emb_proj"
        if conv1_shifted or conv2_shifted:
            # once any conv is wrapped, every recorded conv must carry a truthy ``shifted`` attribute
            assert all(hasattr(conv, "shifted") and conv.shifted for level_convs in convs for conv in level_convs)
            act_type += "_shifted"

        first_conv = convs[0][0]
        config = FeedForwardConfigStruct(
            hidden_size=first_conv.weight.shape[1],
            intermediate_size=first_conv.weight.shape[0],
            intermediate_act_type=act_type,
            num_experts=1,
        )
        return DiffusionResnetStruct(
            module=module,
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            config=config,
            norms=norms,
            convs=convs,
            shortcut=shortcut,
            time_proj=time_proj,
            norm_rnames=norm_rnames,
            conv_rnames=conv_rnames,
            shortcut_rname=shortcut_rname,
            time_proj_rname=time_proj_rname,
        )

    @classmethod
    def _get_default_key_map(cls) -> dict[str, set[str]]:
        """Get the default allowed keys (each relative key maps to itself)."""
        key_map: dict[str, set[str]] = defaultdict(set)
        for own_rkey in (cls.conv_rkey, cls.shortcut_rkey, cls.time_proj_rkey):
            key_map[own_rkey].add(own_rkey)
        return {rkey: keys for rkey, keys in key_map.items() if keys}
@dataclass(kw_only=True)
class UNetBlockStruct(DiffusionBlockStruct):
    """Struct describing one UNet down, mid or up block: its resnets, optional transformers
    and optional down/up-sampling convolution."""

    class BlockType(enum.StrEnum):
        """Position of the block inside the UNet."""

        DOWN = "down"
        MID = "mid"
        UP = "up"

    # region relative keys
    resnet_rkey: tp.ClassVar[str] = "resblock"
    sampler_rkey: tp.ClassVar[str] = "sample"
    transformer_rkey: tp.ClassVar[str] = ""
    resnet_struct_cls: tp.ClassVar[type[DiffusionResnetStruct]] = DiffusionResnetStruct
    transformer_struct_cls: tp.ClassVar[type[DiffusionTransformerStruct]] = DiffusionTransformerStruct
    # endregion

    parent: tp.Optional["UNetStruct"] = field(repr=False)
    # region attributes
    block_type: BlockType
    # endregion
    # region modules
    resnets: nn.ModuleList = field(repr=False)
    transformers: nn.ModuleList = field(repr=False)
    sampler: nn.Conv2d | None
    # endregion
    # region relative names
    resnets_rname: str
    transformers_rname: str
    sampler_rname: str
    # endregion
    # region absolute names
    resnets_name: str = field(init=False, repr=False)
    transformers_name: str = field(init=False, repr=False)
    sampler_name: str = field(init=False, repr=False)
    resnet_names: list[str] = field(init=False, repr=False)
    transformer_names: list[str] = field(init=False, repr=False)
    # endregion
    # region absolute keys
    sampler_key: str = field(init=False, repr=False)
    # endregion
    # region child structs
    resnet_structs: list[DiffusionResnetStruct] = field(init=False, repr=False)
    transformer_structs: list[DiffusionTransformerStruct] = field(init=False, repr=False)
    # endregion

    @property
    def downsample(self) -> nn.Conv2d | None:
        """The sampler when this is a down block, otherwise ``None``."""
        if self.is_downsample_block():
            return self.sampler
        return None

    @property
    def upsample(self) -> nn.Conv2d | None:
        """The sampler when this is an up block, otherwise ``None``."""
        if self.is_upsample_block():
            return self.sampler
        return None

    def __post_init__(self) -> None:
        """Validate the block layout, derive absolute names/keys and build the child structs."""
        super().__post_init__()
        num_resnets, num_transformers = len(self.resnets), len(self.transformers)
        # isinstance is False for None, so this also covers the "no parent" case
        unet_parent = self.parent if isinstance(self.parent, UNetStruct) else None
        if self.is_downsample_block():
            assert num_resnets == num_transformers or num_transformers == 0
            if unet_parent is not None:
                assert self.rname == f"{unet_parent.down_blocks_rname}.{self.idx}"
        elif self.is_mid_block():
            assert num_resnets == num_transformers + 1 or num_transformers == 0
            if unet_parent is not None:
                # NOTE(review): compares against the parent's absolute ``mid_block_name`` whereas the
                # down/up branches compare against relative names — confirm this is intended.
                assert self.rname == unet_parent.mid_block_name
                assert self.idx == 0
        else:
            assert self.is_upsample_block(), f"Unsupported block type: {self.block_type}"
            assert num_resnets == num_transformers or num_transformers == 0
            if unet_parent is not None:
                assert self.rname == f"{unet_parent.up_blocks_rname}.{self.idx}"

        resnet_rnames = [f"{self.resnets_rname}.{pos}" for pos in range(num_resnets)]
        transformer_rnames = [f"{self.transformers_rname}.{pos}" for pos in range(num_transformers)]
        self.resnets_name = join_name(self.name, self.resnets_rname)
        self.transformers_name = join_name(self.name, self.transformers_rname)
        self.resnet_names = [join_name(self.name, child_rname) for child_rname in resnet_rnames]
        self.transformer_names = [join_name(self.name, child_rname) for child_rname in transformer_rnames]
        self.sampler_name = join_name(self.name, self.sampler_rname)
        self.sampler_key = join_name(self.key, self.sampler_rkey, sep="_")

        # Each list is collected locally and bound only after all of its children were constructed.
        resnet_structs = []
        for pos, (resnet, child_rname) in enumerate(zip(self.resnets, resnet_rnames, strict=True)):
            resnet_structs.append(
                self.resnet_struct_cls.construct(
                    resnet, parent=self, fname="resnet", rname=child_rname, rkey=self.resnet_rkey, idx=pos
                )
            )
        self.resnet_structs = resnet_structs
        transformer_structs = []
        for pos, (transformer, child_rname) in enumerate(zip(self.transformers, transformer_rnames, strict=True)):
            transformer_structs.append(
                self.transformer_struct_cls.construct(
                    transformer,
                    parent=self,
                    fname="transformer",
                    rname=child_rname,
                    rkey=self.transformer_rkey,
                    idx=pos,
                )
            )
        self.transformer_structs = transformer_structs

    def is_downsample_block(self) -> bool:
        """Whether the block type is ``DOWN``."""
        return self.block_type == self.BlockType.DOWN

    def is_mid_block(self) -> bool:
        """Whether the block type is ``MID``."""
        return self.block_type == self.BlockType.MID

    def is_upsample_block(self) -> bool:
        """Whether the block type is ``UP``."""
        return self.block_type == self.BlockType.UP

    def has_downsample(self) -> bool:
        """Whether this is a down block that actually has a sampler."""
        return self.is_downsample_block() and self.sampler is not None

    def has_upsample(self) -> bool:
        """Whether this is an up block that actually has a sampler."""
        return self.is_upsample_block() and self.sampler is not None

    def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]:
        """Yield the key modules of all resnets, then all transformers, then the sampler if any."""
        for resnet_struct in self.resnet_structs:
            yield from resnet_struct.named_key_modules()
        for transformer_struct in self.transformer_structs:
            yield from transformer_struct.named_key_modules()
        if self.sampler is not None:
            yield self.sampler_key, self.sampler_name, self.sampler, self, "sampler"

    def iter_attention_structs(self) -> tp.Generator[DiffusionAttentionStruct, None, None]:
        """Yield the attention structs of every transformer in this block."""
        for transformer_struct in self.transformer_structs:
            yield from transformer_struct.iter_attention_structs()

    def iter_transformer_block_structs(self) -> tp.Generator[DiffusionTransformerBlockStruct, None, None]:
        """Yield the transformer block structs of every transformer in this block."""
        for transformer_struct in self.transformer_structs:
            yield from transformer_struct.iter_transformer_block_structs()

    @staticmethod
    def _default_construct(
        module: UNET_BLOCK_CLS,
        /,
        parent: tp.Optional["UNetStruct"] = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "UNetBlockStruct":
        """Build the struct for ``module``.

        Raises:
            NotImplementedError: If ``module`` is none of the supported UNet block types.
        """

        def _sampler_conv(samplers, sampler_cls: type, container_rname: str) -> tuple[nn.Conv2d | None, str]:
            """Return the conv (and its rname) of the single sampler, or ``(None, "")``."""
            if samplers is None:
                return None, ""
            assert len(samplers) == 1
            wrapper = samplers[0]
            assert isinstance(wrapper, sampler_cls)
            conv = wrapper.conv
            assert isinstance(conv, nn.Conv2d)
            return conv, f"{container_rname}.0.conv"

        # read before the type dispatch, exactly like every supported block exposes it
        resnets, resnets_rname = module.resnets, "resnets"
        if isinstance(module, (DownBlock2D, CrossAttnDownBlock2D)):
            block_type = UNetBlockStruct.BlockType.DOWN
            with_attentions = isinstance(module, CrossAttnDownBlock2D) and module.attentions is not None
            sampler, sampler_rname = _sampler_conv(module.downsamplers, Downsample2D, "downsamplers")
        elif isinstance(module, (UNetMidBlock2D, UNetMidBlock2DCrossAttn)):
            block_type = UNetBlockStruct.BlockType.MID
            with_attentions = (
                isinstance(module, UNetMidBlock2DCrossAttn) or module.add_attention
            ) and module.attentions is not None
            sampler, sampler_rname = None, ""
        elif isinstance(module, (UpBlock2D, CrossAttnUpBlock2D)):
            block_type = UNetBlockStruct.BlockType.UP
            with_attentions = isinstance(module, CrossAttnUpBlock2D) and module.attentions is not None
            sampler, sampler_rname = _sampler_conv(module.upsamplers, Upsample2D, "upsamplers")
        else:
            raise NotImplementedError(f"Unsupported module type: {type(module)}")
        if with_attentions:
            transformers, transformers_rname = module.attentions, "attentions"
        else:
            transformers, transformers_rname = [], ""
        return UNetBlockStruct(
            module=module,
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            block_type=block_type,
            resnets=resnets,
            transformers=transformers,
            sampler=sampler,
            resnets_rname=resnets_rname,
            transformers_rname=transformers_rname,
            sampler_rname=sampler_rname,
        )

    @classmethod
    def _get_default_key_map(cls) -> dict[str, set[str]]:
        """Get the default allowed keys.

        Resnet keys are registered under their joined relative key and under the resnet key
        itself; transformer keys are registered under both their original and joined relative key.
        """
        key_map: dict[str, set[str]] = defaultdict(set)
        resnet_prefix = cls.resnet_rkey
        for nested_rkey, nested_keys in cls.resnet_struct_cls._get_default_key_map().items():
            full_rkey = join_name(resnet_prefix, nested_rkey, sep="_")
            for nested_key in nested_keys:
                full_key = join_name(resnet_prefix, nested_key, sep="_")
                key_map[full_rkey].add(full_key)
                key_map[resnet_prefix].add(full_key)
        transformer_prefix = cls.transformer_rkey
        for nested_rkey, nested_keys in cls.transformer_struct_cls._get_default_key_map().items():
            full_rkey = join_name(transformer_prefix, nested_rkey, sep="_")
            for nested_key in nested_keys:
                full_key = join_name(transformer_prefix, nested_key, sep="_")
                key_map[nested_rkey].add(full_key)
                key_map[full_rkey].add(full_key)
        return {rkey: keys for rkey, keys in key_map.items() if keys}
ImageTimeEmbedding | ImageHintTimeEmbedding | None ) """Additional time embedding""" text_embed: nn.Linear | ImageProjection | TextImageProjection | None """Text embedding""" # region post-block modules norm_out: nn.GroupNorm | None proj_out: nn.Conv2d # endregion # endregion down_blocks: nn.ModuleList = field(repr=False) mid_block: nn.Module = field(repr=False) up_blocks: nn.ModuleList = field(repr=False) # endregion # region relative names input_embed_rname: str time_embed_rname: str add_time_embed_rname: str text_embed_rname: str norm_out_rname: str proj_out_rname: str down_blocks_rname: str mid_block_rname: str up_blocks_rname: str # endregion # region absolute names input_embed_name: str = field(init=False, repr=False) time_embed_name: str = field(init=False, repr=False) add_time_embed_name: str = field(init=False, repr=False) text_embed_name: str = field(init=False, repr=False) norm_out_name: str = field(init=False, repr=False) proj_out_name: str = field(init=False, repr=False) down_blocks_name: str = field(init=False, repr=False) mid_block_name: str = field(init=False, repr=False) up_blocks_name: str = field(init=False, repr=False) down_block_names: list[str] = field(init=False, repr=False) up_block_names: list[str] = field(init=False, repr=False) # endregion # region absolute keys input_embed_key: str = field(init=False, repr=False) time_embed_key: str = field(init=False, repr=False) add_time_embed_key: str = field(init=False, repr=False) text_embed_key: str = field(init=False, repr=False) norm_out_key: str = field(init=False, repr=False) proj_out_key: str = field(init=False, repr=False) # endregion # region child structs down_block_structs: list[UNetBlockStruct] = field(init=False, repr=False) mid_block_struct: UNetBlockStruct = field(init=False, repr=False) up_block_structs: list[UNetBlockStruct] = field(init=False, repr=False) # endregion @property def num_down_blocks(self) -> int: return len(self.down_blocks) @property def num_up_blocks(self) -> int: 
def __post_init__(self) -> None:
    """Derive the absolute names and keys, then build the child structs.

    The method fills in, in this order:

    - the absolute names of the down/mid/up block containers and of each down/up block,
    - `pre_module_structs` for `time_embed`, `add_time_embed`, `text_embed` and `input_embed`,
    - `post_module_structs` for `norm_out` and `proj_out`,
    - the block structs for the down blocks, the mid block and the up blocks.

    For every pre/post module, `<fname>_key` is always set; `<fname>_name` is the joined
    name when the module exists or a relative name was given, and `""` otherwise.
    """
    super().__post_init__()
    down_block_rnames = [f"{self.down_blocks_rname}.{idx}" for idx in range(len(self.down_blocks))]
    up_block_rnames = [f"{self.up_blocks_rname}.{idx}" for idx in range(len(self.up_blocks))]
    self.down_blocks_name = join_name(self.name, self.down_blocks_rname)
    self.mid_block_name = join_name(self.name, self.mid_block_rname)
    self.up_blocks_name = join_name(self.name, self.up_blocks_rname)
    self.down_block_names = [join_name(self.name, rname) for rname in down_block_rnames]
    self.up_block_names = [join_name(self.name, rname) for rname in up_block_rnames]

    self.pre_module_structs = {}
    for fname in ("time_embed", "add_time_embed", "text_embed", "input_embed"):
        module, rname, rkey = getattr(self, fname), getattr(self, f"{fname}_rname"), getattr(self, f"{fname}_rkey")
        setattr(self, f"{fname}_key", join_name(self.key, rkey, sep="_"))
        if module is not None or rname:
            setattr(self, f"{fname}_name", join_name(self.name, rname))
        else:
            setattr(self, f"{fname}_name", "")
        if module is not None:
            assert rname, f"rname of {fname} must not be empty"
            self.pre_module_structs[getattr(self, f"{fname}_name")] = DiffusionModuleStruct(
                module=module, parent=self, fname=fname, rname=rname, rkey=rkey
            )

    self.post_module_structs = {}
    for fname in ("norm_out", "proj_out"):
        module, rname, rkey = getattr(self, fname), getattr(self, f"{fname}_rname"), getattr(self, f"{fname}_rkey")
        setattr(self, f"{fname}_key", join_name(self.key, rkey, sep="_"))
        if module is not None or rname:
            setattr(self, f"{fname}_name", join_name(self.name, rname))
        else:
            setattr(self, f"{fname}_name", "")
        if module is not None:
            self.post_module_structs[getattr(self, f"{fname}_name")] = DiffusionModuleStruct(
                module=module, parent=self, fname=fname, rname=rname, rkey=rkey
            )

    self.down_block_structs = [
        self.down_block_struct_cls.construct(
            block, parent=self, fname="down_block", rname=rname, rkey=self.down_block_rkey, idx=idx
        )
        for idx, (block, rname) in enumerate(zip(self.down_blocks, down_block_rnames, strict=True))
    ]
    # Children receive names relative to this struct, exactly like the down/up blocks.
    # The absolute `mid_block_name` was passed here before; it only coincides with the
    # relative name while `self.name` is empty.
    # NOTE(review): presumably `construct` joins the parent's name with `rname`, so the
    # absolute name would have duplicated the prefix for a nested UNet — confirm.
    self.mid_block_struct = self.mid_block_struct_cls.construct(
        self.mid_block, parent=self, fname="mid_block", rname=self.mid_block_rname, rkey=self.mid_block_rkey
    )
    self.up_block_structs = [
        self.up_block_struct_cls.construct(
            block, parent=self, fname="up_block", rname=rname, rkey=self.up_block_rkey, idx=idx
        )
        for idx, (block, rname) in enumerate(zip(self.up_blocks, up_block_rnames, strict=True))
    ]
"has_cross_attention") and down_block.has_cross_attention: # outputs unchanged recomputes.append(False) elif _is_adapter: # outputs changed recomputes.append(True) else: # outputs unchanged recomputes.append(False) # endregion layers.append(self.mid_block) layer_structs.append(self.mid_block_struct) use_prev_layer_outputs.append(False) # recomputes is already appened in the previous down blocks layers.extend(self.up_blocks) layer_structs.extend(self.up_block_structs) use_prev_layer_outputs.append(False) use_prev_layer_outputs.extend([True] * (num_up_blocks - 1)) recomputes += [True] * num_up_blocks return layers, layer_structs, recomputes, use_prev_layer_outputs @staticmethod def _default_construct( module: tp.Union[UNET_PIPELINE_CLS, UNET_CLS], /, parent: tp.Optional[BaseModuleStruct] = None, fname: str = "", rname: str = "", rkey: str = "", idx: int = 0, **kwargs, ) -> "UNetStruct": if isinstance(module, UNET_PIPELINE_CLS): module = module.unet if isinstance(module, (UNet2DConditionModel, UNet2DModel)): input_embed, time_embed = module.conv_in, module.time_embedding input_embed_rname, time_embed_rname = "conv_in", "time_embedding" text_embed, text_embed_rname = None, "" add_time_embed, add_time_embed_rname = None, "" if hasattr(module, "encoder_hid_proj"): text_embed, text_embed_rname = module.encoder_hid_proj, "encoder_hid_proj" if hasattr(module, "add_embedding"): add_time_embed, add_time_embed_rname = module.add_embedding, "add_embedding" norm_out, norm_out_rname = module.conv_norm_out, "conv_norm_out" proj_out, proj_out_rname = module.conv_out, "conv_out" down_blocks, down_blocks_rname = module.down_blocks, "down_blocks" mid_block, mid_block_rname = module.mid_block, "mid_block" up_blocks, up_blocks_rname = module.up_blocks, "up_blocks" return UNetStruct( module=module, parent=parent, fname=fname, idx=idx, rname=rname, rkey=rkey, input_embed=input_embed, time_embed=time_embed, add_time_embed=add_time_embed, text_embed=text_embed, norm_out=norm_out, 
@classmethod
def _get_default_key_map(cls) -> dict[str, set[str]]:
    """Assemble the default key groups for the whole UNet.

    Returns:
        `dict[str, set[str]]`:
            A mapping from a group key to the set of keys filed under it.
            Groups that ended up without members are omitted.
    """
    merged: dict[str, set[str]] = defaultdict(set)
    # (block key, block struct class, whether a sampler key is added); the mid block gets none.
    stages = (
        (cls.down_block_rkey, cls.down_block_struct_cls, True),
        (cls.mid_block_rkey, cls.mid_block_struct_cls, False),
        (cls.up_block_rkey, cls.up_block_struct_cls, True),
    )
    for stage_key, stage_cls, with_sampler in stages:
        stage_map: dict[str, set[str]] = defaultdict(set)
        if with_sampler:
            stage_map[stage_cls.sampler_rkey].add(join_name(stage_key, stage_cls.sampler_rkey, sep="_"))
        for child_rkey, child_keys in stage_cls._get_default_key_map().items():
            for child_key in child_keys:
                stage_map[child_rkey].add(join_name(stage_key, child_key, sep="_"))
        # File the stage's keys under each group key and, when the stage has a key, under it too.
        for group, members in stage_map.items():
            merged[group].update(members)
            if stage_key:
                merged[stage_key].update(members)

    embed_rkeys = (
        cls.input_embed_rkey,
        cls.time_embed_rkey,
        cls.add_time_embed_rkey,
        cls.text_embed_rkey,
        cls.norm_out_rkey,
        cls.proj_out_rkey,
    )
    leaves: set[str] = set(embed_rkeys)
    for members in merged.values():
        leaves.update(members)

    # Offer an "embed" group spanning all embedding keys unless that name is already taken.
    if "embed" not in leaves and "embed" not in merged:
        merged["embed"].update(embed_rkeys)

    # Every leaf key maps to exactly itself, replacing any group registered under the same name.
    for leaf in leaves:
        if leaf in merged:
            merged[leaf].clear()
        merged[leaf].add(leaf)
    return {group: members for group, members in merged.items() if members}
def __post_init__(self) -> None:
    """Derive the keys/names of the embedding modules and register pre/post module structs.

    For `input_embed`, `time_embed` and `text_embed`, `<fname>_key` is always set and
    `<fname>_name` is the joined name when the module exists or a relative name was given
    (`""` otherwise). Existing modules are added to `pre_module_structs`.

    For `norm_out` and `proj_out`, only `norm_out_key` is assigned here; the `<fname>_name`
    attributes are read, not written.
    NOTE(review): presumably those names (and `proj_out_key`) are assigned by a parent
    `__post_init__` — confirm.
    """
    super().__post_init__()

    self.pre_module_structs = {}
    for fname in ("input_embed", "time_embed", "text_embed"):
        module = getattr(self, fname)
        rname = getattr(self, f"{fname}_rname")
        rkey = getattr(self, f"{fname}_rkey")
        setattr(self, f"{fname}_key", join_name(self.key, rkey, sep="_"))
        has_name = module is not None or rname
        setattr(self, f"{fname}_name", join_name(self.name, rname) if has_name else "")
        if module is None:
            continue
        # `setdefault` keeps the first struct when two fields resolve to the same name.
        self.pre_module_structs.setdefault(
            getattr(self, f"{fname}_name"),
            DiffusionModuleStruct(module=module, parent=self, fname=fname, rname=rname, rkey=rkey),
        )

    self.post_module_structs = {}
    self.norm_out_key = join_name(self.key, self.norm_out_rkey, sep="_")
    for fname in ("norm_out", "proj_out"):
        module = getattr(self, fname)
        rname = getattr(self, f"{fname}_rname")
        rkey = getattr(self, f"{fname}_rkey")
        if module is None:
            continue
        self.post_module_structs.setdefault(
            getattr(self, f"{fname}_name"),
            DiffusionModuleStruct(module=module, parent=self, fname=fname, rname=rname, rkey=rkey),
        )
@staticmethod
def _default_construct(
    module: tp.Union[DIT_PIPELINE_CLS, DIT_CLS],
    /,
    parent: tp.Optional[BaseModuleStruct] = None,
    fname: str = "",
    rname: str = "",
    rkey: str = "",
    idx: int = 0,
    **kwargs,
) -> "DiTStruct":
    """Build a `DiTStruct` from a DiT pipeline or DiT model.

    A pipeline is unwrapped to its `transformer`. Flux transformers are delegated to
    `FluxStruct.construct`; PixArt, Sana and SD3 transformers are mapped onto the
    common embedding/output layout.

    Args:
        module: The pipeline or transformer model to wrap.
        parent: The parent struct, if any.
        fname: The field name of the module in its parent.
        rname: The relative name of the module.
        rkey: The relative key of the module.
        idx: The index of the module.
        **kwargs: Extra arguments, forwarded only to `FluxStruct.construct`.

    Returns:
        `DiTStruct`: The constructed struct.

    Raises:
        NotImplementedError: If the model type is not supported.
    """
    if isinstance(module, DIT_PIPELINE_CLS):
        module = module.transformer
    if isinstance(module, FluxTransformer2DModel):
        return FluxStruct.construct(module, parent=parent, fname=fname, rname=rname, rkey=rkey, idx=idx, **kwargs)

    # Attribute names of (input_embed, time_embed, text_embed) per supported model type.
    # ! For PixArt, `module.adaln_single.emb` is in fact `time_embed` and
    # ! `module.adaln_single.linear` is `transformer_norm`, but since PixArt shares the
    # ! `transformer_norm`, the whole `adaln_single` is categorized as `time_embed`.
    embed_layouts = (
        (PixArtTransformer2DModel, ("pos_embed", "adaln_single", "caption_projection")),
        (SanaTransformer2DModel, ("patch_embed", "time_embed", "caption_projection")),
        (SD3Transformer2DModel, ("pos_embed", "time_text_embed", "context_embedder")),
    )
    for model_cls, embed_rnames in embed_layouts:
        if isinstance(module, model_cls):
            input_embed_rname, time_embed_rname, text_embed_rname = embed_rnames
            break
    else:
        raise NotImplementedError(f"Unsupported module type: {type(module)}")

    # The output modules and the block list use the same attribute names on all three models.
    norm_out_rname, proj_out_rname, transformer_blocks_rname = "norm_out", "proj_out", "transformer_blocks"
    input_embed = getattr(module, input_embed_rname)
    time_embed = getattr(module, time_embed_rname)
    text_embed = getattr(module, text_embed_rname)
    norm_out = getattr(module, norm_out_rname)
    proj_out = getattr(module, proj_out_rname)
    transformer_blocks = getattr(module, transformer_blocks_rname)
    return DiTStruct(
        module=module,
        parent=parent,
        fname=fname,
        idx=idx,
        rname=rname,
        rkey=rkey,
        input_embed=input_embed,
        time_embed=time_embed,
        text_embed=text_embed,
        transformer_blocks=transformer_blocks,
        norm_out=norm_out,
        proj_out=proj_out,
        input_embed_rname=input_embed_rname,
        time_embed_rname=time_embed_rname,
        text_embed_rname=text_embed_rname,
        norm_out_rname=norm_out_rname,
        proj_out_rname=proj_out_rname,
        transformer_blocks_rname=transformer_blocks_rname,
    )
key_map[rkey].add(key) key_map[brkey].add(key) if block_rkey: key_map[block_rkey].add(key) keys: set[str] = set() keys.add(cls.input_embed_rkey) keys.add(cls.time_embed_rkey) keys.add(cls.text_embed_rkey) keys.add(cls.norm_in_rkey) keys.add(cls.proj_in_rkey) keys.add(cls.norm_out_rkey) keys.add(cls.proj_out_rkey) for mapped_keys in key_map.values(): for key in mapped_keys: keys.add(key) if "embed" not in keys and "embed" not in key_map: key_map["embed"].add(cls.input_embed_rkey) key_map["embed"].add(cls.time_embed_rkey) key_map["embed"].add(cls.text_embed_rkey) key_map["embed"].add(cls.norm_in_rkey) key_map["embed"].add(cls.proj_in_rkey) key_map["embed"].add(cls.norm_out_rkey) key_map["embed"].add(cls.proj_out_rkey) for key in keys: if key in key_map: key_map[key].clear() key_map[key].add(key) return {k: v for k, v in key_map.items() if v} @dataclass(kw_only=True) class FluxStruct(DiTStruct): # region relative keys single_transformer_block_rkey: tp.ClassVar[str] = "" single_transformer_block_struct_cls: tp.ClassVar[type[DiffusionTransformerBlockStruct]] = ( DiffusionTransformerBlockStruct ) # endregion module: FluxTransformer2DModel = field(repr=False, kw_only=False) """the module of FluxTransformer2DModel""" # region child modules input_embed: nn.Linear time_embed: CombinedTimestepGuidanceTextProjEmbeddings | CombinedTimestepTextProjEmbeddings text_embed: nn.Linear single_transformer_blocks: nn.ModuleList = field(repr=False) # endregion # region relative names single_transformer_blocks_rname: str # endregion # region absolute names single_transformer_blocks_name: str = field(init=False, repr=False) single_transformer_block_names: list[str] = field(init=False, repr=False) # endregion # region child structs single_transformer_block_structs: list[DiffusionTransformerBlockStruct] = field(init=False) # endregion @property def num_blocks(self) -> int: return len(self.transformer_block_structs) + len(self.single_transformer_block_structs) @property def 
def _get_iter_block_activations_args(
    self, **input_kwargs
) -> tuple[list[nn.Module], list[DiffusionModuleStruct | DiffusionBlockStruct], list[bool], list[bool]]:
    """Get the arguments for iterating over all Flux blocks and their activations.

    The parent's lists are extended with the single transformer blocks, so the four
    returned lists stay aligned element by element.

    Args:
        **input_kwargs: The model inputs, forwarded unchanged to the parent implementation.

    Returns:
        `tuple[list[nn.Module], list[DiffusionModuleStruct | DiffusionBlockStruct], list[bool], list[bool]]`:
            the layers, the layer structs, the recomputes, and the use_prev_layer_outputs
    """
    # Forward the inputs instead of silently dropping them, so a parent that inspects
    # them keeps working.
    layers, layer_structs, recomputes, use_prev_layer_outputs = super()._get_iter_block_activations_args(
        **input_kwargs
    )
    num_single_blocks = len(self.single_transformer_blocks)
    layers.extend(self.single_transformer_blocks)
    layer_structs.extend(self.single_transformer_block_structs)
    if num_single_blocks > 0:
        # The first single block does not reuse the previous layer's outputs; the rest do.
        # Guarded so that an empty block list does not add a flag without a matching layer.
        use_prev_layer_outputs.append(False)
        use_prev_layer_outputs.extend([True] * (num_single_blocks - 1))
    recomputes.extend([False] * num_single_blocks)
    return layers, layer_structs, recomputes, use_prev_layer_outputs
FluxTransformer2DModel): input_embed, time_embed, text_embed = module.x_embedder, module.time_text_embed, module.context_embedder input_embed_rname, time_embed_rname, text_embed_rname = "x_embedder", "time_text_embed", "context_embedder" norm_out, norm_out_rname = module.norm_out, "norm_out" proj_out, proj_out_rname = module.proj_out, "proj_out" transformer_blocks, transformer_blocks_rname = module.transformer_blocks, "transformer_blocks" single_transformer_blocks = module.single_transformer_blocks single_transformer_blocks_rname = "single_transformer_blocks" return FluxStruct( module=module, parent=parent, fname=fname, idx=idx, rname=rname, rkey=rkey, input_embed=input_embed, time_embed=time_embed, text_embed=text_embed, transformer_blocks=transformer_blocks, single_transformer_blocks=single_transformer_blocks, norm_out=norm_out, proj_out=proj_out, input_embed_rname=input_embed_rname, time_embed_rname=time_embed_rname, text_embed_rname=text_embed_rname, norm_out_rname=norm_out_rname, proj_out_rname=proj_out_rname, transformer_blocks_rname=transformer_blocks_rname, single_transformer_blocks_rname=single_transformer_blocks_rname, ) raise NotImplementedError(f"Unsupported module type: {type(module)}") @classmethod def _get_default_key_map(cls) -> dict[str, set[str]]: """Get the default allowed keys.""" key_map: dict[str, set[str]] = defaultdict(set) for block_rkey, block_cls in ( (cls.transformer_block_rkey, cls.transformer_block_struct_cls), (cls.single_transformer_block_rkey, cls.single_transformer_block_struct_cls), ): block_key = block_rkey block_key_map = block_cls._get_default_key_map() for rkey, keys in block_key_map.items(): brkey = join_name(block_rkey, rkey, sep="_") for key in keys: key = join_name(block_key, key, sep="_") key_map[rkey].add(key) key_map[brkey].add(key) if block_rkey: key_map[block_rkey].add(key) keys: set[str] = set() keys.add(cls.input_embed_rkey) keys.add(cls.time_embed_rkey) keys.add(cls.text_embed_rkey) keys.add(cls.norm_in_rkey) 
keys.add(cls.proj_in_rkey) keys.add(cls.norm_out_rkey) keys.add(cls.proj_out_rkey) for mapped_keys in key_map.values(): for key in mapped_keys: keys.add(key) if "embed" not in keys and "embed" not in key_map: key_map["embed"].add(cls.input_embed_rkey) key_map["embed"].add(cls.time_embed_rkey) key_map["embed"].add(cls.text_embed_rkey) key_map["embed"].add(cls.norm_in_rkey) key_map["embed"].add(cls.proj_in_rkey) key_map["embed"].add(cls.norm_out_rkey) key_map["embed"].add(cls.proj_out_rkey) for key in keys: if key in key_map: key_map[key].clear() key_map[key].add(key) return {k: v for k, v in key_map.items() if v} DiffusionAttentionStruct.register_factory(Attention, DiffusionAttentionStruct._default_construct) DiffusionFeedForwardStruct.register_factory( (FeedForward, FluxSingleTransformerBlock, GLUMBConv), DiffusionFeedForwardStruct._default_construct ) DiffusionTransformerBlockStruct.register_factory(DIT_BLOCK_CLS, DiffusionTransformerBlockStruct._default_construct) UNetBlockStruct.register_factory(UNET_BLOCK_CLS, UNetBlockStruct._default_construct) UNetStruct.register_factory(tp.Union[UNET_PIPELINE_CLS, UNET_CLS], UNetStruct._default_construct) FluxStruct.register_factory( tp.Union[FluxPipeline, FluxControlPipeline, FluxTransformer2DModel], FluxStruct._default_construct ) DiTStruct.register_factory(tp.Union[DIT_PIPELINE_CLS, DIT_CLS], DiTStruct._default_construct) DiffusionTransformerStruct.register_factory(Transformer2DModel, DiffusionTransformerStruct._default_construct) DiffusionModelStruct.register_factory(tp.Union[PIPELINE_CLS, MODEL_CLS], DiffusionModelStruct._default_construct) ================================================ FILE: deepcompressor/app/diffusion/pipeline/__init__.py ================================================ # -*- coding: utf-8 -*- from .config import DiffusionPipelineConfig ================================================ FILE: deepcompressor/app/diffusion/pipeline/config.py ================================================ # -*- 
coding: utf-8 -*- """Diffusion pipeline configuration module.""" import gc import typing as tp from dataclasses import dataclass, field import torch from diffusers.pipelines import ( AutoPipelineForText2Image, DiffusionPipeline, FluxControlPipeline, FluxFillPipeline, SanaPipeline, ) from omniconfig import configclass from torch import nn from transformers import PreTrainedModel, PreTrainedTokenizer, T5EncoderModel from deepcompressor.data.utils.dtype import eval_dtype from deepcompressor.quantizer.processor import Quantizer from deepcompressor.utils import tools from deepcompressor.utils.hooks import AccumBranchHook, ProcessHook from ....nn.patch.linear import ConcatLinear, ShiftedLinear from ....nn.patch.lowrank import LowRankBranch from ..nn.patch import ( replace_fused_linear_with_concat_linear, replace_up_block_conv_with_concat_conv, shift_input_activations, ) __all__ = ["DiffusionPipelineConfig"] @configclass @dataclass class LoRAConfig: """LoRA configuration. Args: path (`str`): The path of the LoRA branch. weight_name (`str`): The weight name of the LoRA branch. alpha (`float`): The alpha value of the LoRA branch. """ path: str weight_name: str alpha: float = 1.0 @configclass @dataclass class DiffusionPipelineConfig: """Diffusion pipeline configuration. Args: name (`str`): The name of the pipeline. dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The data type of the pipeline. device (`str`, *optional*, defaults to `"cuda"`): The device of the pipeline. shift_activations (`bool`, *optional*, defaults to `False`): Whether to shift activations. 
""" _pipeline_factories: tp.ClassVar[ dict[str, tp.Callable[[str, str, torch.dtype, torch.device, bool], DiffusionPipeline]] ] = {} _text_extractors: tp.ClassVar[ dict[ str, tp.Callable[ [DiffusionPipeline, tuple[type[PreTrainedModel], ...]], list[tuple[str, PreTrainedModel, PreTrainedTokenizer]], ], ] ] = {} name: str path: str = "" dtype: torch.dtype = field( default_factory=lambda s=torch.float32: eval_dtype(s, with_quant_dtype=False, with_none=False) ) device: str = "cuda" shift_activations: bool = False lora: LoRAConfig | None = None family: str = field(init=False) task: str = "text-to-image" def __post_init__(self): self.family = self.name.split("-")[0] if self.name == "flux.1-canny-dev": self.task = "canny-to-image" elif self.name == "flux.1-depth-dev": self.task = "depth-to-image" elif self.name == "flux.1-fill-dev": self.task = "inpainting" def build( self, *, dtype: str | torch.dtype | None = None, device: str | torch.device | None = None ) -> DiffusionPipeline: """Build the diffusion pipeline. Args: dtype (`str` or `torch.dtype`, *optional*): The data type of the pipeline. device (`str` or `torch.device`, *optional*): The device of the pipeline. Returns: `DiffusionPipeline`: The diffusion pipeline. """ if dtype is None: dtype = self.dtype if device is None: device = self.device _factory = self._pipeline_factories.get(self.name, self._default_build) return _factory( name=self.name, path=self.path, dtype=dtype, device=device, shift_activations=self.shift_activations ) def extract_text_encoders( self, pipeline: DiffusionPipeline, supported: tuple[type[PreTrainedModel], ...] = (T5EncoderModel,) ) -> list[tuple[str, PreTrainedModel, PreTrainedTokenizer]]: """Extract the text encoders and tokenizers from the pipeline. Args: pipeline (`DiffusionPipeline`): The diffusion pipeline. supported (`tuple[type[PreTrainedModel], ...]`, *optional*, defaults to `(T5EncoderModel,)`): The supported text encoder types. If not specified, all text encoders will be extracted. 
@classmethod
def register_pipeline_factory(
    cls,
    names: str | tuple[str, ...],
    /,
    factory: tp.Callable[[str, str, torch.dtype, torch.device, bool], DiffusionPipeline],
    *,
    overwrite: bool = False,
) -> None:
    """Register a factory that builds the pipeline(s) with the given name(s).

    Args:
        names (`str` or `tuple[str, ...]`):
            The pipeline name, or several names sharing the same factory.
        factory (`Callable[[str, str, torch.dtype, torch.device, bool], DiffusionPipeline]`):
            The pipeline factory function.
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether to replace a factory that is already registered under a name.

    Raises:
        ValueError: If a name is already registered and `overwrite` is `False`.
    """
    targets = (names,) if isinstance(names, str) else names
    registry = cls._pipeline_factories
    for target in targets:
        if not overwrite and target in registry:
            raise ValueError(f"Pipeline factory {target} already exists.")
        registry[target] = factory
""" if isinstance(names, str): names = [names] for name in names: if name in cls._text_extractors and not overwrite: raise ValueError(f"Text extractor {name} already exists.") cls._text_extractors[name] = extractor def load_lora( # noqa: C901 self, pipeline: DiffusionPipeline, smooth_cache: dict[str, torch.Tensor] | None = None ) -> DiffusionPipeline: smooth_cache = smooth_cache or {} model = pipeline.unet if hasattr(pipeline, "unet") else pipeline.transformer assert isinstance(model, nn.Module) if self.lora is not None: logger = tools.logging.getLogger(__name__) logger.info(f"Load LoRA branches from {self.lora.path}") lora_state_dict, alphas = pipeline.lora_state_dict( self.lora.path, return_alphas=True, weight_name=self.lora.weight_name ) tools.logging.Formatter.indent_inc() for name, module in model.named_modules(): if isinstance(module, (nn.Linear, ConcatLinear, ShiftedLinear)): lora_a_key, lora_b_key = f"transformer.{name}.lora_A.weight", f"transformer.{name}.lora_B.weight" if lora_a_key in lora_state_dict: assert lora_b_key in lora_state_dict logger.info(f"+ Load LoRA branch for {name}") tools.logging.Formatter.indent_inc() a = lora_state_dict.pop(lora_a_key) b = lora_state_dict.pop(lora_b_key) assert isinstance(a, torch.Tensor) assert isinstance(b, torch.Tensor) assert a.shape[1] == module.in_features assert b.shape[0] == module.out_features if isinstance(module, ConcatLinear): logger.debug( f"- split LoRA branch into {len(module.linears)} parts ({module.in_features_list})" ) m_splits = module.linears a_splits = a.split(module.in_features_list, dim=1) b_splits = [b] * len(a_splits) else: m_splits, a_splits, b_splits = [module], [a], [b] for m, a, b in zip(m_splits, a_splits, b_splits, strict=True): assert a.shape[0] == b.shape[1] if isinstance(m, ShiftedLinear): s, m = m.shift, m.linear logger.debug(f"- shift LoRA input by {s.item() if s.numel() == 1 else s}") else: s = None assert isinstance(m, nn.Linear) device, dtype = m.weight.device, m.weight.dtype a, b 
= a.to(device=device, dtype=torch.float64), b.to(device=device, dtype=torch.float64) if s is not None: if s.numel() == 1: s = torch.matmul(b, a.sum(dim=1).mul_(s.double())).mul_(self.lora.alpha) else: s = torch.matmul(b, torch.matmul(a, s.view(1, -1).double())).mul_(self.lora.alpha) if hasattr(m, "in_smooth_cache_key"): logger.debug(f"- smooth LoRA input using {m.in_smooth_cache_key} smooth scale") ss = smooth_cache[m.in_smooth_cache_key].to(device=device, dtype=torch.float64) a = a.mul_(ss.view(1, -1)) del ss if hasattr(m, "out_smooth_cache_key"): logger.debug(f"- smooth LoRA output using {m.out_smooth_cache_key} smooth scale") ss = smooth_cache[m.out_smooth_cache_key].to(device=device, dtype=torch.float64) b = b.div_(ss.view(-1, 1)) if s is not None: s = s.div_(ss.view(-1)) del ss branch_hook, quant_hook = None, None for hook in m._forward_pre_hooks.values(): if isinstance(hook, AccumBranchHook) and isinstance(hook.branch, LowRankBranch): branch_hook = hook if isinstance(hook, ProcessHook) and isinstance(hook.processor, Quantizer): quant_hook = hook if branch_hook is not None: logger.debug("- fuse with existing LoRA branch") assert isinstance(branch_hook.branch, LowRankBranch) _a = branch_hook.branch.a.weight.data _b = branch_hook.branch.b.weight.data if branch_hook.branch.alpha != self.lora.alpha: a, b = a.to(dtype=dtype), b.mul_(self.lora.alpha).to(dtype=dtype) _b = _b.to(dtype=torch.float64).mul_(branch_hook.branch.alpha).to(dtype=dtype) alpha = 1 else: a, b = a.to(dtype=dtype), b.to(dtype=dtype) alpha = self.lora.alpha branch_hook.branch = LowRankBranch( m.in_features, m.out_features, rank=a.shape[0] + branch_hook.branch.rank, alpha=alpha, ).to(device=device, dtype=dtype) branch_hook.branch.a.weight.data[: a.shape[0], :] = a branch_hook.branch.b.weight.data[:, : b.shape[1]] = b branch_hook.branch.a.weight.data[a.shape[0] :, :] = _a branch_hook.branch.b.weight.data[:, b.shape[1] :] = _b else: logger.debug("- create a new LoRA branch") branch = LowRankBranch( 
m.in_features, m.out_features, rank=a.shape[0], alpha=self.lora.alpha ) branch = branch.to(device=device, dtype=dtype) branch.a.weight.data.copy_(a.to(dtype=dtype)) branch.b.weight.data.copy_(b.to(dtype=dtype)) # low rank branch hook should be registered before the quantization hook if quant_hook is not None: logger.debug(f"- remove quantization hook from {name}") quant_hook.remove(m) logger.debug(f"- register LoRA branch to {name}") branch.as_hook().register(m) if quant_hook is not None: logger.debug(f"- re-register quantization hook to {name}") quant_hook.register(m) if s is not None: assert m.bias is not None m.bias.data.copy_((m.bias.double().sub_(s)).to(dtype)) del m_splits, a_splits, b_splits, a, b, s gc.collect() torch.cuda.empty_cache() tools.logging.Formatter.indent_dec() tools.logging.Formatter.indent_dec() if len(lora_state_dict) > 0: logger.warning(f"Unused LoRA weights: {lora_state_dict.keys()}") branches = nn.ModuleList() for _, module in model.named_modules(): for hook in module._forward_hooks.values(): if isinstance(hook, AccumBranchHook) and isinstance(hook.branch, LowRankBranch): branches.append(hook.branch) model.register_module("_low_rank_branches", branches) @staticmethod def _default_build( name: str, path: str, dtype: str | torch.dtype, device: str | torch.device, shift_activations: bool ) -> DiffusionPipeline: if not path: if name == "sdxl": path = "stabilityai/stable-diffusion-xl-base-1.0" elif name == "sdxl-turbo": path = "stabilityai/sdxl-turbo" elif name == "pixart-sigma": path = "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS" elif name == "flux.1-dev": path = "black-forest-labs/FLUX.1-dev" elif name == "flux.1-canny-dev": path = "black-forest-labs/FLUX.1-Canny-dev" elif name == "flux.1-depth-dev": path = "black-forest-labs/FLUX.1-Depth-dev" elif name == "flux.1-fill-dev": path = "black-forest-labs/FLUX.1-Fill-dev" elif name == "flux.1-schnell": path = "black-forest-labs/FLUX.1-schnell" else: raise ValueError(f"Path for {name} is not 
@staticmethod
def _default_extract_text_encoders(
    pipeline: DiffusionPipeline, supported: tuple[type[PreTrainedModel], ...]
) -> list[tuple[str, PreTrainedModel, PreTrainedTokenizer]]:
    """Extract the text encoders and tokenizers from the pipeline.

    Every instance attribute of ``pipeline`` whose name starts with ``text_encoder``
    is paired with the attribute ``tokenizer<suffix>`` that carries the same suffix
    (e.g. ``text_encoder_2`` with ``tokenizer_2``).

    Args:
        pipeline (`DiffusionPipeline`):
            The diffusion pipeline.
        supported (`tuple[type[PreTrainedModel], ...]`):
            The supported text encoder types. If empty, all text encoders will be extracted.

    Returns:
        `list[tuple[str, PreTrainedModel, PreTrainedTokenizer]]`:
            The list of text encoder name, model, and tokenizer.

    Raises:
        AttributeError: If a matching ``tokenizer<suffix>`` attribute is missing.
    """
    prefix = "text_encoder"
    results: list[tuple[str, PreTrainedModel, PreTrainedTokenizer]] = []
    # `vars(pipeline)` is the instance attribute dict; `vars.__dict__` (the builtin
    # function's attribute) does not exist and raised AttributeError on every call.
    for key in vars(pipeline):
        if not key.startswith(prefix):
            continue
        suffix = key[len(prefix) :]
        encoder = getattr(pipeline, f"text_encoder{suffix}")
        tokenizer = getattr(pipeline, f"tokenizer{suffix}")
        if not supported or isinstance(encoder, supported):
            results.append((key, encoder, tokenizer))
    return results
copy_on_save (`bool`, *optional*, defaults to `False`): Whether to copy the cache to the save directory. save_model (`bool`, *optional*, defaults to `False`): Whether to save the quantized model checkpoint. Returns: `DiffusionModelStruct`: The quantized diffusion model. """ logger = tools.logging.getLogger(__name__) if not isinstance(model, DiffusionModelStruct): model = DiffusionModelStruct.construct(model) assert isinstance(model, DiffusionModelStruct) quant_wgts = config.enabled_wgts quant_ipts = config.enabled_ipts quant_opts = config.enabled_opts quant_acts = quant_ipts or quant_opts quant = quant_wgts or quant_acts load_model_path, load_path, save_path = "", None, None if load_dirpath: load_path = DiffusionQuantCacheConfig( smooth=os.path.join(load_dirpath, "smooth.pt"), branch=os.path.join(load_dirpath, "branch.pt"), wgts=os.path.join(load_dirpath, "wgts.pt"), acts=os.path.join(load_dirpath, "acts.pt"), ) load_model_path = os.path.join(load_dirpath, "model.pt") if os.path.exists(load_model_path): if config.enabled_wgts and config.wgts.enabled_low_rank: if os.path.exists(load_path.branch): load_model = True else: logger.warning(f"Model low-rank branch checkpoint {load_path.branch} does not exist") load_model = False else: load_model = True if load_model: logger.info(f"* Loading model from {load_model_path}") save_dirpath = "" # do not save the model if loading else: logger.warning(f"Model checkpoint {load_model_path} does not exist") load_model = False else: load_model = False if save_dirpath: os.makedirs(save_dirpath, exist_ok=True) save_path = DiffusionQuantCacheConfig( smooth=os.path.join(save_dirpath, "smooth.pt"), branch=os.path.join(save_dirpath, "branch.pt"), wgts=os.path.join(save_dirpath, "wgts.pt"), acts=os.path.join(save_dirpath, "acts.pt"), ) else: save_model = False if quant and config.enabled_rotation: logger.info("* Rotating model for quantization") tools.logging.Formatter.indent_inc() rotate_diffusion(model, config=config) 
tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() # region smooth quantization if quant and config.enabled_smooth: logger.info("* Smoothing model for quantization") tools.logging.Formatter.indent_inc() load_from = "" if load_path and os.path.exists(load_path.smooth): load_from = load_path.smooth elif cache and cache.path.smooth and os.path.exists(cache.path.smooth): load_from = cache.path.smooth if load_from: logger.info(f"- Loading smooth scales from {load_from}") smooth_cache = torch.load(load_from) smooth_diffusion(model, config, smooth_cache=smooth_cache) else: logger.info("- Generating smooth scales") smooth_cache = smooth_diffusion(model, config) if cache and cache.path.smooth: logger.info(f"- Saving smooth scales to {cache.path.smooth}") os.makedirs(cache.dirpath.smooth, exist_ok=True) torch.save(smooth_cache, cache.path.smooth) load_from = cache.path.smooth if save_path: if not copy_on_save and load_from: logger.info(f"- Linking smooth scales to {save_path.smooth}") os.symlink(os.path.relpath(load_from, save_dirpath), save_path.smooth) else: logger.info(f"- Saving smooth scales to {save_path.smooth}") torch.save(smooth_cache, save_path.smooth) del smooth_cache tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() # endregion # region collect original state dict if config.needs_acts_quantizer_cache: if load_path and os.path.exists(load_path.acts): orig_state_dict = None elif cache and cache.path.acts and os.path.exists(cache.path.acts): orig_state_dict = None else: orig_state_dict: dict[str, torch.Tensor] = { name: param.detach().clone() for name, param in model.module.named_parameters() if param.ndim > 1 } else: orig_state_dict = None # endregion if load_model: logger.info(f"* Loading model checkpoint from {load_model_path}") load_diffusion_weights_state_dict( model, config, state_dict=torch.load(load_model_path), branch_state_dict=torch.load(load_path.branch) if os.path.exists(load_path.branch) else None, ) 
gc.collect() torch.cuda.empty_cache() elif quant_wgts: logger.info("* Quantizing weights") tools.logging.Formatter.indent_inc() quantizer_state_dict, quantizer_load_from = None, "" if load_path and os.path.exists(load_path.wgts): quantizer_load_from = load_path.wgts elif cache and cache.path.wgts and os.path.exists(cache.path.wgts): quantizer_load_from = cache.path.wgts if quantizer_load_from: logger.info(f"- Loading weight settings from {quantizer_load_from}") quantizer_state_dict = torch.load(quantizer_load_from) branch_state_dict, branch_load_from = None, "" if load_path and os.path.exists(load_path.branch): branch_load_from = load_path.branch elif cache and cache.path.branch and os.path.exists(cache.path.branch): branch_load_from = cache.path.branch if branch_load_from: logger.info(f"- Loading branch settings from {branch_load_from}") branch_state_dict = torch.load(branch_load_from) if not quantizer_load_from: logger.info("- Generating weight settings") if not branch_load_from: logger.info("- Generating branch settings") quantizer_state_dict, branch_state_dict, scale_state_dict = quantize_diffusion_weights( model, config, quantizer_state_dict=quantizer_state_dict, branch_state_dict=branch_state_dict, return_with_scale_state_dict=bool(save_dirpath), ) if not quantizer_load_from and cache and cache.dirpath.wgts: logger.info(f"- Saving weight settings to {cache.path.wgts}") os.makedirs(cache.dirpath.wgts, exist_ok=True) torch.save(quantizer_state_dict, cache.path.wgts) quantizer_load_from = cache.path.wgts if not branch_load_from and cache and cache.dirpath.branch: logger.info(f"- Saving branch settings to {cache.path.branch}") os.makedirs(cache.dirpath.branch, exist_ok=True) torch.save(branch_state_dict, cache.path.branch) branch_load_from = cache.path.branch if save_path: if not copy_on_save and quantizer_load_from: logger.info(f"- Linking weight settings to {save_path.wgts}") os.symlink(os.path.relpath(quantizer_load_from, save_dirpath), save_path.wgts) else: 
logger.info(f"- Saving weight settings to {save_path.wgts}") torch.save(quantizer_state_dict, save_path.wgts) if not copy_on_save and branch_load_from: logger.info(f"- Linking branch settings to {save_path.branch}") os.symlink(os.path.relpath(branch_load_from, save_dirpath), save_path.branch) else: logger.info(f"- Saving branch settings to {save_path.branch}") torch.save(branch_state_dict, save_path.branch) if save_model: logger.info(f"- Saving model to {save_dirpath}") torch.save(scale_state_dict, os.path.join(save_dirpath, "scale.pt")) torch.save(model.module.state_dict(), os.path.join(save_dirpath, "model.pt")) del quantizer_state_dict, branch_state_dict, scale_state_dict tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() if quant_acts: logger.info(" * Quantizing activations") tools.logging.Formatter.indent_inc() if config.needs_acts_quantizer_cache: load_from = "" if load_path and os.path.exists(load_path.acts): load_from = load_path.acts elif cache and cache.path.acts and os.path.exists(cache.path.acts): load_from = cache.path.acts if load_from: logger.info(f"- Loading activation settings from {load_from}") quantizer_state_dict = torch.load(load_from) quantize_diffusion_activations( model, config, quantizer_state_dict=quantizer_state_dict, orig_state_dict=orig_state_dict ) else: logger.info("- Generating activation settings") quantizer_state_dict = quantize_diffusion_activations(model, config, orig_state_dict=orig_state_dict) if cache and cache.dirpath.acts and quantizer_state_dict is not None: logger.info(f"- Saving activation settings to {cache.path.acts}") os.makedirs(cache.dirpath.acts, exist_ok=True) torch.save(quantizer_state_dict, cache.path.acts) load_from = cache.path.acts if save_dirpath: if not copy_on_save and load_from: logger.info(f"- Linking activation quantizer settings to {save_path.acts}") os.symlink(os.path.relpath(load_from, save_dirpath), save_path.acts) else: logger.info(f"- Saving activation quantizer settings to 
def main(config: DiffusionPtqRunConfig, logging_level: int = tools.logging.DEBUG) -> DiffusionPipeline:
    """Post-training quantization of a diffusion model.

    Builds the pipeline, quantizes its model with `ptq`, optionally loads LoRA
    branches and quantizes the text encoders, then generates and/or evaluates images.

    Args:
        config (`DiffusionPtqRunConfig`):
            The diffusion model post-training quantization configuration.
        logging_level (`int`, *optional*, defaults to `logging.DEBUG`):
            The logging level.

    Returns:
        `DiffusionPipeline`:
            The diffusion pipeline with quantized model.
    """
    config.output.lock()
    config.dump(path=config.output.get_running_job_path("config.yaml"))
    tools.logging.setup(path=config.output.get_running_job_path("run.log"), level=logging_level)
    logger = tools.logging.getLogger(__name__)

    logger.info("=== Configurations ===")
    tools.logging.info(config.formatted_str(), logger=logger)
    logger.info("=== Dumped Configurations ===")
    tools.logging.info(pprint.pformat(config.dump(), indent=2, width=120), logger=logger)
    logger.info("=== Output Directory ===")
    logger.info(config.output.job_dirpath)

    logger.info("=== Start Evaluating ===")
    logger.info("* Building diffusion model pipeline")
    tools.logging.Formatter.indent_inc()
    pipeline = config.pipeline.build()
    # NOTE(review): the source formatting was lost; everything up to the `ptq` call is
    # assumed to be guarded by this check because `model` is only defined inside it.
    # If so, "nf4"/"gguf" pipelines skip the matching `indent_dec()` and leave
    # `save_dirpath`/`save_model` undefined for the sections below -- confirm.
    if "nf4" not in config.pipeline.name and "gguf" not in config.pipeline.name:
        model = DiffusionModelStruct.construct(pipeline)
        tools.logging.Formatter.indent_dec()
        save_dirpath = os.path.join(config.output.running_job_dirpath, "cache")
        # `config.save_model` is a string: a "false"-like value, a "true"-like value
        # (save under the job's "model" directory), or an explicit directory path.
        if config.save_model:
            if config.save_model.lower() in ("false", "none", "null", "nil"):
                save_model = False
            elif config.save_model.lower() in ("true", "default"):
                save_dirpath, save_model = os.path.join(config.output.running_job_dirpath, "model"), True
            else:
                save_dirpath, save_model = config.save_model, True
        else:
            save_model = False
        model = ptq(
            model,
            config.quant,
            cache=config.cache,
            load_dirpath=config.load_from,
            save_dirpath=save_dirpath,
            copy_on_save=config.copy_on_save,
            save_model=save_model,
        )
    if config.pipeline.lora is not None:
        load_from = ""
        if config.quant.enabled_smooth:
            # the LoRA factors must be rescaled by the same smooth scales as the model;
            # look them up in the load directory, then the cache, then this run's output
            if config.load_from and os.path.exists(os.path.join(config.load_from, "smooth.pt")):
                load_from = os.path.join(config.load_from, "smooth.pt")
            elif config.cache.path and os.path.exists(config.cache.path.smooth):
                load_from = config.cache.path.smooth
            elif os.path.exists(os.path.join(save_dirpath, "smooth.pt")):
                load_from = os.path.join(save_dirpath, "smooth.pt")
            logger.info(f"* Loading smooth scales from {load_from}")
        config.pipeline.load_lora(pipeline, smooth_cache=torch.load(load_from) if load_from else None)
    if config.text is not None and config.text.is_enabled():
        for encoder_name, encoder, tokenizer in config.pipeline.extract_text_encoders(pipeline):
            logger.info(f"* Post-training quantizing the text encoder {encoder_name}")
            patch_attention(encoder)
            patch_gemma_rms_norm(encoder)
            # NOTE(review): `save_dirpath` is extended on every iteration, so a second
            # encoder is saved under ".../encoder/encoder" -- confirm this is intended.
            save_dirpath = os.path.join(save_dirpath, "encoder")
            setattr(
                pipeline,
                encoder_name,
                llm_ptq(
                    encoder,
                    tokenizer,
                    config.text,
                    cache=config.text_cache,
                    load_dirpath=os.path.join(config.load_from, "encoder") if config.load_from else "",
                    save_dirpath=save_dirpath,
                    copy_on_save=config.copy_on_save,
                    save_model=save_model,
                ),
            )
    config.eval.gen_root = config.eval.gen_root.format(
        output=config.output.running_dirpath, job=config.output.running_job_dirname
    )
    if config.skip_eval:
        if not config.skip_gen:
            logger.info("* Generating image")
            tools.logging.Formatter.indent_inc()
            config.eval.generate(pipeline, task=config.pipeline.task)
            tools.logging.Formatter.indent_dec()
    else:
        logger.info(f"* Evaluating model {'(skipping generation)' if config.skip_gen else ''}")
        tools.logging.Formatter.indent_inc()
        results = config.eval.evaluate(pipeline, skip_gen=config.skip_gen, task=config.pipeline.task)
        tools.logging.Formatter.indent_dec()
        if results is not None:
            logger.info(f"* Saving results to {config.output.job_dirpath}")
            with open(config.output.get_running_job_path("results.json"), "w") as f:
                json.dump(results, f, indent=2, sort_keys=True)
    config.output.unlock()
    # honor the documented/annotated return value (previously nothing was returned)
    return pipeline
import ( DiffusionAttentionStruct, DiffusionBlockStruct, DiffusionModelStruct, DiffusionModuleStruct, DiffusionTransformerBlockStruct, ) from .config import DiffusionQuantConfig from .quantizer import DiffusionActivationQuantizer from .utils import get_needs_inputs_fn, get_needs_outputs_fn __all__ = ["quantize_diffusion_activations"] @torch.inference_mode() def quantize_diffusion_block_activations( # noqa: C901 layer: DiffusionBlockStruct | DiffusionModuleStruct, config: DiffusionQuantConfig, quantizer_state_dict: dict[str, dict[str, torch.Tensor | float | None]], layer_cache: dict[str, IOTensorsCache] | None = None, layer_kwargs: dict[str, tp.Any] | None = None, orig_state_dict: dict[str, torch.Tensor] | None = None, ) -> dict[str, DiffusionActivationQuantizer]: """Quantize the activations of a diffusion model block. Args: layer (`DiffusionBlockStruct` or `DiffusionModuleStruct`): The diffusion model block. config (`DiffusionQuantConfig`): The quantization configuration. quantizer_state_dict (`dict[str, dict[str, torch.Tensor | float | None]]`): The activation quantizers state dict cache. layer_cache (`dict[str, IOTensorsCache]`, *optional*): The layer cache. layer_kwargs (`dict[str, Any]`, *optional*): The layer keyword arguments. orig_state_dict (`dict[str, torch.Tensor]`, *optional*): The original state dictionary. Returns: `dict[str, DiffusionActivationQuantizer]`: The activation quantizers. 
""" logger = tools.logging.getLogger(f"{__name__}.ActivationQuant") logger.debug("- Quantizing layer %s", layer.name) layer_cache = layer_cache or {} layer_kwargs = layer_kwargs or {} orig_state_dict = orig_state_dict or {} args_caches: list[ tuple[ str, # key TensorType, list[nn.Linear], # modules list[str], # module names nn.Module, # eval module str, # eval name dict[str, tp.Any], # eval kwargs list[tuple[nn.Parameter, torch.Tensor]], # original wgts ] ] = [] In, Out = TensorType.Inputs, TensorType.Outputs # noqa: F841 used_modules: set[nn.Module] = set() for module_key, module_name, module, parent, field_name in layer.named_key_modules(): modules, orig_struct_wgts = None, {} if field_name in ("k_proj", "v_proj", "add_q_proj", "add_v_proj"): continue if field_name in ("q_proj", "add_k_proj", "up_proj"): grandparent = parent.parent assert isinstance(grandparent, DiffusionTransformerBlockStruct) if grandparent.parallel and parent.idx == 0: if orig_state_dict: orig_struct_wgts = { proj_module: (proj_module.weight, orig_state_dict[f"{proj_name}.weight"]) for _, proj_name, proj_module, _, _ in grandparent.named_key_modules() } if field_name == "q_proj": assert isinstance(parent, DiffusionAttentionStruct) assert module_name == parent.q_proj_name modules, module_names = parent.qkv_proj, parent.qkv_proj_names if grandparent.ffn_struct is not None: modules.append(grandparent.ffn_struct.up_proj) module_names.append(grandparent.ffn_struct.up_proj_name) elif field_name == "add_k_proj": assert isinstance(parent, DiffusionAttentionStruct) assert module_name == parent.add_k_proj_name modules, module_names = parent.add_qkv_proj, parent.add_qkv_proj_names if grandparent.add_ffn_struct is not None: modules.append(grandparent.add_ffn_struct.up_proj) module_names.append(grandparent.add_ffn_struct.up_proj_name) else: assert field_name == "up_proj" if module in used_modules: continue assert module_name == grandparent.add_ffn_struct.up_proj_name assert 
grandparent.attn_structs[0].is_self_attn() eval_module, eval_name, eval_kwargs = grandparent.module, grandparent.name, layer_kwargs elif isinstance(parent, DiffusionAttentionStruct): eval_module, eval_name = parent.module, parent.name eval_kwargs = parent.filter_kwargs(layer_kwargs) if layer_kwargs else {} if orig_state_dict: orig_struct_wgts = { proj_module: (proj_module.weight, orig_state_dict[f"{proj_name}.weight"]) for _, proj_name, proj_module, _, _ in parent.named_key_modules() } if field_name == "q_proj": assert module_name == parent.q_proj_name modules, module_names = parent.qkv_proj, parent.qkv_proj_names else: assert field_name == "add_k_proj" assert module_name == parent.add_k_proj_name modules, module_names = parent.add_qkv_proj, parent.add_qkv_proj_names if modules is None: assert module not in used_modules used_modules.add(module) orig_wgts = [(module.weight, orig_state_dict[f"{module_name}.weight"])] if orig_state_dict else None args_caches.append((module_key, In, [module], [module_name], module, module_name, None, orig_wgts)) else: orig_wgts = [] for proj_module in modules: assert proj_module not in used_modules used_modules.add(proj_module) if orig_state_dict: orig_wgts.append(orig_struct_wgts.pop(proj_module)) orig_wgts.extend(orig_struct_wgts.values()) orig_wgts = None if not orig_wgts else orig_wgts args_caches.append((module_key, In, modules, module_names, eval_module, eval_name, eval_kwargs, orig_wgts)) # endregion quantizers: dict[str, DiffusionActivationQuantizer] = {} tools.logging.Formatter.indent_inc() for module_key, tensor_type, modules, module_names, eval_module, eval_name, eval_kwargs, orig_wgts in args_caches: if isinstance(modules[0], nn.Linear): channels_dim = -1 assert all(isinstance(m, nn.Linear) for m in modules) elif isinstance(modules[0], nn.Conv2d): channels_dim = 1 assert all(isinstance(m, nn.Conv2d) for m in modules) else: raise ValueError(f"Unknown module type: {type(modules[0])}") if tensor_type == TensorType.Inputs: 
@torch.inference_mode()
def quantize_diffusion_activations(
    model: nn.Module | DiffusionModelStruct,
    config: DiffusionQuantConfig,
    quantizer_state_dict: dict[str, dict[str, torch.Tensor | float | None]] | None = None,
    orig_state_dict: dict[str, torch.Tensor] | None = None,
) -> dict[str, dict[str, torch.Tensor | float | None]]:
    """Calibrate (or restore) activation quantizers and hook them onto the model.

    Args:
        model (`nn.Module` or `DiffusionModelStruct`):
            The diffusion model; a plain module is wrapped into a `DiffusionModelStruct`.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        quantizer_state_dict (`dict[str, dict[str, torch.Tensor | float | None]]`, *optional*, defaults to `None`):
            Previously computed activation quantizer states. When empty and the
            configuration needs calibration data, the states are calibrated layer by layer.
        orig_state_dict (`dict[str, torch.Tensor]`, *optional*, defaults to `None`):
            The original state dictionary, forwarded to the per-block calibration.

    Returns:
        `dict[str, dict[str, torch.Tensor | float | None]]`:
            The activation quantizers state dict cache.
    """
    logger = tools.logging.getLogger(f"{__name__}.ActivationQuant")
    if not isinstance(model, DiffusionModelStruct):
        model = DiffusionModelStruct.construct(model)
    assert isinstance(model, DiffusionModelStruct)
    quantizer_state_dict = quantizer_state_dict or {}
    quantizers: dict[str, DiffusionActivationQuantizer] = {}
    # the pre/post modules are skipped only when every one of their keys is skipped
    skip_pre_modules = all(key in config.ipts.skips for key in model.get_prev_module_keys())
    skip_post_modules = all(key in config.ipts.skips for key in model.get_post_module_keys())
    # arguments that are identical for every per-block call
    shared_kwargs = {
        "config": config,
        "quantizer_state_dict": quantizer_state_dict,
        "orig_state_dict": orig_state_dict,
    }
    needs_calibration = not quantizer_state_dict and config.needs_acts_quantizer_cache
    if needs_calibration:
        # no cached states: stream calibration activations layer by layer
        with tools.logging.redirect_tqdm():
            layer_iterator = tqdm(
                config.calib.build_loader().iter_layer_activations(
                    model,
                    needs_inputs_fn=get_needs_inputs_fn(model, config=config),
                    needs_outputs_fn=get_needs_outputs_fn(model, config=config),
                    skip_pre_modules=skip_pre_modules,
                    skip_post_modules=skip_post_modules,
                ),
                desc="quantizing activations",
                leave=False,
                total=model.num_blocks + int(not skip_post_modules) + int(not skip_pre_modules) * 3,
                dynamic_ncols=True,
            )
            for _, (layer, layer_cache, layer_kwargs) in layer_iterator:
                quantizers.update(
                    quantize_diffusion_block_activations(
                        layer=layer, layer_cache=layer_cache, layer_kwargs=layer_kwargs, **shared_kwargs
                    )
                )
    else:
        # states are cached (or no calibration data is needed): just build the quantizers
        named_layers = model.get_named_layers(
            skip_pre_modules=skip_pre_modules, skip_post_modules=skip_post_modules
        )
        for _, layer in named_layers.items():
            quantizers.update(quantize_diffusion_block_activations(layer=layer, **shared_kwargs))
    # register the enabled quantizers as input/output hooks on every key module
    for _, module_name, module, _, _ in model.named_key_modules():
        in_quantizer = quantizers.get(f"{module_name}.input", None)
        out_quantizer = quantizers.get(f"{module_name}.output", None)
        quantize_inputs = in_quantizer is not None and in_quantizer.is_enabled()
        quantize_outputs = out_quantizer is not None and out_quantizer.is_enabled()
        if not (quantize_inputs or quantize_outputs):
            continue
        targets = []
        if quantize_inputs:
            targets.append("inputs")
        if quantize_outputs:
            targets.append("outputs")
        logger.debug("- Quantizing %s (%s)", module_name, " and ".join(targets))
        if quantize_inputs:
            in_quantizer.as_hook(is_output=False).register(module)
        if quantize_outputs:
            out_quantizer.as_hook(is_output=True).register(module)
    return quantizer_state_dict
smooth (`TransfomerQuantSmoothConfig` or `None`, *optional*, defaults to `None`): The smooth quantization configuration. develop_dtype (`torch.dtype`, *optional*, defaults to `None`): The development data type. """ calib: DiffusionCalibCacheLoaderConfig rotation: QuantRotationConfig | None = None smooth: SmoothTransfomerConfig | None = None develop_dtype: torch.dtype = field(default_factory=lambda s=torch.float32: eval_dtype(s, with_quant_dtype=False)) def __post_init__(self) -> None: # noqa: C901 super().__post_init__() if self.rotation is not None and not self.rotation.transforms: self.rotation = None if self.smooth is not None: if not self.smooth.enabled_proj and not self.smooth.enabled_attn: self.smooth = None if self.enabled_smooth and self.smooth.enabled_proj and self.smooth.proj.allow_low_rank: if self.enabled_wgts: self.smooth.proj.allow_low_rank = self.wgts.enabled_low_rank if self.smooth.proj.allow_low_rank: self.smooth.proj.granularity = SearchBasedCalibGranularity.Layer else: self.smooth.proj.allow_low_rank = False if self.enabled_ipts: if self.ipts.enabled_calib_range and self.ipts.calib_range.granularity == SearchBasedCalibGranularity.Group: self.ipts.calib_range.granularity = SearchBasedCalibGranularity.ChannelGroup if self.ipts.static: assert self.ipts.smallest_group_shape[0] == -1, "static quantization requires batch group size to be -1" if self.enabled_opts: if self.opts.enabled_calib_range and self.opts.calib_range.granularity == SearchBasedCalibGranularity.Group: self.opts.calib_range.granularity = SearchBasedCalibGranularity.ChannelGroup if self.opts.static: assert self.opts.smallest_group_shape[0] == -1, "static quantization requires batch group size to be -1" self.organize() self.unsigned_ipts = self.ipts.for_unsigned() @property def enabled_rotation(self) -> bool: """Whether to enable rotation.""" return self.rotation is not None and bool(self.rotation.transforms) @property def enabled_smooth(self) -> bool: """Whether to enable smooth 
quantization.""" return self.smooth is not None @property def enabled_smooth_proj(self) -> bool: """Whether to enable smooth quantization for projections.""" return self.enabled_smooth and self.smooth.enabled_proj @property def enabled_smooth_attn(self) -> bool: """Whether to enable smooth quantization for attentions.""" return self.enabled_smooth and self.smooth.enabled_attn @property def needs_acts_quantizer_cache(self) -> bool: """Whether to cache the activations quantizer settings.""" if self.enabled_ipts and self.ipts.needs_calib_data: return True if self.enabled_opts and self.opts.needs_calib_data: return True return False def generate_calib_dirname(self) -> str: name = "" if self.enabled_rotation: name += "-rotate" if self.rotation.random: name += ".rnd" if self.enabled_smooth: name += "-smooth" if self.enabled_smooth_proj: name += ".proj" if self.enabled_smooth_attn: name += ".attn" calib_name = super().generate_calib_dirname() if calib_name: name += f"-{calib_name}" return name[1:] if name else name def generate_cache_dirpath( self, *, root: str, shift: bool, default_dtype: torch.dtype = torch.float16 ) -> DiffusionQuantCacheConfig: # noqa: C901 """Generate the cache paths for the module quantization configuration.""" quant_names = self.generate_dirnames(default_dtype=default_dtype) if shift: quant_names.append("shift") if self.enabled_wgts and self.wgts.enabled_low_rank: quant_names.extend(QuantLowRankConfig.generate_dirnames(self.wgts.low_rank, prefix="lowrank")) if self.enabled_rotation: quant_names.extend(self.rotation.generate_dirnames(prefix="rotate")) smooth_dirpath = "" if self.enabled_smooth: quant_names.extend(self.smooth.generate_dirnames(prefix="smooth")) smooth_dirpath = os.path.join("smooth", *quant_names) branch_dirpath = "" if self.enabled_wgts and self.wgts.enabled_low_rank: quant_names.extend(self.wgts.low_rank.generate_dirnames(prefix="lowrank")) branch_dirpath = os.path.join("branch", *quant_names) wgts_dirpath = "" if self.enabled_wgts 
and self.wgts.needs_calib_data: quant_names.extend(self.wgts.calib_range.generate_dirnames(prefix="w.range")) wgts_dirpath = os.path.join("wgts", *quant_names) if self.enabled_wgts and self.wgts.enabled_gptq: quant_names.extend(self.wgts.kernel_gptq.generate_dirnames(prefix="w.kernel")) acts_dirpath = "" if self.needs_acts_quantizer_cache: if self.enabled_ipts and self.ipts.needs_calib_data: quant_names.extend(self.ipts.calib_range.generate_dirnames(prefix="x.range")) if self.enabled_opts and self.opts.needs_calib_data: quant_names.extend(self.opts.calib_range.generate_dirnames(prefix="y.range")) acts_dirpath = os.path.join("acts", *quant_names) cache_dirpath = DiffusionQuantCacheConfig( smooth=smooth_dirpath, branch=branch_dirpath, wgts=wgts_dirpath, acts=acts_dirpath ).simplify(type(self)._key_map) cache_dirpath = cache_dirpath.add_parent_dirs(*self.calib.generate_dirnames()) cache_dirpath = cache_dirpath.add_parent_dirs(root, "diffusion", "cache", "quant") return cache_dirpath def generate_default_dirname(self) -> str: # noqa: C901 """Generate output directory name for evaluating a large language model.""" key_map = type(self)._key_map def simplify_skips(skips): return set( DiffusionQuantCacheConfig.simplify_path("skip.[{}]".format("+".join(skips)), key_map=key_map)[ 6:-1 ].split("+") ) skip_name, y_skips, w_skips, x_skips = "", set(), set(), set() if self.enabled_opts and self.opts.skips: y_skips = simplify_skips(self.opts.skips) if self.enabled_ipts and self.ipts.skips: x_skips = simplify_skips(self.ipts.skips) if self.enabled_wgts and self.wgts.skips: w_skips = simplify_skips(self.wgts.skips) skips_map = {} if y_skips or x_skips or w_skips: skip_name = "-skip" skip_name_list: list[tuple[str, set]] = [] if y_skips: skip_name_list.append(("y", y_skips)) if x_skips: skip_name_list.append(("x", x_skips)) if w_skips: skip_name_list.append(("w", w_skips)) # sort the keys by the number of elements in the set skip_name_list = sorted(skip_name_list, key=lambda x: 
(len(x[1]), x[0]), reverse=False) skips_map = {k: v for k, v in skip_name_list} # noqa: C416 skip_name_map: dict[str, set] = {} skip_0, skip_0_names = skip_name_list[0] skip_name_map[skip_0] = skip_0_names if len(skip_name_list) > 1: skip_1, skip_1_names = skip_name_list[1] if skip_1_names.issuperset(skip_0_names): skip_1_names = skip_1_names - skip_0_names skip_1_names.add(f"[{skip_0}]") skip_name_map[skip_1] = skip_1_names if len(skip_name_list) > 2: skip_2, skip_2_names = skip_name_list[2] if skip_2_names.issuperset(skip_name_list[1][1]): # skip_1_names may be modified skip_2_names = skip_2_names - skip_name_list[1][1] skip_2_names.add(f"[{skip_1}]") if skip_2_names.issuperset(skip_0_names): skip_2_names = skip_2_names - skip_0_names skip_2_names.add(f"[{skip_0}]") skip_name_map[skip_2] = skip_2_names if "y" in skip_name_map: skip_name += f".y.[{'+'.join(sorted(skip_name_map['y']))}]" if "x" in skip_name_map: skip_name += f".x.[{'+'.join(sorted(skip_name_map['x']))}]" if "w" in skip_name_map: skip_name += f".w.[{'+'.join(sorted(skip_name_map['w']))}]" del skip_name_list, skip_name_map extra_name = "" if self.enabled_extra_wgts: extra_name = "-extra.[{}]".format("+".join(sorted(simplify_skips(self.extra_wgts.includes)))) lowrank_name = "" if self.enabled_wgts and self.wgts.enabled_low_rank: lowrank_name = f"-low.r{num2str(self.wgts.low_rank.rank)}" if self.wgts.low_rank.num_iters > 1: lowrank_name += f".i{num2str(self.wgts.low_rank.num_iters)}" if self.wgts.low_rank.early_stop: lowrank_name += ".e" if self.wgts.low_rank.exclusive: lowrank_name += ".s" if self.wgts.low_rank.compensate: lowrank_name += ".c" if self.wgts.low_rank.objective != SearchBasedCalibObjective.OutputsError: lowrank_name += f".{self.wgts.low_rank.objective.name}" if self.wgts.low_rank.skips: lowrank_skips = simplify_skips(self.wgts.low_rank.skips) if "w" in skips_map and lowrank_skips.issuperset(skips_map["w"]): lowrank_skips = lowrank_skips - skips_map["w"] lowrank_skips.add("[w]") 
lowrank_name += ".skip.[{}]".format("+".join(sorted(lowrank_skips))) rotation_name = "" if self.enabled_rotation: rotation_name = "-rot" if self.rotation.random: rotation_name += ".rnd" rotation_name += ".[{}]".format("+".join(sorted(simplify_skips(self.rotation.transforms)))) smooth_name = "" if self.enabled_smooth: smooth_name = "-smth" if self.smooth.enabled_proj: smooth_name += ".proj" if self.smooth.proj.granularity != SearchBasedCalibGranularity.Layer: smooth_name += f".{self.smooth.proj.granularity.name}" if self.smooth.proj.strategy != SearchBasedCalibStrategy.Manual: smooth_name += f".{self.smooth.proj.strategy.name}" if self.smooth.proj.alpha <= 0: smooth_name += f".a{num2str(self.smooth.proj.alpha)}" if self.smooth.proj.beta <= 0: smooth_name += f".b{num2str(self.smooth.proj.beta)}" else: smooth_name += f".a{num2str(self.smooth.proj.alpha)}" smooth_name += f".b{num2str(self.smooth.proj.beta)}" xspan_eq_wspan = True for xspan, wspan in self.smooth.proj.spans: if xspan != wspan: xspan_eq_wspan = False break if xspan_eq_wspan: smooth_name += ".[{}]".format("+".join(xspan.name for xspan, _ in self.smooth.proj.spans)) else: smooth_name += ".[{}]".format( "+".join(f"x.{xspan.name}.w.{wspan.name}" for xspan, wspan in self.smooth.proj.spans) ) if self.smooth.proj.allow_low_rank: smooth_name += ".lr" if not self.smooth.proj.allow_b_quant or not self.smooth.proj.allow_a_quant: smooth_name += ".no.[" if not self.smooth.proj.allow_a_quant: smooth_name += "a+" if not self.smooth.proj.allow_b_quant: smooth_name += "b+" smooth_name = smooth_name[:-1] + "]" if self.smooth.proj.skips: smooth_skips = simplify_skips(self.smooth.proj.skips) if "w" in skips_map and smooth_skips.issuperset(skips_map["w"]): smooth_skips = smooth_skips - skips_map["w"] smooth_skips.add("[w]") smooth_name += ".skip.[{}]".format("+".join(sorted(smooth_skips))) if self.smooth.enabled_attn: smooth_name += ".yx" if self.smooth.attn.granularity != SearchBasedCalibGranularity.Layer: smooth_name += 
f".{self.smooth.attn.granularity.name}" if self.smooth.attn.strategy != SearchBasedCalibStrategy.Manual: smooth_name += f".{self.smooth.attn.strategy.name}" if self.smooth.attn.alpha <= 0: smooth_name += f".a{num2str(self.smooth.attn.alpha)}" if self.smooth.attn.beta <= 0: smooth_name += f".b{num2str(self.smooth.attn.beta)}" else: smooth_name += f".a{num2str(self.smooth.attn.alpha)}" smooth_name += f".b{num2str(self.smooth.attn.beta)}" xspan_eq_yspan = True for xspan, yspan in self.smooth.attn.spans: if xspan != yspan: xspan_eq_yspan = False break if xspan_eq_yspan: smooth_name += ".[{}]".format("+".join(xspan.name for xspan, _ in self.smooth.attn.spans)) else: smooth_name += ".[{}]".format( "+".join(f"x.{xspan.name}.y.{yspan.name}" for xspan, yspan in self.smooth.attn.spans) ) gptq_name = "" if self.enabled_wgts and self.wgts.kernel_gptq is not None: gptq_name = "-gptq" if self.wgts.kernel_gptq.skips: gptq_skips = simplify_skips(self.wgts.kernel_gptq.skips) if "w" in skips_map and gptq_skips.issuperset(skips_map["w"]): gptq_skips = gptq_skips - skips_map["w"] gptq_skips.add("[w]") gptq_name += ".skip.[{}]".format("+".join(sorted(gptq_skips))) wrange_name = "" if ( self.enabled_wgts and self.wgts.enabled_calib_range and (self.wgts.calib_range.needs_search or self.wgts.calib_range.ratio != 1) ): wrange_name = "-w.range" if self.wgts.calib_range.needs_search: if self.wgts.calib_range.granularity != SearchBasedCalibGranularity.Group: wrange_name += f".{self.wgts.calib_range.granularity.name}" if self.wgts.calib_range.objective != SearchBasedCalibObjective.OutputsError: wrange_name += f".{self.wgts.calib_range.objective.name}" if self.wgts.calib_range.degree != 2: wrange_name += f".d{num2str(self.wgts.calib_range.degree)}" wrange_name += f".[{num2str(self.wgts.calib_range.max_shrink)}" wrange_name += f".{num2str(self.wgts.calib_range.max_expand)}" wrange_name += f".g{self.wgts.calib_range.num_grids}]" else: wrange_name += f".r{num2str(self.wgts.calib_range.ratio)}" if 
self.wgts.calib_range.skips: wrange_skips = simplify_skips(self.wgts.calib_range.skips) if "w" in skips_map and wrange_skips.issuperset(skips_map["w"]): wrange_skips = wrange_skips - skips_map["w"] wrange_skips.add("[w]") wrange_name += ".skip.[{}]".format("+".join(sorted(wrange_skips))) xrange_name = "" if ( self.enabled_ipts and self.ipts.enabled_calib_range and (self.ipts.calib_range.needs_search or self.ipts.calib_range.ratio != 1) ): xrange_name = "-x.range" if self.ipts.calib_range.needs_search: if self.ipts.calib_range.granularity != SearchBasedCalibGranularity.Group: xrange_name += f".{self.ipts.calib_range.granularity.name}" if self.ipts.calib_range.objective != SearchBasedCalibObjective.OutputsError: xrange_name += f".{self.ipts.calib_range.objective.name}" if self.ipts.calib_range.degree != 2: xrange_name += f".d{num2str(self.ipts.calib_range.degree)}" xrange_name += f".[{num2str(self.ipts.calib_range.max_shrink)}" xrange_name += f".{num2str(self.ipts.calib_range.max_expand)}" xrange_name += f".g{self.ipts.calib_range.num_grids}]" else: xrange_name += f".r{num2str(self.ipts.calib_range.ratio)}" if self.ipts.calib_range.skips: xrange_skips = simplify_skips(self.ipts.calib_range.skips) if "x" in skips_map and xrange_skips.issuperset(skips_map["x"]): xrange_skips = xrange_skips - skips_map["x"] xrange_skips.add("[x]") xrange_name += ".skip.[{}]".format("+".join(sorted(xrange_skips))) yrange_name = "" if ( self.enabled_opts and self.opts.enabled_calib_range and (self.opts.calib_range.needs_search or self.opts.calib_range.ratio != 1) ): yrange_name = "-y.range" if self.opts.calib_range.needs_search: if self.opts.calib_range.granularity != SearchBasedCalibGranularity.Group: yrange_name += f".{self.opts.calib_range.granularity.name}" if self.opts.calib_range.objective != SearchBasedCalibObjective.OutputsError: yrange_name += f".{self.opts.calib_range.objective.name}" if self.opts.calib_range.degree != 2: yrange_name += 
f".d{num2str(self.opts.calib_range.degree)}" yrange_name += f".[{num2str(self.opts.calib_range.max_shrink)}" yrange_name += f".{num2str(self.opts.calib_range.max_expand)}" yrange_name += f".g{self.opts.calib_range.num_grids}]" else: yrange_name += f".r{num2str(self.opts.calib_range.ratio)}" if self.opts.calib_range.skips: yrange_skips = simplify_skips(self.opts.calib_range.skips) if "y" in skips_map and yrange_skips.issuperset(skips_map["y"]): yrange_skips = yrange_skips - skips_map["y"] yrange_skips.add("[y]") yrange_name += ".skip.[{}]".format("+".join(sorted(yrange_skips))) name = ( skip_name + extra_name + lowrank_name + rotation_name + smooth_name + gptq_name + wrange_name + xrange_name + yrange_name ) name = name[1:] if name else "default" name += f"-{self.calib.generate_dirnames()[0]}" return name @classmethod def set_key_map(cls, key_map: dict[str, set[str]]) -> None: """Set the key map for the language model quantization configuration. Args: key_map (dict[str, set[str]]): The key map. """ cls._key_map = key_map def organize(self) -> dict[str, bool]: # noqa: C901 """Organize the flags for the diffusion model quantization configuration. Returns: dict[str, bool]: The organized flags. 
""" key_map = type(self)._key_map wgts_skip_set, ipts_skip_set, opts_skip_set = set(), set(), set() if self.wgts is not None: wgts_skips = [] for skip in self.wgts.skips: wgts_skips.extend(list(key_map[skip])) wgts_skip_set = set(wgts_skips) self.wgts.skips = sorted(wgts_skip_set) if self.wgts.kernel_gptq is not None: wgts_kernel_gptq_skips = [] for skip in self.wgts.kernel_gptq.skips: wgts_kernel_gptq_skips.extend(list(key_map[skip])) self.wgts.kernel_gptq.skips = sorted(set(wgts_kernel_gptq_skips) - wgts_skip_set) if self.wgts.low_rank is not None: wgts_low_rank_skips = [] for skip in self.wgts.low_rank.skips: wgts_low_rank_skips.extend(list(key_map[skip])) self.wgts.low_rank.skips = sorted(set(wgts_low_rank_skips) - wgts_skip_set) if self.wgts.calib_range is not None: wgts_calib_range_skips = [] for skip in self.wgts.calib_range.skips: wgts_calib_range_skips.extend(list(key_map[skip])) self.wgts.calib_range.skips = sorted(set(wgts_calib_range_skips) - wgts_skip_set) if self.extra_wgts is not None: extra_includes = [] for include in self.extra_wgts.includes: extra_includes.extend(list(key_map[include])) extra_includes_set = set(extra_includes) - wgts_skip_set self.extra_wgts.includes = sorted(extra_includes_set) if not self.extra_wgts.is_enabled(): self.extra_wgts = None if self.ipts is not None: ipts_skips = [] for skip in self.ipts.skips: ipts_skips.extend(list(key_map[skip])) ipts_skip_set = set(ipts_skips) self.ipts.skips = sorted(ipts_skip_set) if self.ipts.calib_range is not None: ipts_calib_range_skips = [] for skip in self.ipts.calib_range.skips: ipts_calib_range_skips.extend(list(key_map[skip])) self.ipts.calib_range.skips = sorted(set(ipts_calib_range_skips) - ipts_skip_set) if self.opts is not None: opts_skips = [] for skip in self.opts.skips: opts_skips.extend(list(key_map[skip])) opts_skip_set = set(opts_skips) self.opts.skips = sorted(opts_skip_set) if self.opts.calib_range is not None: opts_calib_range_skips = [] for skip in 
self.opts.calib_range.skips: opts_calib_range_skips.extend(list(key_map[skip])) self.opts.calib_range.skips = sorted(set(opts_calib_range_skips) - opts_skip_set) if self.smooth is not None and self.smooth.proj is not None: smooth_proj_skips = [] for skip in self.smooth.proj.skips: smooth_proj_skips.extend(list(key_map[skip])) self.smooth.proj.skips = sorted(set(smooth_proj_skips) - (wgts_skip_set & ipts_skip_set)) if self.rotation is not None: rotation_transforms = [] for transform in self.rotation.transforms: rotation_transforms.extend(list(key_map[transform])) self.rotation.transforms = sorted(set(rotation_transforms)) ================================================ FILE: deepcompressor/app/diffusion/quant/quantizer/__init__.py ================================================ # -*- coding: utf-8 -*- from .config import DiffusionModuleQuantizerConfig from .quantizer import DiffusionActivationQuantizer, DiffusionWeightQuantizer ================================================ FILE: deepcompressor/app/diffusion/quant/quantizer/config.py ================================================ # -*- coding: utf-8 -*- """Quantizatizer config.""" from dataclasses import dataclass, field import torch from omniconfig import configclass from deepcompressor.calib.config import SkipBasedDynamicRangeCalibConfig, SkipBasedQuantLowRankCalibConfig from deepcompressor.data.dtype import QuantDataType from deepcompressor.quantizer.config import QuantizerConfig from deepcompressor.quantizer.kernel import QuantGptqConfig from deepcompressor.utils.config import EnableConfig, IncludeBasedConfig, SkipBasedConfig __all__ = [ "DiffusionQuantizerConfig", "DiffusionWeightQuantizerConfig", "DiffusionActivationQuantizerConfig", "DiffusionModuleQuantizerConfig", ] @configclass @dataclass class DiffusionGPTQConfig(SkipBasedConfig, QuantGptqConfig): """Configuration for GPTQ quantization. Args: damp_percentage (`float`, *optional*, defaults to `0.01`): The percentage of damping. 
block_size (`int`, *optional*, defaults to `128`): The block size of the GPTQ quantization. num_inv_tries (`int`, *optional*, defaults to `200`): The number of tries for the inverse. hessian_block_size (`int`, *optional*, defaults to `-1`): The block size when calculing the Hessian. skips: list[str] = field(default_factory=list) """ pass @configclass @dataclass class DiffusionQuantizerConfig(QuantizerConfig): """Diffusion model quantizer configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. static (`bool`, *optional*, defaults to `False`): Whether to use static quantization. kernel_gptq (`DiffusionGPTQConfig` or `None`, *optional*, defaults to `None`): The gptq quantization configuration. low_rank (`SkipBasedQuantLowRankCalibConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch calibration configuration. calib_range (`DynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The quantizatizer dynamic range calibration configuration. 
""" static: bool = False kernel_gptq: DiffusionGPTQConfig | None = None low_rank: SkipBasedQuantLowRankCalibConfig | None = None calib_range: SkipBasedDynamicRangeCalibConfig | None = None def __post_init__(self) -> None: super().__post_init__() if self.quant_dtype is None: self.static = False self.kernel_gptq = None self.low_rank = None self.calib_range = None if self.kernel_gptq is not None and not self.kernel_gptq.is_enabled(): self.kernel_gptq = None if self.static and self.calib_range is None: self.calib_range = SkipBasedDynamicRangeCalibConfig() if self.low_rank is not None and not self.low_rank.is_enabled(): self.low_rank = None @property def enabled_gptq(self) -> bool: """Whether quantization kernel calibration is enabled.""" return self.kernel_gptq is not None and self.kernel_gptq.is_enabled() @property def enabled_low_rank(self) -> bool: """Whether quantization SVD calibration is enabled.""" return self.low_rank is not None and self.low_rank.is_enabled() @property def enabled_calib_range(self) -> bool: """Whether quantization dynamic range calibration is enabled.""" return self.calib_range is not None def generate_calib_dirname(self) -> str: """Generate the name for quantization calibration. Returns: str: The name. """ name = "" if self.static: name += ".static" if self.enabled_gptq: name += ".gptq" if self.enabled_low_rank: name += ".lowrank" if self.enabled_calib_range and (self.calib_range.needs_search or self.calib_range.ratio != 1): name += ".range" return name[1:] if name else "" @configclass @dataclass class SkipBasedDiffusionQuantizerConfig(SkipBasedConfig, DiffusionQuantizerConfig): """Diffusion model quantizer configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. 
scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. skips (`[str]`, *optional*, defaults to `[]`): The keys of the modules to skip. static (`bool`, *optional*, defaults to `False`): Whether to use static quantization. kernel_gptq (`DiffusionGPTQConfig` or `None`, *optional*, defaults to `None`): The gptq quantization configuration. low_rank (`SkipBasedQuantLowRankCalibConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch calibration configuration. calib_range (`DynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The quantizatizer dynamic range calibration configuration. """ def __post_init__(self) -> None: super().__post_init__() if self.quant_dtype is None: self.skips.clear() @configclass @dataclass class DiffusionWeightQuantizerConfig(SkipBasedDiffusionQuantizerConfig): """Diffusion model weight quantizer configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. skips (`list[str]`, *optional*, defaults to `[]`): The keys of the modules to skip. low_rank (`SkipBasedQuantLowRankCalibConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch calibration configuration. calib_range (`DynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The quantizatizer dynamic range calibration configuration. 
""" static: bool = field(init=False, default=True) @property def needs_calib_data(self) -> bool: return self.enabled_calib_range and self.calib_range.needs_search @configclass @dataclass class DiffusionActivationQuantizerConfig(SkipBasedDiffusionQuantizerConfig): """Diffusion model activation quantizer configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. skips (`list[str]`, *optional*, defaults to `[]`): The keys of the modules to skip. static (`bool`, *optional*, defaults to `False`): Whether to use static quantization. calib_range (`DynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The quantizatizer dynamic range calibration configuration. allow_unsigned (`bool`, *optional*, defaults to `False`): Whether to allow unsigned data type for activation quantization. """ kernel_gptq: None = field(init=False, default=None) low_rank: None = field(init=False, default=None) allow_unsigned: bool = False @property def needs_calib_data(self) -> bool: return self.enabled_calib_range and (self.calib_range.needs_search or self.static) def generate_dirnames( self, *, prefix: str = "", shape: torch.Size | tuple[int, ...] = (1024, 1024, 16, 16), default_dtype: torch.dtype = torch.float16, **kwargs, ) -> list[str]: """Get the directory names of the quantization configuration. Args: prefix (`str`, *optional*, defaults to `""`): The prefix for the directory names. shape (`torch.Size` or `tuple[int, ...]`, *optional*, defaults to `(1024, 1024, 16, 16)`): The shape of the tensor to be quantized. 
default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`): The dtype of the tensor to be quantized. Returns: `list[str]`: The directory names of the quantization configuration. - The number of effective bits. - The name of the quantization data type. - The name of the group shapes. - The name of the modules to skip. """ names = super().generate_dirnames(prefix=prefix, shape=shape, default_dtype=default_dtype) if self.allow_unsigned: names[1] += ".u" return names def for_unsigned(self) -> "DiffusionActivationQuantizerConfig": """get the quantizer configuration for unsigned activations. Returns: `DiffusionActivationQuantizerConfig`: The quantizer configuration for unsigned activations. """ if isinstance(self.dtype, QuantDataType) and self.allow_unsigned: return DiffusionActivationQuantizerConfig( dtype=self.dtype.to_unsigned(), zero_point=self.zero_point, group_shapes=self.group_shapes, scale_dtypes=self.scale_dtypes, skips=self.skips, static=self.static, calib_range=self.calib_range, allow_unsigned=self.allow_unsigned, ) else: return self @configclass @dataclass class DiffusionExtraWeightQuantizerConfig(IncludeBasedConfig, DiffusionQuantizerConfig): """Diffusion model extra weight quantizer configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. includes (`list[str]`, *optional*, defaults to `[]`): The keys of the modules to include. low_rank (`SkipBasedQuantLowRankCalibConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch calibration configuration. 
calib_range (`DynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The quantizatizer dynamic range calibration configuration. """ static: bool = field(init=False, default=True) kernel_gptq: DiffusionGPTQConfig | None = field(init=False, default=None) low_rank: SkipBasedQuantLowRankCalibConfig | None = field(init=False, default=None) calib_range: SkipBasedDynamicRangeCalibConfig | None = field(init=False, default=None) @property def needs_calib_data(self) -> bool: return self.enabled_calib_range and self.calib_range.needs_search @configclass @dataclass(kw_only=True) class DiffusionModuleQuantizerConfig(EnableConfig): """Diffusion model module quantizer configuration. Args: wgts (`DiffusionWeightQuantizerConfig`): The weight quantization configuration. ipts (`DiffusionActivationQuantizerConfig`): The input activation quantization configuration. opts (`DiffusionActivationQuantizerConfig`): The output activation quantization configuration. """ wgts: DiffusionWeightQuantizerConfig ipts: DiffusionActivationQuantizerConfig opts: DiffusionActivationQuantizerConfig extra_wgts: DiffusionExtraWeightQuantizerConfig | None = None unsigned_ipts: DiffusionActivationQuantizerConfig = field(init=False) def is_enabled(self): return self.enabled_wgts or self.enabled_ipts or self.enabled_opts @property def enabled_wgts(self) -> bool: """Whether to enable weight quantization.""" return self.wgts is not None and self.wgts.is_enabled() @property def enabled_ipts(self) -> bool: """Whether to enable activation quantization.""" return self.ipts is not None and self.ipts.is_enabled() @property def enabled_opts(self) -> bool: """Whether to enable activation quantization.""" return self.opts is not None and self.opts.is_enabled() @property def enabled_extra_wgts(self) -> bool: """Whether to enable extra weight quantization.""" return self.extra_wgts is not None and self.extra_wgts.is_enabled() def __post_init__(self) -> None: if self.enabled_opts: raise 
NotImplementedError("Output activation quantization is not supported yet.") if self.wgts.is_enabled() and self.extra_wgts is not None: self.extra_wgts.includes = list(filter(lambda key: key not in self.wgts.skips, self.extra_wgts.includes)) if self.extra_wgts.is_enabled(): self.extra_wgts.kernel_gptq = self.wgts.kernel_gptq self.extra_wgts.low_rank = self.wgts.low_rank self.extra_wgts.calib_range = self.wgts.calib_range else: self.extra_wgts = None else: self.extra_wgts = None def generate_dirnames( self, *, prefix: str = "", shape: torch.Size | tuple[int, ...] = (1024, 1024, 16, 16), default_dtype: torch.dtype = torch.float16, **kwargs, ) -> list[str]: """Get the directory names of the quantization configuration. Args: prefix (`str`, *optional*, defaults to `""`): The prefix for the directory names. shape (`torch.Size` or `tuple[int, ...]`, *optional*, defaults to `(1024, 1024, 16, 16)`): The shape of the tensor to be quantized. default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`): The dtype of the tensor to be quantized. Returns: `list[str]`: The directory names of the quantization configuration. - The number of effective bits. - The name of the quantization data type. - The name of the group shapes. - The name of the modules to skip. 
""" wgts_names = self.wgts.generate_dirnames(prefix="w", shape=shape, default_dtype=default_dtype) ipts_names = self.ipts.generate_dirnames(prefix="x", shape=shape, default_dtype=default_dtype) opts_names = self.opts.generate_dirnames(prefix="y", shape=shape, default_dtype=default_dtype) names = [ f"{wgts_name}-{ipts_name}-{opts_name}" for wgts_name, ipts_name, opts_name in zip(wgts_names, ipts_names, opts_names, strict=True) ] if self.extra_wgts is not None: extra_wgts_names = self.extra_wgts.generate_dirnames(prefix="w", shape=shape, default_dtype=default_dtype) names = [f"{name}-{extra_wgts_name}" for name, extra_wgts_name in zip(names, extra_wgts_names, strict=True)] if prefix: names = [f"{prefix}.[{name}]" for name in names] return names def generate_calib_dirname(self) -> str: """Generate the name for quantization calibration. Returns: `str`: The name. """ name = "" if self.enabled_wgts: calib_name = self.wgts.generate_calib_dirname() if calib_name: name += f"-w.{calib_name}" if self.enabled_ipts: calib_name = self.ipts.generate_calib_dirname() if calib_name: name += f"-x.{calib_name}" if self.enabled_opts: calib_name = self.opts.generate_calib_dirname() if calib_name: name += f"-y.{calib_name}" return name[1:] if name else name ================================================ FILE: deepcompressor/app/diffusion/quant/quantizer/quantizer.py ================================================ # -*- coding: utf-8 -*- """Tensor Quantizer module.""" import typing as tp from dataclasses import dataclass, field import torch import torch.nn as nn from deepcompressor.calib.config import SkipBasedQuantLowRankCalibConfig from deepcompressor.calib.lowrank import LowRankBranch, QuantLowRankCalibrator from deepcompressor.calib.range import calibrate_dynamic_range from deepcompressor.data.cache import TensorsCache from deepcompressor.data.common import TensorType from deepcompressor.data.range import DynamicRange from deepcompressor.quantizer.processor import Quantizer from 
@dataclass
class DiffusionQuantizer(Quantizer):
    """Denoising model quantizer class.

    Args:
        config (`DiffusionQuantizerConfig`):
            The quantizer configuration.
        key (`str`, *optional*, defaults to `""`):
            The key of the quantizer.
        tensor_type (`TensorType`, *optional*, defaults to `TensorType.Weights`):
            The type of the tensor to quantize.
        channels_dim (`int` or `None`, *optional*, defaults to `None`):
            The dimension of channels.
        scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`):
            The scale tensor.
        zero (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The zero point tensor.
        dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`):
            The dynamic range.
        range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
            The dynamic range bound.
        quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
            The quantization range.
        default_dtype (`torch.dtype` or `None`, *optional*, defaults to `None`):
            The default scale dtype
        develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            The quantization development dtype.
        input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`):
            The input packager, used for unpacking and repacking the input tensor(s).
        output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`):
            The output packager, used for unpacking and repacking the output tensor(s).

    Note:
        `kernel` and `low_rank` are declared with `init=False`: they are not
        constructor arguments of this class. `__post_init__` always copies them
        from `config.kernel_gptq` and `config.low_rank` respectively.
    """

    config: DiffusionQuantizerConfig
    # Filled in by `__post_init__` from `config.kernel_gptq`.
    kernel: DiffusionGPTQConfig | None = field(init=False)
    # Filled in by `__post_init__` from `config.low_rank`.
    low_rank: SkipBasedQuantLowRankCalibConfig | None = field(init=False)
    tensor_type: TensorType = TensorType.Weights

    def __post_init__(self) -> None:
        """Populate the GPTQ kernel and low-rank settings from the configuration."""
        cfg = self.config
        self.kernel = cfg.kernel_gptq
        self.low_rank = cfg.low_rank

    def calibrate_dynamic_range(
        self,
        modules: tp.Sequence[nn.Module],
        activations: TensorsCache,
        weights: tp.Sequence[nn.Parameter] = None,
        eval_inputs: TensorsCache | None = None,
        eval_module: nn.Module | None = None,
        eval_kwargs: dict[str, tp.Any] | None = None,
        orig_weights: tp.Sequence[tuple[nn.Parameter, torch.Tensor]] | None = None,
        orig_activations: TensorsCache | None = None,
        orig_eval_inputs: TensorsCache | None = None,
    ) -> tp.Sequence[DynamicRange] | None:
        """Calibrate the dynamic range and store it on `self.dynamic_range`.

        Calibration only runs when the quantizer is enabled, the configuration
        has a `calib_range`, and that `calib_range` is enabled for `self.key`.
        Otherwise `self.dynamic_range` is reset to `None`.

        Args:
            modules (`Sequence[nn.Module]`):
                The modules to calibrate.
            activations (`TensorsCache`):
                The inputs cache if the tensor type is not outputs, or the outputs cache if the tensor type is outputs.
            weights (`Sequence[nn.Parameter]` or `None`, *optional*, defaults to `None`):
                The weights to calibrate. If not provided, the weights of the modules will be used.
            eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
                The cache of the inputs for evaluation.
                If not provided, the `activations` cache will be used.
            eval_module (`nn.Module` or `None`, *optional*, defaults to `None`):
                The module to evaluate the quantization error.
                If not provided, the module to calibrate will be used.
            eval_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`):
                The keyword arguments for evaluation.
            orig_weights (`Sequence[tuple[nn.Parameter, torch.Tensor]]` or `None`, *optional*, defaults to `None`):
                The original weights.
            orig_activations (`TensorsCache` or `None`, *optional*, defaults to `None`):
                The original activations.
            orig_eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
                The original evaluation inputs.

        Returns:
            `Sequence[DynamicRange]` or `None`:
                The dynamic ranges of each quantization step.
        """
        # Checks are short-circuited in this order on purpose: the config is
        # only inspected once the quantizer reports itself as enabled.
        should_calibrate = (
            self.is_enabled()
            and self.config.calib_range is not None
            and self.config.calib_range.is_enabled_for(self.key)
        )
        if not should_calibrate:
            self.dynamic_range = None
            return self.dynamic_range
        # `calibrate_dynamic_range` below is the module-level function imported
        # from `deepcompressor.calib.range`, not this method.
        self.dynamic_range = calibrate_dynamic_range(
            tensor_type=self.tensor_type,
            config=self.config.calib_range,
            static=self.config.static,
            quantizer=self,
            modules=modules,
            activations=activations,
            weights=weights,
            eval_inputs=eval_inputs,
            eval_module=eval_module,
            eval_kwargs=eval_kwargs,
            orig_weights=orig_weights,
            orig_activations=orig_activations,
            orig_eval_inputs=orig_eval_inputs,
        )
        return self.dynamic_range
If not provided (i.e., `MISSING`), the GPTQ configuration from the `config` will be used. low_rank (`QuantLowRankConfig` or `None`, *optional*, defaults to `MISSING`): The quantization low-rank branch configuration. If not provided (i.e., `MISSING`), the low-rank branch configuration from the `config` will be used. input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`): The input packager, used for unpacking and repacking the input tensor(s). output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`): The output packager, used for unpacking and repacking the output tensor(s). """ config: DiffusionWeightQuantizerConfig channels_dim: None = field(init=False, default=None) tensor_type: TensorType = field(init=False, default=TensorType.Weights) def calibrate_dynamic_range( self, module: nn.Module, inputs: TensorsCache, weight: nn.Parameter | None = None, eval_inputs: TensorsCache | None = None, eval_module: nn.Module | None = None, eval_kwargs: dict[str, tp.Any] | None = None, orig_inputs: TensorsCache | None = None, orig_eval_inputs: TensorsCache | None = None, ) -> tp.Sequence[DynamicRange] | None: """Calibrate the dynamic range. Args: module (`nn.Module`): The module to calibrate. inputs (`TensorsCache`): The inputs cache. weight (`nn.Parameter` or `None`, *optional*, defaults to `None`): The weight parameter to calibrate. If not provided, the weight of the `module` will be used. eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The cache of the inputs for evaluation. If not provided, the `activations` cache will be used. eval_module (`nn.Module` or `None`, *optional*, defaults to `None`): The module to evaluate the quantization error. If not provided, the module to calibrate will be used. eval_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`): The keyword arguments for evaluation. orig_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The original inputs. 
@dataclass
class DiffusionActivationQuantizer(DiffusionQuantizer):
    """Diffusion model activation quantizer class.

    The tensor type must not be `TensorType.Weights` and `channels_dim` must be
    an `int`; both are asserted in `__post_init__`.

    Args:
        config (`DiffusionActivationQuantizerConfig` or `None`):
            The quantizer configuration.
        key (`str`, *optional*, defaults to `""`):
            The key of the quantizer.
        tensor_type (`TensorType`, *optional*, defaults to `TensorType.Inputs`):
            The type of the tensor to quantize.
        channels_dim (`int` or `None`, *optional*, defaults to `None`):
            The dimension of channels. Although the default is `None`, an `int`
            must be supplied, otherwise construction fails.
        scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`):
            The scale tensor.
        zero (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The zero point tensor.
        dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`):
            The dynamic range.
        range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
            The dynamic range bound.
        quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
            The quantization range.
        default_dtype (`torch.dtype` or `None`, *optional*, defaults to `None`):
            The default scale dtype
        develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            The quantization development dtype.
        input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`):
            The input packager, used for unpacking and repacking the input tensor(s).
        output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`):
            The output packager, used for unpacking and repacking the output tensor(s).
    """

    config: DiffusionActivationQuantizerConfig
    tensor_type: TensorType = TensorType.Inputs

    def __post_init__(self) -> None:
        """Run the parent initialization, then validate the activation-specific fields."""
        super().__post_init__()
        # NOTE(review): these are `assert` statements, so the checks are
        # skipped when Python runs with `-O`.
        assert self.tensor_type != TensorType.Weights, "The tensor type cannot be weights."
        assert isinstance(self.channels_dim, int), "The channels dimension must be provided."
@torch.inference_mode()
def rotate_diffusion(  # noqa: C901
    model: DiffusionModelStruct, /, config: DiffusionQuantConfig
):
    """Rotate the weights of the diffusion model in place.

    For every transformer block, the projections whose key is listed in
    `config.rotation.transforms` are transformed, unless the key is skipped for
    both weight and input quantization (i.e. present in both
    `config.wgts.skips` and `config.ipts.skips`).

    Args:
        model (`DiffusionModelStruct`):
            Model to be rotated. Anything that is not already a
            `DiffusionModelStruct` is first passed to
            `DiffusionModelStruct.construct`.
        config (`DiffusionQuantConfig`):
            Quantization configuration; `config.rotation`, `config.wgts.skips`
            and `config.ipts.skips` are read.
    """
    if not isinstance(model, DiffusionModelStruct):
        model = DiffusionModelStruct.construct(model)
    assert isinstance(model, DiffusionModelStruct)
    # Remember the original device/dtype of every linear layer so they can be
    # restored once all rotations have been applied.
    devices: dict[str, torch.device] = {}
    dtypes: dict[str, torch.dtype] = {}
    linears: dict[str, torch.nn.Linear] = {}
    # Total number of linear weight elements, in units of 1e9.
    size: float = 0
    for n, m in model.module.named_modules():
        if isinstance(m, torch.nn.Linear):
            devices[n] = m.weight.device
            dtypes[n] = m.weight.dtype
            linears[n] = m
            size += m.weight.numel() / 1e9
    # Work in float32; when the linear layers hold more than 30e9 weight
    # elements in total they are also moved to the CPU, otherwise the device is
    # left unchanged (`device=None`).
    for linear in linears.values():
        linear.to(dtype=torch.float32, device="cpu" if size > 30 else None)

    logger = tools.logging.getLogger(f"{__name__}.Rotate")
    # Created lazily on first use and then shared by all attention modules.
    head_rotation = None
    for transformer_block in model.iter_transformer_block_structs():
        logger.debug(f"- Rotating {transformer_block.name}")
        tools.logging.Formatter.indent_inc()
        for attn in transformer_block.iter_attention_structs():
            # QKV projections (input channels).
            if attn.qkv_proj_key in config.rotation.transforms:
                if attn.qkv_proj_key not in config.wgts.skips or attn.qkv_proj_key not in config.ipts.skips:
                    logger.debug(f"- Hadamard transform on {attn.name}.qkv_proj (in)")
                    hadamard_in_channels(
                        attn.qkv_proj, dtype=dtypes[attn.q_proj_name], device=devices[attn.q_proj_name]
                    )
            # Additional QKV projections, only for non-self attention.
            if not attn.is_self_attn() and attn.add_qkv_proj_key in config.rotation.transforms:
                if attn.add_qkv_proj_key not in config.wgts.skips or attn.add_qkv_proj_key not in config.ipts.skips:
                    logger.debug(f"- Hadamard transform on {attn.name}.add_qkv_proj (in)")
                    hadamard_in_channels(
                        attn.add_qkv_proj, dtype=dtypes[attn.add_k_proj_name], device=devices[attn.add_k_proj_name]
                    )
            # Output projections: the same per-head rotation is applied to the
            # output channels of the value projections and the input channels
            # of the output projections.
            if attn.out_proj_key in config.rotation.transforms or attn.add_out_proj_key in config.rotation.transforms:
                if (
                    attn.out_proj_key not in config.wgts.skips
                    or attn.out_proj_key not in config.ipts.skips
                    or attn.add_out_proj_key not in config.wgts.skips
                    or attn.add_out_proj_key not in config.ipts.skips
                ):
                    if head_rotation is None:
                        head_rotation = get_rotation_matrix(
                            attn.config.num_head_channels, random=config.rotation.random
                        )
                    if attn.v_proj is not None:
                        logger.debug(f"- Rotating {attn.v_proj_name} (out)")
                        rotate_out_channels(attn.v_proj.weight, rotation=head_rotation, bias=attn.v_proj.bias)
                    if attn.add_v_proj is not None:
                        logger.debug(f"- Rotating {attn.add_v_proj_name} (out)")
                        rotate_out_channels(attn.add_v_proj.weight, rotation=head_rotation, bias=attn.add_v_proj.bias)
                    if attn.o_proj is not None:
                        logger.debug(f"- Rotating {attn.o_proj_name} (in)")
                        rotate_in_channels(attn.o_proj.weight, rotation=head_rotation)
                    if attn.add_o_proj is not None:
                        logger.debug(f"- Rotating {attn.add_o_proj_name} (in)")
                        rotate_in_channels(attn.add_o_proj.weight, rotation=head_rotation)
            # Release temporaries before moving on to the next module.
            gc.collect()
            torch.cuda.empty_cache()
        # NOTE(review): `ffn` is dereferenced without a `None` check, unlike
        # `add_ffn` — presumably every block here has a feed-forward; confirm.
        ffn, add_ffn = transformer_block.ffn_struct, transformer_block.add_ffn_struct
        # Feed-forward up projections (input channels).
        if ffn.up_proj_key in config.rotation.transforms:
            if ffn.up_proj_key not in config.wgts.skips or ffn.up_proj_key not in config.ipts.skips:
                logger.debug(f"- Hadamard transform on {ffn.up_proj_name} (in)")
                hadamard_in_channels(ffn.up_projs, dtype=dtypes[ffn.up_proj_name], device=devices[ffn.up_proj_name])
        if add_ffn is not None and add_ffn.up_proj_key in config.rotation.transforms:
            if add_ffn.up_proj_key not in config.wgts.skips or add_ffn.up_proj_key not in config.ipts.skips:
                logger.debug(f"- Hadamard transform on {add_ffn.up_proj_name} (in)")
                hadamard_in_channels(
                    add_ffn.up_projs, dtype=dtypes[add_ffn.up_proj_name], device=devices[add_ffn.up_proj_name]
                )
        # Feed-forward down projections (input channels).
        if ffn.down_proj_key in config.rotation.transforms:
            if ffn.down_proj_key not in config.wgts.skips or ffn.down_proj_key not in config.ipts.skips:
                logger.debug(f"- Hadamard transform on {ffn.down_proj_name} (in)")
                hadamard_in_channels(
                    ffn.down_projs, dtype=dtypes[ffn.down_proj_name], device=devices[ffn.down_proj_name]
                )
        if add_ffn is not None and add_ffn.down_proj_key in config.rotation.transforms:
            if add_ffn.down_proj_key not in config.wgts.skips or add_ffn.down_proj_key not in config.ipts.skips:
                logger.debug(f"- Hadamard transform on {add_ffn.down_proj_name} (in)")
                hadamard_in_channels(
                    add_ffn.down_projs, dtype=dtypes[add_ffn.down_proj_name], device=devices[add_ffn.down_proj_name]
                )
        gc.collect()
        torch.cuda.empty_cache()
        tools.logging.Formatter.indent_dec()

    # Put every linear layer back on its original device and dtype.
    for n, m in linears.items():
        m.to(device=devices[n], dtype=dtypes[n])
DiffusionAttentionStruct, config: DiffusionQuantConfig, smooth_cache: dict[str, torch.Tensor], block_cache: dict[str, IOTensorsCache] | None = None, block_kwargs: dict[str, tp.Any] | None = None, ) -> dict[str, torch.Tensor]: logger = tools.logging.getLogger(f"{__name__}.SmoothQuant") # attention qk if config.smooth.enabled_attn: logger.debug("- %s.k", attn.name) raise NotImplementedError("Not implemented yet") return smooth_cache @torch.inference_mode() def smooth_diffusion_qkv_proj( attn: DiffusionAttentionStruct, config: DiffusionQuantConfig, smooth_cache: dict[str, torch.Tensor], block_cache: dict[str, IOTensorsCache] | None = None, block_kwargs: dict[str, tp.Any] | None = None, ) -> dict[str, torch.Tensor]: logger = tools.logging.getLogger(f"{__name__}.SmoothQuant") # region qkv projection module_key = attn.qkv_proj_key needs_quant = config.enabled_wgts and config.wgts.is_enabled_for(module_key) needs_quant = needs_quant or (config.enabled_ipts and config.ipts.is_enabled_for(module_key)) if needs_quant and config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(module_key): logger.debug("- %s.qkv_proj", attn.name) prevs = None if config.smooth.proj.fuse_when_possible and attn.parent.norm_type.startswith("layer_norm"): if not hasattr(attn.parent.module, "pos_embed") or attn.parent.module.pos_embed is None: prevs = attn.parent.pre_attn_norms[attn.idx] assert isinstance(prevs, nn.LayerNorm) cache_key = attn.q_proj_name config_wgts = config.wgts if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key): config_wgts = config.extra_wgts smooth_cache[cache_key] = smooth_linear_modules( prevs, attn.qkv_proj, scale=smooth_cache.get(cache_key, None), config=config.smooth.proj, weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=module_key), inputs=block_cache[attn.q_proj_name].inputs if block_cache else None, 
eval_inputs=block_cache[attn.name].inputs if block_cache else None, eval_module=attn, eval_kwargs=attn.filter_kwargs(block_kwargs), develop_dtype=config.develop_dtype, ) if prevs is None: # we need to register forward pre hook to smooth inputs if attn.module.group_norm is None and attn.module.spatial_norm is None: ActivationSmoother( smooth_cache[cache_key], channels_dim=-1, input_packager=KeyedInputPackager(attn.module, [0]), ).as_hook().register(attn.module) else: ActivationSmoother(smooth_cache[cache_key], channels_dim=-1).as_hook().register(attn.qkv_proj) for m in attn.qkv_proj: m.in_smooth_cache_key = cache_key # endregion if attn.is_self_attn(): return smooth_cache # region additional qkv projection module_key = attn.add_qkv_proj_key needs_quant = config.enabled_wgts and config.wgts.is_enabled_for(module_key) needs_quant = needs_quant or (config.enabled_ipts and config.ipts.is_enabled_for(module_key)) needs_quant = needs_quant and attn.add_k_proj is not None if needs_quant and config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(module_key): logger.debug("- %s add_qkv_proj", attn.name) prevs = None pre_attn_add_norm = attn.parent.pre_attn_add_norms[attn.idx] if isinstance(pre_attn_add_norm, nn.LayerNorm) and config.smooth.proj.fuse_when_possible: prevs = pre_attn_add_norm cache_key = attn.add_k_proj_name config_wgts = config.wgts if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key): config_wgts = config.extra_wgts smooth_cache[cache_key] = smooth_linear_modules( prevs, attn.add_qkv_proj, scale=smooth_cache.get(cache_key, None), config=config.smooth.proj, weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=module_key), inputs=block_cache[attn.add_k_proj_name].inputs if block_cache else None, eval_inputs=block_cache[attn.name].inputs if block_cache else None, eval_module=wrap_joint_attn(attn, indexes=1) if attn.is_joint_attn() 
@torch.inference_mode()
def smooth_diffusion_out_proj(  # noqa: C901
    attn: DiffusionAttentionStruct,
    config: DiffusionQuantConfig,
    smooth_cache: dict[str, torch.Tensor],
    block_cache: dict[str, IOTensorsCache] | None = None,
    block_kwargs: dict[str, tp.Any] | None = None,
) -> dict[str, torch.Tensor]:
    """Smooth the attention output projection(s) of a diffusion attention module.

    The output projection (and, for joint attention, the additional output
    projection) is smoothed when weight or input quantization is enabled for
    its key and projection smoothing is enabled for that key. Depending on
    which of the two keys qualify, either one projection is smoothed with the
    other passed along as an extra module, or both are smoothed together.

    While smoothing runs, `config.wgts.low_rank.exclusive` is temporarily
    forced to `True` (when weight low-rank is enabled); the previous value is
    always restored, even if smoothing raises.

    Args:
        attn (`DiffusionAttentionStruct`):
            The attention module struct.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        smooth_cache (`dict[str, torch.Tensor]`):
            The smoothing scales cache. Updated in place; an existing entry for
            the projection is passed to the smoother as `scale`.
        block_cache (`dict[str, IOTensorsCache]` or `None`, *optional*, defaults to `None`):
            The cached inputs keyed by module name. If empty or `None`, no
            inputs are passed to the smoother.
        block_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`):
            The block keyword arguments; only used (filtered through
            `attn.filter_kwargs`) when both projections are smoothed together.

    Returns:
        `dict[str, torch.Tensor]`:
            The same `smooth_cache` object.
    """
    logger = tools.logging.getLogger(f"{__name__}.SmoothQuant")
    module_keys = []
    for module_key in (attn.out_proj_key, attn.add_out_proj_key) if attn.is_joint_attn() else (attn.out_proj_key,):
        needs_quant = config.enabled_wgts and config.wgts.is_enabled_for(module_key)
        needs_quant = needs_quant or (config.enabled_ipts and config.ipts.is_enabled_for(module_key))
        if needs_quant and config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(module_key):
            module_keys.append(module_key)
    if not module_keys:
        return smooth_cache
    # Temporarily force the low-rank `exclusive` flag on the shared config.
    override_exclusive = config.enabled_wgts and config.wgts.enabled_low_rank
    exclusive = False
    if override_exclusive:
        exclusive = config.wgts.low_rank.exclusive
        config.wgts.low_rank.exclusive = True
    try:
        fuse_smooth = not attn.config.linear_attn and config.smooth.proj.fuse_when_possible
        # When fusing, the scale is folded into the value projections; either
        # entry may be `None`.
        prevs = [attn.v_proj, attn.add_v_proj] if fuse_smooth else None
        if len(module_keys) == 1 and module_keys[0] == attn.out_proj_key:
            logger.debug("- %s.out_proj", attn.name)
            module_key = attn.out_proj_key
            cache_key = attn.o_proj_name
            config_wgts = config.wgts
            if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key):
                config_wgts = config.extra_wgts
            smooth_cache[cache_key] = smooth_linear_modules(
                prevs,
                attn.o_proj,
                scale=smooth_cache.get(cache_key, None),
                config=config.smooth.proj,
                weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank),
                input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=module_key),
                inputs=block_cache[attn.o_proj_name].inputs if block_cache else None,
                eval_inputs=block_cache[attn.o_proj_name].inputs if block_cache else None,
                eval_module=attn.o_proj,
                extra_modules=[attn.add_o_proj] if attn.is_joint_attn() else None,
                develop_dtype=config.develop_dtype,
            )
        elif len(module_keys) == 1 and module_keys[0] == attn.add_out_proj_key:
            assert attn.is_joint_attn()
            logger.debug("- %s.add_out_proj", attn.name)
            module_key = attn.add_out_proj_key
            cache_key = attn.add_o_proj_name
            config_wgts = config.wgts
            if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key):
                config_wgts = config.extra_wgts
            smooth_cache[cache_key] = smooth_linear_modules(
                prevs,
                attn.add_o_proj,
                scale=smooth_cache.get(cache_key, None),
                config=config.smooth.proj,
                weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank),
                input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=module_key),
                inputs=block_cache[attn.add_o_proj_name].inputs if block_cache else None,
                eval_inputs=block_cache[attn.add_o_proj_name].inputs if block_cache else None,
                eval_module=attn.add_o_proj,
                extra_modules=[attn.o_proj],
                develop_dtype=config.develop_dtype,
            )
        else:
            # Both projections qualify: smooth them jointly and evaluate on the
            # whole (wrapped) attention module.
            assert attn.is_joint_attn()
            logger.debug("- %s.out_proj + %s.add_out_proj", attn.name, attn.name)
            module_key = attn.out_proj_key
            cache_key = attn.o_proj_name
            config_wgts = config.wgts
            if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key):
                config_wgts = config.extra_wgts
            smooth_cache[cache_key] = smooth_linear_modules(
                prevs,
                [attn.o_proj, attn.add_o_proj],
                scale=smooth_cache.get(cache_key, None),
                config=config.smooth.proj,
                weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank),
                input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=module_key),
                inputs=block_cache[attn.o_proj_name].inputs if block_cache else None,
                eval_inputs=block_cache[attn.name].inputs if block_cache else None,
                eval_module=wrap_joint_attn(attn, indexes=(0, 1)),
                eval_kwargs=attn.filter_kwargs(block_kwargs),
                develop_dtype=config.develop_dtype,
            )
    finally:
        # Restore the caller's flag even when smoothing fails, so a raised
        # exception cannot leave the shared config permanently modified.
        if override_exclusive:
            config.wgts.low_rank.exclusive = exclusive
    if fuse_smooth:
        for prev in prevs:
            if prev is not None:
                prev.out_smooth_cache_key = cache_key
    else:
        # Nothing to fuse into: smooth the inputs at run time with a hook.
        for o_proj in [attn.o_proj, attn.add_o_proj]:
            if o_proj is not None:
                ActivationSmoother(smooth_cache[cache_key], channels_dim=-1).as_hook().register(o_proj)
    attn.o_proj.in_smooth_cache_key = cache_key
    if attn.add_o_proj is not None:
        attn.add_o_proj.in_smooth_cache_key = cache_key
    return smooth_cache
@torch.inference_mode()
def smooth_diffusion_down_proj(
    ffn: DiffusionFeedForwardStruct,
    config: DiffusionQuantConfig,
    smooth_cache: dict[str, torch.Tensor],
    block_cache: dict[str, IOTensorsCache] | None = None,
) -> dict[str, torch.Tensor]:
    """Smooth the down projection of a diffusion feed-forward module.

    Smoothing runs when weight or input quantization is enabled for the down
    projection key and projection smoothing is enabled for that key. The
    resulting scale is stored in `smooth_cache` under the down projection name
    and an `ActivationSmoother` hook is registered on the down projection.

    Args:
        ffn (`DiffusionFeedForwardStruct`):
            The feed-forward module struct.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        smooth_cache (`dict[str, torch.Tensor]`):
            The smoothing scales cache. Updated in place; an existing entry for
            the projection is passed to the smoother as `scale`.
        block_cache (`dict[str, IOTensorsCache]` or `None`, *optional*, defaults to `None`):
            The cached inputs keyed by module name. If empty or `None`, no
            inputs are passed to the smoother.

    Returns:
        `dict[str, torch.Tensor]`:
            The same `smooth_cache` object.
    """
    logger = tools.logging.getLogger(f"{__name__}.SmoothQuant")
    # ffn down projection
    # Use the key exactly as the struct reports it, like every other smoothing
    # routine in this module; changing its case would make it differ from the
    # keys used elsewhere for the same projection.
    module_key = ffn.down_proj_key
    needs_quant = config.enabled_wgts and config.wgts.is_enabled_for(module_key)
    needs_quant = needs_quant or (config.enabled_ipts and config.ipts.is_enabled_for(module_key))
    if needs_quant and config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(module_key):
        logger.debug("- %s.down_proj", ffn.name)
        cache_key = ffn.down_proj_name
        # A down projection flagged `unsigned` uses the unsigned input config.
        config_ipts = config.unsigned_ipts if getattr(ffn.down_proj, "unsigned", False) else config.ipts
        config_wgts = config.wgts
        if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key):
            config_wgts = config.extra_wgts
        # Channels are the last dim for `nn.Linear`, dim 1 otherwise.
        channels_dim = -1 if isinstance(ffn.down_proj, nn.Linear) else 1
        smooth_cache[cache_key] = smooth_linear_modules(
            None,
            ffn.down_proj,
            scale=smooth_cache.get(cache_key, None),
            config=config.smooth.proj,
            weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank),
            input_quantizer=Quantizer(config_ipts, channels_dim=channels_dim, key=module_key),
            inputs=block_cache[ffn.down_proj_name].inputs if block_cache else None,
            eval_inputs=block_cache[ffn.down_proj_name].inputs if block_cache else None,
            eval_module=ffn.down_proj,
            develop_dtype=config.develop_dtype,
        )
        ffn.down_proj.in_smooth_cache_key = cache_key
        # No previous module is fused (`None` above), so inputs are smoothed
        # at run time by a hook on the down projection.
        ActivationSmoother(smooth_cache[cache_key], channels_dim=channels_dim).as_hook().register(ffn.down_proj)
    return smooth_cache
cache_key # endregion # region add qkv proj + add up proj if attn.is_self_attn(): if block.add_ffn_struct is not None: smooth_cache = smooth_diffusion_up_proj( pre_ffn_norm=block.pre_add_ffn_norm, ffn=block.add_ffn_struct, config=config, smooth_cache=smooth_cache, block_cache=block_cache, ) return smooth_cache module_key = attn.add_qkv_proj_key needs_quant = config.enabled_wgts and config.wgts.is_enabled_for(module_key) needs_quant = needs_quant or (config.enabled_ipts and config.ipts.is_enabled_for(module_key)) if needs_quant and config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(module_key): add_ffn = block.add_ffn_struct cache_key = attn.add_k_proj_name modules = attn.add_qkv_proj if add_ffn is None: logger.debug("- %s.add_qkv_proj", attn.name) else: logger.debug("- %s.add_qkv_proj + %s.up_proj", attn.name, add_ffn.name) modules = modules + add_ffn.up_projs config_wgts = config.wgts if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key): config_wgts = config.extra_wgts smooth_cache[cache_key] = smooth_linear_modules( None, modules, scale=smooth_cache.get(cache_key, None), config=config.smooth.proj, weight_quantizer=Quantizer(config_wgts, key=module_key, low_rank=config.wgts.low_rank), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=module_key), inputs=block_cache[attn.add_k_proj_name].inputs if block_cache else None, eval_inputs=block_cache[block.name].inputs if block_cache else None, eval_module=block, eval_kwargs=block_kwargs, develop_dtype=config.develop_dtype, ) ActivationSmoother(smooth_cache[cache_key], channels_dim=-1).as_hook().register(modules) for m in modules: m.in_smooth_cache_key = cache_key # endregion return smooth_cache @torch.inference_mode() def smooth_diffusion_sequential_transformer_block( block: DiffusionTransformerBlockStruct, config: DiffusionQuantConfig, smooth_cache: dict[str, torch.Tensor], block_cache: dict[str, IOTensorsCache] | None = None, block_kwargs: dict[str, tp.Any] | None = 
@torch.inference_mode()
def smooth_diffusion_parallel_transformer_block(
    block: DiffusionTransformerBlockStruct,
    config: DiffusionQuantConfig,
    smooth_cache: dict[str, torch.Tensor],
    block_cache: dict[str, IOTensorsCache] | None = None,
    block_kwargs: dict[str, tp.Any] | None = None,
) -> dict[str, torch.Tensor]:
    """Smooth every projection group of a parallel transformer block.

    For each attention the attention itself is smoothed first, then its qkv
    projections (jointly with the up projection for the attention whose `idx`
    is 0), then its output projection. Finally the down projections of the
    feed-forward and, when present, of the additional feed-forward are smoothed.

    Args:
        block (`DiffusionTransformerBlockStruct`):
            The transformer block; must be parallel and own a feed-forward.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        smooth_cache (`dict[str, torch.Tensor]`):
            The smoothing scales cache.
        block_cache (`dict[str, IOTensorsCache]`, *optional*):
            The block activation cache.
        block_kwargs (`dict[str, tp.Any]`, *optional*):
            The block keyword arguments.

    Returns:
        `dict[str, torch.Tensor]`:
            The smoothing scales cache.
    """
    assert block.parallel
    assert block.ffn_struct is not None
    # Arguments shared by every attention-level smoothing call below.
    attn_common = {"config": config, "block_cache": block_cache, "block_kwargs": block_kwargs}
    for attn_struct in block.attn_structs:
        smooth_cache = smooth_diffusion_attention(attn=attn_struct, smooth_cache=smooth_cache, **attn_common)
        if attn_struct.idx != 0:
            smooth_cache = smooth_diffusion_qkv_proj(attn=attn_struct, smooth_cache=smooth_cache, **attn_common)
        else:
            # The first attention shares its smoothing scale with the up projection.
            smooth_cache = smooth_diffusion_parallel_qkv_up_proj(
                block=block, smooth_cache=smooth_cache, **attn_common
            )
        smooth_cache = smooth_diffusion_out_proj(attn=attn_struct, smooth_cache=smooth_cache, **attn_common)
    smooth_cache = smooth_diffusion_down_proj(
        ffn=block.ffn_struct, config=config, smooth_cache=smooth_cache, block_cache=block_cache
    )
    extra_ffn = block.add_ffn_struct
    if extra_ffn is None:
        return smooth_cache
    return smooth_diffusion_down_proj(
        ffn=block.add_ffn_struct, config=config, smooth_cache=smooth_cache, block_cache=block_cache
    )
@torch.inference_mode()
def smooth_diffusion_layer(
    layer: DiffusionBlockStruct,
    config: DiffusionQuantConfig,
    smooth_cache: dict[str, torch.Tensor],
    layer_cache: dict[str, IOTensorsCache] | None = None,
    layer_kwargs: dict[str, tp.Any] | None = None,
) -> None:
    """Smooth one block of a diffusion model.

    Key modules that belong to an attention or feed-forward struct are handled
    once per owning transformer block (parallel or sequential variant). Other
    `nn.Linear` / `nn.Conv2d` key modules are smoothed individually. Any other
    module is skipped, unless smoothing is enabled for it, in which case
    `NotImplementedError` is raised.

    Args:
        layer (`DiffusionBlockStruct`):
            The diffusion block.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        smooth_cache (`dict[str, torch.Tensor]`):
            The smoothing scales cache.
        layer_cache (`dict[str, IOTensorsCache]`, *optional*):
            The layer cache.
        layer_kwargs (`dict[str, tp.Any]`, *optional*):
            The layer keyword arguments.

    Raises:
        NotImplementedError:
            If smoothing is requested for a module type that is not supported.
    """
    logger = tools.logging.getLogger(f"{__name__}.SmoothQuant")
    logger.debug("- Smoothing Diffusion Block %s", layer.name)
    tools.logging.Formatter.indent_inc()
    if not layer_cache:
        layer_cache = {}
    if not layer_kwargs:
        layer_kwargs = {}
    # We skip resnets since we currently cannot scale the Swish function
    seen_blocks: set[str] = set()
    for module_key, module_name, module, parent, _ in layer.named_key_modules():
        if isinstance(parent, (DiffusionAttentionStruct, DiffusionFeedForwardStruct)):
            owner = parent.parent
            assert isinstance(owner, DiffusionTransformerBlockStruct)
            if owner.name in seen_blocks:
                # The whole transformer block was already smoothed via an earlier key module.
                continue
            logger.debug("- Smoothing Transformer Block %s", owner.name)
            seen_blocks.add(owner.name)
            tools.logging.Formatter.indent_inc()
            smooth_block = (
                smooth_diffusion_parallel_transformer_block
                if owner.parallel
                else smooth_diffusion_sequential_transformer_block
            )
            smooth_cache = smooth_block(
                block=owner,
                config=config,
                smooth_cache=smooth_cache,
                block_cache=layer_cache,
                block_kwargs=layer_kwargs,
            )
            tools.logging.Formatter.indent_dec()
            continue
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            smooth_cache = smooth_diffusion_module(
                module_key=module_key,
                module_name=module_name,
                module=module,
                config=config,
                smooth_cache=smooth_cache,
                layer_cache=layer_cache,
            )
            continue
        quantized = config.enabled_wgts and config.wgts.is_enabled_for(module_key)
        quantized = quantized or (config.enabled_ipts and config.ipts.is_enabled_for(module_key))
        if quantized and config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(module_key):
            raise NotImplementedError(f"Module {module_name} is not supported for smoothing")
        logger.debug("- Skipping Module %s", module_name)
    tools.logging.Formatter.indent_dec()
""" if not isinstance(model, DiffusionModelStruct): model = DiffusionModelStruct.construct(model) assert isinstance(model, DiffusionModelStruct) smooth_cache = smooth_cache or {} if config.smooth.enabled_proj: if smooth_cache: assert smooth_cache.get("proj.fuse_when_possible", True) == config.smooth.proj.fuse_when_possible if config.smooth.enabled_attn: if smooth_cache: assert smooth_cache.get("attn.fuse_when_possible", True) == config.smooth.attn.fuse_when_possible if not smooth_cache: with tools.logging.redirect_tqdm(): for _, (layer, layer_cache, layer_kwargs) in tqdm( config.calib.build_loader().iter_layer_activations( model, needs_inputs_fn=get_needs_inputs_fn(model, config), skip_pre_modules=True, skip_post_modules=True, ), desc="smoothing", leave=False, total=model.num_blocks, dynamic_ncols=True, ): smooth_diffusion_layer( layer=layer, config=config, smooth_cache=smooth_cache, layer_cache=layer_cache, layer_kwargs=layer_kwargs, ) else: for layer in model.block_structs: smooth_diffusion_layer(layer=layer, config=config, smooth_cache=smooth_cache) if config.smooth.enabled_proj: smooth_cache.setdefault("proj.fuse_when_possible", config.smooth.proj.fuse_when_possible) if config.smooth.enabled_attn: smooth_cache.setdefault("attn.fuse_when_possible", config.smooth.attn.fuse_when_possible) return smooth_cache ================================================ FILE: deepcompressor/app/diffusion/quant/utils.py ================================================ import typing as tp import torch import torch.nn as nn from ..nn.struct import DiffusionAttentionStruct, DiffusionFeedForwardStruct, DiffusionModelStruct from .config import DiffusionQuantConfig __all__ = ["get_needs_inputs_fn", "get_needs_outputs_fn", "wrap_joint_attn"] def wrap_joint_attn(attn: nn.Module, /, *, indexes: int | tuple[int, ...] 
def get_needs_inputs_fn(
    model: DiffusionModelStruct, config: DiffusionQuantConfig
) -> tp.Callable[[str, nn.Module], bool]:
    """Build the predicate that decides which modules must cache their inputs.

    Only key modules whose weights or inputs are quantized contribute names.
    Attention projections register the shared q / add_k projection name plus the
    name of the module used for evaluation (the transformer block for the first
    attention of a parallel block, otherwise the attention itself). Feed-forward
    projections register the first `num_experts` up / down projection names.
    Every other module registers its own name.

    Args:
        model (`DiffusionModelStruct`):
            The diffusion model.
        config (`DiffusionQuantConfig`):
            The quantization configuration.

    Returns:
        `Callable[[str, nn.Module], bool]`:
            The function that checks whether the module needs to cache inputs.

    Raises:
        RuntimeError:
            If an attention or feed-forward key module has an unexpected field name.
    """
    qkv_fields = ("q_proj", "k_proj", "v_proj")
    add_qkv_fields = ("add_q_proj", "add_k_proj", "add_v_proj")
    cached_names = set()
    for module_key, module_name, _, parent, field_name in model.named_key_modules():
        wgts_on = config.enabled_wgts and config.wgts.is_enabled_for(module_key)
        if not (wgts_on or (config.enabled_ipts and config.ipts.is_enabled_for(module_key))):
            continue
        if isinstance(parent, DiffusionAttentionStruct):
            if field_name.endswith("o_proj"):
                cached_names.add(module_name)
                continue
            if field_name in qkv_fields:
                cached_names.add(parent.q_proj_name)
            elif field_name in add_qkv_fields:
                cached_names.add(parent.add_k_proj_name)
            else:
                raise RuntimeError(f"Unknown field name: {field_name}")
            # The evaluation module's inputs are needed as well.
            if parent.parent.parallel and parent.idx == 0:
                cached_names.add(parent.parent.name)
            else:
                cached_names.add(parent.name)
        elif isinstance(parent, DiffusionFeedForwardStruct):
            expert_count = parent.config.num_experts
            if field_name == "up_proj":
                cached_names.update(parent.up_proj_names[:expert_count])
            elif field_name == "down_proj":
                cached_names.update(parent.down_proj_names[:expert_count])
            else:
                raise RuntimeError(f"Unknown field name: {field_name}")
        else:
            cached_names.add(module_name)

    def needs_inputs(name: str, module: nn.Module) -> bool:
        return name in cached_names

    return needs_inputs
branches for a block of a diffusion model. Args: layer (`DiffusionModuleStruct` or `DiffusionBlockStruct`): The block to calibrate. config (`DiffusionQuantConfig`): The quantization configuration. branch_state_dict (`dict[str, dict[str, torch.Tensor]]`): The state dict of the low-rank branches. layer_cache (`dict[str, IOTensorsCache]`, *optional*, defaults to `None`): The cache of the layer. layer_kwargs (`dict[str, tp.Any]`, *optional*, defaults to `None`): The keyword arguments for the layer. """ assert config.wgts.low_rank is not None logger = tools.logging.getLogger(f"{__name__}.WeightQuantSVD") logger.debug("- Calibrating low-rank branches of block %s", layer.name) layer_cache = layer_cache or {} layer_kwargs = layer_kwargs or {} for module_key, module_name, module, parent, field_name in layer.named_key_modules(): modules, module_names = [module], [module_name] if not config.wgts.low_rank.exclusive: if field_name.endswith(("q_proj", "k_proj", "v_proj")): assert isinstance(parent, DiffusionAttentionStruct) if parent.is_self_attn(): if field_name == "q_proj": modules, module_names = parent.qkv_proj, parent.qkv_proj_names else: continue elif parent.is_cross_attn(): if field_name == "add_k_proj": modules.append(parent.add_v_proj) module_names.append(parent.add_v_proj_name) elif field_name != "q_proj": continue else: assert parent.is_joint_attn() if field_name == "q_proj": modules, module_names = parent.qkv_proj, parent.qkv_proj_names elif field_name == "add_k_proj": modules, module_names = parent.add_qkv_proj, parent.add_qkv_proj_names else: continue if field_name.endswith(("q_proj", "k_proj")): assert isinstance(parent, DiffusionAttentionStruct) if parent.parent.parallel and parent.idx == 0: eval_module = parent.parent.module eval_name = parent.parent.name eval_kwargs = layer_kwargs else: eval_module = parent.module eval_name = parent.name eval_kwargs = parent.filter_kwargs(layer_kwargs) if parent.is_joint_attn() and "add_" in field_name: eval_module = 
wrap_joint_attn(eval_module, indexes=1) else: eval_module, eval_name, eval_kwargs = module, module_name, None if isinstance(modules[0], nn.Linear): assert all(isinstance(m, nn.Linear) for m in modules) channels_dim = -1 else: assert all(isinstance(m, nn.Conv2d) for m in modules) channels_dim = 1 config_wgts = config.wgts if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key): config_wgts = config.extra_wgts quantizer = DiffusionWeightQuantizer(config_wgts, develop_dtype=config.develop_dtype, key=module_key) if quantizer.is_enabled() and quantizer.is_enabled_low_rank(): if isinstance(module, nn.Conv2d): assert module.weight.shape[2:].numel() else: assert isinstance(module, nn.Linear) if module_name not in branch_state_dict: logger.debug("- Calibrating low-rank branch for %s", ", ".join(module_names)) tools.logging.Formatter.indent_inc() branch_state_dict[module_name] = quantizer.calibrate_low_rank( input_quantizer=DiffusionActivationQuantizer( config.ipts, key=module_key, channels_dim=channels_dim ), modules=modules, inputs=layer_cache[module_name].inputs if layer_cache else None, eval_inputs=layer_cache[eval_name].inputs if layer_cache else None, eval_module=eval_module, eval_kwargs=eval_kwargs, ).state_dict() tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() shared_branch = LowRankBranch( in_features=module.weight.shape[1], out_features=sum(m.weight.shape[0] for m in modules), rank=config.wgts.low_rank.rank, ) shared_branch.to(device=module.weight.device, dtype=module.weight.dtype) shared_branch.load_state_dict(branch_state_dict[module_name]) logger.debug(" + Adding low-rank branches to %s", ", ".join(module_names)) if len(modules) > 1: oc_idx = 0 for module in modules: branch = LowRankBranch( in_features=module.weight.shape[1], out_features=module.weight.shape[0], rank=config.wgts.low_rank.rank, ) branch.a = shared_branch.a branch.b.to(dtype=module.weight.dtype, device=module.weight.device) 
@torch.inference_mode()
def update_diffusion_block_weight_quantizer_state_dict(
    layer: DiffusionModuleStruct | DiffusionBlockStruct,
    config: DiffusionQuantConfig,
    quantizer_state_dict: dict[str, dict[str, torch.Tensor | float | None]],
    layer_cache: dict[str, IOTensorsCache],
    layer_kwargs: dict[str, tp.Any],
) -> None:
    """Update the state dict of the weight quantizers for a block of a diffusion model.

    For every key module of the block, a weight quantizer is built. If it is enabled
    and the module has no entry in `quantizer_state_dict` yet, its dynamic range is
    calibrated and the resulting state dict is stored under the module name. If the
    quantizer is disabled, any existing entry for the module is removed.

    Args:
        layer (`DiffusionModuleStruct` or `DiffusionBlockStruct`):
            The block to update.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        quantizer_state_dict (`dict[str, dict[str, torch.Tensor | float | None]]`):
            The state dict of the weight quantizers. It is modified in place.
        layer_cache (`dict[str, IOTensorsCache]`):
            The cache of the layer. When falsy, calibration receives no inputs.
        layer_kwargs (`dict[str, tp.Any]`):
            The keyword arguments for the layer.
    """
    logger = tools.logging.getLogger(f"{__name__}.WeightQuant")
    logger.debug("- Calibrating weights: block %s", layer.name)
    tools.logging.Formatter.indent_inc()
    for module_key, module_name, module, parent, field_name in layer.named_key_modules():
        # Choose what to evaluate when calibrating: q/k projections are evaluated through
        # their attention (or the whole block for the first attention of a parallel block);
        # every other module is evaluated on its own.
        if field_name.endswith(("q_proj", "k_proj")):
            assert isinstance(parent, DiffusionAttentionStruct)
            if parent.parent.parallel and parent.idx == 0:
                eval_module = parent.parent.module
                eval_name = parent.parent.name
                eval_kwargs = layer_kwargs
            else:
                eval_module = parent.module
                eval_name = parent.name
                eval_kwargs = parent.filter_kwargs(layer_kwargs)
            # NOTE(review): the flattened source does not show whether this check sits inside
            # the `else` above or after it -- confirm the nesting against the original file.
            # For the additional projections of a joint attention, evaluate output index 1 only.
            if parent.is_joint_attn() and "add_" in field_name:
                eval_module = wrap_joint_attn(eval_module, indexes=1)
        else:
            eval_module, eval_name, eval_kwargs = module, module_name, None
        # The extra weight configuration overrides the default one when enabled for this key.
        config_wgts = config.wgts
        if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key):
            config_wgts = config.extra_wgts
        quantizer = DiffusionWeightQuantizer(config_wgts, develop_dtype=config.develop_dtype, key=module_key)
        if quantizer.is_enabled():
            if module_name not in quantizer_state_dict:
                logger.debug("- Calibrating %s.weight quantizer", module_name)
                quantizer.calibrate_dynamic_range(
                    module=module,
                    inputs=layer_cache[module_name].inputs if layer_cache else None,
                    eval_inputs=layer_cache[eval_name].inputs if layer_cache else None,
                    eval_module=eval_module,
                    eval_kwargs=eval_kwargs,
                )
                quantizer_state_dict[module_name] = quantizer.state_dict()
                # Release calibration temporaries before moving on to the next module.
                gc.collect()
                torch.cuda.empty_cache()
            else:
                # An entry already exists (e.g. loaded by the caller); nothing to calibrate.
                logger.debug("- Loading %s.weight quantizer", module_name)
        else:
            logger.debug("- Skipping %s.weight", module_name)
            # Drop stale entries so that disabled modules are not quantized later.
            if module_name in quantizer_state_dict:
                quantizer_state_dict.pop(module_name)
    tools.logging.Formatter.indent_dec()
return_with_scale_state_dict: bool = False, ) -> dict[str, torch.Tensor | float | None]: """Quantize the weights of a block of a diffusion model. Args: layer (`DiffusionModuleStruct` or `DiffusionBlockStruct`): The block to quantize. config (`DiffusionQuantConfig`): The quantization configuration. quantizer_state_dict (`dict[str, dict[str, torch.Tensor | float | None]]`): The state dict of the weight quantizers. layer_cache (`dict[str, IOTensorsCache]`, *optional*, defaults to `None`): The cache of the layer. return_with_scale_state_dict (`bool`, *optional*, defaults to `False`): Whether to return the scale state dict. Returns: `dict[str, torch.Tensor | float | None]`: The scale state dict. """ logger = tools.logging.getLogger(f"{__name__}.WeightQuant") logger.debug("- Quantizing weights: block %s", layer.name) layer_cache = layer_cache or {} scale_state_dict: dict[str, torch.Tensor | float | None] = {} tools.logging.Formatter.indent_inc() for module_key, module_name, module, _, _ in layer.named_key_modules(): if module_name in quantizer_state_dict: param_name = f"{module_name}.weight" logger.debug("- Quantizing %s", param_name) config_wgts = config.wgts if config.enabled_extra_wgts and config.extra_wgts.is_enabled_for(module_key): config_wgts = config.extra_wgts logger.debug(" + quant_dtype: %s", str(config_wgts.dtype)) logger.debug(" + group_shape: %s", str(config_wgts.group_shapes)) logger.debug(" + scale_dtype: %s", str(config_wgts.scale_dtypes)) quantizer = DiffusionWeightQuantizer(config_wgts, develop_dtype=config.develop_dtype, key=module_key) quantizer.load_state_dict(quantizer_state_dict[module_name], device=module.weight.device) result = quantizer.quantize( module.weight.data, inputs=layer_cache[module_name].inputs.front() if layer_cache else None, return_with_dequant=True, return_with_quant=return_with_scale_state_dict, ) if ( config.wgts.enabled_low_rank and config.wgts.low_rank.is_enabled_for(module_key) and config.wgts.low_rank.compensate and 
@torch.inference_mode()
def quantize_diffusion_weights(
    model: nn.Module | DiffusionModelStruct,
    config: DiffusionQuantConfig,
    quantizer_state_dict: dict[str, dict[str, torch.Tensor | float | None]] | None = None,
    branch_state_dict: dict[str, dict[str, torch.Tensor]] | None = None,
    return_with_scale_state_dict: bool = False,
) -> tuple[
    dict[str, dict[str, torch.Tensor | float | None]],
    dict[str, dict[str, torch.Tensor]],
    dict[str, torch.Tensor | float | None],
]:
    """Quantize the weights of a diffusion model.

    The function runs up to three passes over the model layers:

    1. When low-rank branches are enabled and are not single-iteration compensation
       branches, calibrate them (or rebuild them from `branch_state_dict` if it is
       non-empty).
    2. When `quantizer_state_dict` is empty, calibrate the weight quantizers.
    3. Quantize the weights of every layer using `quantizer_state_dict`.

    Args:
        model (`nn.Module` or `DiffusionModelStruct`):
            The diffusion model to quantize.
        config (`DiffusionQuantConfig`):
            The quantization configuration.
        quantizer_state_dict (`dict[str, dict[str, torch.Tensor | float | None]]`, *optional*, defaults to `None`):
            The state dict of the weight quantizers. A falsy value is replaced by a new dict.
        branch_state_dict (`dict[str, dict[str, torch.Tensor]]`, *optional*, defaults to `None`):
            The state dict of the low-rank branches. A falsy value is replaced by a new dict.
        return_with_scale_state_dict (`bool`, *optional*, defaults to `False`):
            Whether to return the scale state dict.

    Returns:
        `tuple[dict[str, dict[str, torch.Tensor | float | None]], dict[str, dict[str, torch.Tensor]], dict[str, torch.Tensor | float | None]]`:
            The state dict of the weight quantizers, the state dict of the low-rank branches,
            and the scale state dict.
    """
    logger = tools.logging.getLogger(f"{__name__}.WeightQuant")
    if not isinstance(model, DiffusionModelStruct):
        model = DiffusionModelStruct.construct(model)
    assert isinstance(model, DiffusionModelStruct)
    # NOTE: `or {}` also replaces an empty dict passed by the caller, so callers must use
    # the returned dicts rather than rely on in-place updates of an empty argument.
    quantizer_state_dict = quantizer_state_dict or {}
    branch_state_dict = branch_state_dict or {}

    # Pass 1: low-rank branches. Skipped for single-iteration compensation branches.
    if config.wgts.enabled_low_rank and (not config.wgts.low_rank.compensate or config.wgts.low_rank.num_iters > 1):
        logger.info("* Adding low-rank branches to weights")
        tools.logging.Formatter.indent_inc()
        with tools.logging.redirect_tqdm():
            if branch_state_dict:
                # Branches are already available: no calibration activations are needed.
                for _, layer in tqdm(
                    model.get_named_layers(skip_pre_modules=True, skip_post_modules=True).items(),
                    desc="adding low-rank branches",
                    leave=False,
                    dynamic_ncols=True,
                ):
                    calibrate_diffusion_block_low_rank_branch(
                        layer=layer, config=config, branch_state_dict=branch_state_dict
                    )
            else:
                # Calibrate from scratch using cached layer activations.
                for _, (layer, layer_cache, layer_kwargs) in tqdm(
                    config.calib.build_loader().iter_layer_activations(
                        model,
                        needs_inputs_fn=get_needs_inputs_fn(model, config),
                        skip_pre_modules=True,
                        skip_post_modules=True,
                    ),
                    desc="calibrating low-rank branches",
                    leave=False,
                    total=model.num_blocks,
                    dynamic_ncols=True,
                ):
                    calibrate_diffusion_block_low_rank_branch(
                        layer=layer,
                        config=config,
                        branch_state_dict=branch_state_dict,
                        layer_cache=layer_cache,
                        layer_kwargs=layer_kwargs,
                    )
        tools.logging.Formatter.indent_dec()

    # Pre/post modules are skipped only when every one of their keys is listed in `skips`.
    skip_pre_modules = all(key in config.wgts.skips for key in model.get_prev_module_keys())
    skip_post_modules = all(key in config.wgts.skips for key in model.get_post_module_keys())
    with tools.logging.redirect_tqdm():
        # Pass 2: calibrate the weight quantizers unless a state dict was supplied.
        if not quantizer_state_dict:
            if config.wgts.needs_calib_data:
                iterable = config.calib.build_loader().iter_layer_activations(
                    model,
                    needs_inputs_fn=get_needs_inputs_fn(model, config),
                    skip_pre_modules=skip_pre_modules,
                    skip_post_modules=skip_post_modules,
                )
            else:
                # No activations required: mimic the loader's item shape with empty cache/kwargs.
                iterable = map(  # noqa: C417
                    lambda kv: (kv[0], (kv[1], {}, {})),
                    model.get_named_layers(
                        skip_pre_modules=skip_pre_modules, skip_post_modules=skip_post_modules
                    ).items(),
                )
            for _, (layer, layer_cache, layer_kwargs) in tqdm(
                iterable,
                desc="calibrating weight quantizers",
                leave=False,
                # Progress-bar estimate only; presumably 3 pre-module layers and 1 post-module
                # layer -- TODO confirm against `get_named_layers`.
                total=model.num_blocks + int(not skip_post_modules) + int(not skip_pre_modules) * 3,
                dynamic_ncols=True,
            ):
                update_diffusion_block_weight_quantizer_state_dict(
                    layer=layer,
                    config=config,
                    quantizer_state_dict=quantizer_state_dict,
                    layer_cache=layer_cache,
                    layer_kwargs=layer_kwargs,
                )
        # Pass 3: quantize the weights; activations are only collected when GPTQ is enabled.
        scale_state_dict: dict[str, torch.Tensor | float | None] = {}
        if config.wgts.enabled_gptq:
            iterable = config.calib.build_loader().iter_layer_activations(
                model,
                needs_inputs_fn=get_needs_inputs_fn(model, config),
                skip_pre_modules=skip_pre_modules,
                skip_post_modules=skip_post_modules,
            )
        else:
            iterable = map(  # noqa: C417
                lambda kv: (kv[0], (kv[1], {}, {})),
                model.get_named_layers(skip_pre_modules=skip_pre_modules, skip_post_modules=skip_post_modules).items(),
            )
        for _, (layer, layer_cache, _) in tqdm(
            iterable,
            desc="quantizing weights",
            leave=False,
            total=model.num_blocks + int(not skip_post_modules) + int(not skip_pre_modules) * 3,
            dynamic_ncols=True,
        ):
            layer_scale_state_dict = quantize_diffusion_block_weights(
                layer=layer,
                config=config,
                layer_cache=layer_cache,
                quantizer_state_dict=quantizer_state_dict,
                return_with_scale_state_dict=return_with_scale_state_dict,
            )
            scale_state_dict.update(layer_scale_state_dict)
    return quantizer_state_dict, branch_state_dict, scale_state_dict
Args: model (`nn.Module` or `DiffusionModelStruct`): The diffusion model to load the weights. config (`DiffusionQuantConfig`): The quantization configuration. state_dict (`dict[str, torch.Tensor]`): The state dict of the weights. branch_state_dict (`dict[str, dict[str, torch.Tensor]]`): The state dict of the low-rank branches. """ if not isinstance(model, DiffusionModelStruct): model = DiffusionModelStruct.construct(model) assert isinstance(model, DiffusionModelStruct) if config.enabled_wgts and config.wgts.enabled_low_rank: assert branch_state_dict is not None for _, layer in tqdm( model.get_named_layers(skip_pre_modules=True, skip_post_modules=True).items(), desc="adding low-rank branches", leave=False, dynamic_ncols=True, ): calibrate_diffusion_block_low_rank_branch(layer=layer, config=config, branch_state_dict=branch_state_dict) model.module.load_state_dict(state_dict) gc.collect() torch.cuda.empty_cache() ================================================ FILE: deepcompressor/app/diffusion/utils.py ================================================ import os import random import numpy as np import torch from PIL import Image from deepcompressor.utils.common import hash_str_to_int __all__ = ["get_control"] def update_mask(mask: np.ndarray, x: int, y: int, radius: int | float): mask = mask.copy() H, W = mask.shape for i in range(H): for j in range(W): if (j - x) ** 2 + (i - y) ** 2 <= radius**2: mask[i, j] = True return mask def generate_mask( masked_ratio_range: tuple[int, int], size: int | tuple[int, int], seed: int | None = None, eps=1e-2 ) -> np.ndarray: if seed is not None: random.seed(seed) masked_ratio = random.randint(masked_ratio_range[0], masked_ratio_range[1]) / 100 if isinstance(size, int): size = (size, size) assert len(size) == 2 height, width = size mask = np.zeros((height, width), dtype=bool) while True: radius = random.randint(16, min(height, width) // 2) x = random.randint(0, width - 1) y = random.randint(0, height - 1) new_mask = update_mask(mask, 
def center_crop_and_resize(image: Image.Image, target_size: int | tuple[int, int]) -> Image.Image:
    """Center-crop an image to the target aspect ratio, then resize it to the target size.

    Args:
        image (`Image.Image`):
            The input image.
        target_size (`int` or `tuple[int, int]`):
            The target size as `(width, height)`. An integer means a square of that side.

    Returns:
        `Image.Image`:
            The cropped and resized image. The input image is returned unchanged when it
            already has the target size.
    """
    if isinstance(target_size, int):
        target_size = (target_size, target_size)
    else:
        assert len(target_size) == 2
    target_width, target_height = target_size
    width, height = image.size
    aspect = width / height
    target_aspect = target_width / target_height
    if aspect > target_aspect:
        # Too wide: trim equal margins from the left and right.
        new_width = height * target_width / target_height
        left = round((width - new_width) / 2)
        right = round(left + new_width)
        image = image.crop((left, 0, right, height))
    elif aspect < target_aspect:
        # Too tall: trim equal margins from the top and bottom. The previous condition
        # compared the image aspect ratio with itself, so this branch never ran and tall
        # images were distorted by the resize below.
        new_height = width * target_height / target_width
        top = round((height - new_height) / 2)
        bottom = round(top + new_height)
        image = image.crop((0, top, width, bottom))
    width, height = image.size
    if width != target_width or height != target_height:
        image = image.resize((target_width, target_height), Image.Resampling.BICUBIC)
    return image
def get_control(  # noqa: C901
    task: str,
    images: Image.Image | list[Image.Image],
    names: str | list[str] | None = None,
    data_root: str | None = None,
    device: str | torch.device = "cuda",
    **kwargs,
) -> Image.Image | list[Image.Image] | tuple[Image.Image, Image.Image] | tuple[list[Image.Image], list[Image.Image]]:
    """Build the control input(s) for a conditional generation task.

    For every image, a cached control image under ``data_root`` is used when it
    exists; otherwise the image is center-cropped/resized and the control is
    computed on the fly.

    Args:
        task (`str`):
            One of ``"canny-to-image"``, ``"depth-to-image"`` or ``"inpainting"``.
        images (`Image.Image` or `list[Image.Image]`):
            A single image or a batch of images.
        names (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
            Image name(s), used to locate cached files and to seed inpainting masks.
        data_root (`str` or `None`, *optional*, defaults to `None`):
            Directory holding the cached ``canny_images`` / ``depth_images`` /
            ``cropped_images`` / ``mask_images`` sub-folders.
        device (`str` or `torch.device`, *optional*, defaults to `"cuda"`):
            Device for the depth preprocessor when it has to be created.
        **kwargs:
            ``size`` (int or pair, default ``1024``) and ``processor`` (a ready-made
            canny/depth processor) are read from here.

    Returns:
        The control image(s). For ``"inpainting"`` a ``(images, masks)`` pair is
        returned. A single input image yields single outputs, a list yields lists.

    Raises:
        ValueError: If ``task`` is not supported.
    """
    size = kwargs.get("size", 1024)
    if isinstance(size, int):
        size = (size, size)
    assert len(size) == 2
    single = isinstance(images, Image.Image)
    image_batch = [images] if single else images
    if isinstance(names, str):
        names = [names]

    if task == "canny-to-image":
        processor = kwargs.get("processor", None)
        controls = []
        for idx, image in enumerate(image_batch):
            cached_path = None
            if data_root is not None and names is not None:
                cached_path = os.path.join(data_root, "canny_images", f"{names[idx]}.png")
            if cached_path is not None and os.path.exists(cached_path):
                controls.append(Image.open(cached_path))
                continue
            if processor is None:
                # Created lazily so the dependency is only needed on a cache miss.
                from controlnet_aux import CannyDetector

                processor = CannyDetector()
            resized = center_crop_and_resize(image, size)
            resolution = max(size)
            controls.append(
                processor(
                    resized,
                    low_threshold=50,
                    high_threshold=200,
                    detect_resolution=resolution,
                    image_resolution=resolution,
                )
            )
        return controls[0] if single else controls

    if task == "depth-to-image":
        processor = kwargs.get("processor", None)
        controls = []
        for idx, image in enumerate(image_batch):
            cached_path = None
            if data_root is not None and names is not None:
                cached_path = os.path.join(data_root, "depth_images", f"{names[idx]}.png")
            if cached_path is not None and os.path.exists(cached_path):
                controls.append(Image.open(cached_path))
                continue
            if processor is None:
                # Created lazily so the dependency is only needed on a cache miss.
                from image_gen_aux import DepthPreprocessor

                processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf").to(device)
            resized = center_crop_and_resize(image, size)
            depth = processor(resized.convert("RGB"))[0]
            controls.append(depth.convert("RGB"))
        return controls[0] if single else controls

    if task == "inpainting":
        controls, masks = [], []
        for idx, image in enumerate(image_batch):
            name = names[idx] if names is not None else None
            if data_root is not None and name is not None:
                cropped_image_path = os.path.join(data_root, "cropped_images", f"{name}.png")
                mask_path = os.path.join(data_root, "mask_images", f"{name}.png")
                # Both files must exist for the cached pair to be used.
                if os.path.exists(cropped_image_path) and os.path.exists(mask_path):
                    controls.append(Image.open(cropped_image_path).convert("RGB"))
                    masks.append(Image.open(mask_path))
                    continue
            resized = center_crop_and_resize(image, size)
            controls.append(resized.convert("RGB"))
            # The mask seed comes from the image name, so a name always maps to the same mask.
            seed = hash_str_to_int(names[idx]) if names is not None else None
            mask = generate_mask((5, 60), size, seed=seed)
            masks.append(Image.fromarray(mask.astype(np.uint8) * 255))
        if single:
            return controls[0], masks[0]
        return controls, masks

    raise ValueError(f"Unsupported task: {task}")
@configclass
@dataclass
class LlmQuantCacheConfig(BasePathConfig):
    """Large language model quantization cache path.

    Each field holds one path; an empty string means the path is not set.

    Args:
        rotation (`str`, *optional*, default=`""`):
            The rotation matrix cache path.
        reorder (`str`, *optional*, default=`""`):
            The reorder channel indexes cache path.
        smooth (`str`, *optional*, default=`""`):
            The smoothing scales cache path.
        wgts (`str`, *optional*, default=`""`):
            The weight quantizers state dict cache path.
        acts (`str`, *optional*, default=`""`):
            The activation quantizers state dict cache path.
    """

    rotation: str = ""
    reorder: str = ""
    smooth: str = ""
    wgts: str = ""
    acts: str = ""


@configclass
@dataclass
class LlmCacheConfig:
    """LLM quantization cache configuration.

    Attributes:
        root (`str`, *optional*, default=`""`):
            The root directory path for the cache.
        dirpath (`LlmQuantCacheConfig`, *optional*, default=`LlmQuantCacheConfig()`):
            The directory paths for the cache. Not an ``__init__`` argument.
        path (`LlmQuantCacheConfig`, *optional*, default=`LlmQuantCacheConfig()`):
            The file paths for the cache.
    """

    root: str = field(default="")
    # init=False: excluded from the generated __init__; starts as an empty LlmQuantCacheConfig.
    dirpath: LlmQuantCacheConfig = field(init=False, default_factory=LlmQuantCacheConfig)
    path: LlmQuantCacheConfig = field(default_factory=LlmQuantCacheConfig)


@configclass
@dataclass
class LlmPtqRunConfig:
    """Top-level config of post-training quantization for a large language model.

    Args:
        cache (`LlmCacheConfig`):
            Large language model quantization cache path configuration.
        output (`OutputConfig`):
            Output directory configuration.
        model (`LlmModelConfig`):
            Large language model configuration.
        eval (`LlmEvalConfig`):
            Large language model evaluation configuration.
        quant (`LlmQuantConfig`):
            Large language model quantization configuration.
        seed (`int`, *optional*, defaults to `12345`):
            Random seed.
        skip_eval (`bool`, *optional*, defaults to `False`):
            Whether to skip evaluation.
        load_from (`str`, *optional*, defaults to `""`):
            Directory path to load the model checkpoint.
        save_model (`str`, *optional*, defaults to `""`):
            Directory path to save the model checkpoint.
        copy_on_save (`bool`, *optional*, defaults to `False`):
            Whether to copy the quantization cache on save.
    """

    cache: LlmCacheConfig
    output: OutputConfig
    model: LlmModelConfig
    eval: LlmEvalConfig
    quant: LlmQuantConfig = field(metadata={omniconfig.ARGPARSE_KWARGS: {"prefix": ""}})
    seed: int = 12345
    skip_eval: bool = False
    load_from: str = ""
    save_model: str = ""
    copy_on_save: bool = False

    def __post_init__(self):  # noqa: C901
        """Resolve derived settings, then seed every random number generator.

        Fills in scale dtypes from the model dtype, caps the evaluation GPU
        count and batch size, derives the cache and output paths, and seeds
        `random`, `torch` (CPU and all CUDA devices) and `numpy`.
        """
        # region set scale default dtype
        if self.quant.enabled_wgts:
            self.quant.wgts.scale_dtypes = tuple(
                ScaleUtils.infer_scale_dtypes(self.quant.wgts.scale_dtypes, default_dtype=self.model.dtype)
            )
        if self.quant.enabled_ipts:
            self.quant.ipts.scale_dtypes = tuple(
                ScaleUtils.infer_scale_dtypes(self.quant.ipts.scale_dtypes, default_dtype=self.model.dtype)
            )
        if self.quant.enabled_opts:
            self.quant.opts.scale_dtypes = tuple(
                ScaleUtils.infer_scale_dtypes(self.quant.opts.scale_dtypes, default_dtype=self.model.dtype)
            )
        # endregion
        # region set num_gpus and batch_size for auto parallelism of large models
        # Never request more GPUs than are visible.
        self.eval.num_gpus = min(torch.cuda.device_count(), self.eval.num_gpus)
        # The larger the model size, the smaller the batch-size cap (8 / 4 / 1).
        if self.model.size < 50:
            self.eval.batch_size = min(8, self.eval.batch_size)
        elif self.model.size < 100:
            self.eval.batch_size = min(4, self.eval.batch_size)
        else:
            self.eval.batch_size = min(1, self.eval.batch_size)
        # endregion
        if self.quant.is_enabled():
            if self.cache.path.is_all_empty():
                # No explicit cache files: derive directories from the quantization
                # settings and name every file "<model name>.pt".
                self.cache.dirpath = self.quant.generate_cache_dirpath(
                    root=self.cache.root, seed=self.seed, default_dtype=self.model.dtype
                )
                self.cache.path = self.cache.dirpath.clone().add_children(f"{self.model.name}.pt")
            else:
                # Explicit cache files were given: derive the directories from them.
                self.cache.dirpath = self.cache.path.clone().to_dirpath()
        if self.output.dirname == "default":
            self.output.dirname = self.quant.generate_default_dirname()
        self.output.dirpath = os.path.join(
            self.output.root,
            "llm",
            self.model.family,
            self.model.name,
            *self.quant.generate_dirnames(default_dtype=self.model.dtype)[:-1],
            self.quant.generate_calib_dirname(),
            self.output.dirname,
        )
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed_all(self.seed)
        np.random.seed(self.seed)

    @classmethod
    def get_parser(cls) -> ConfigParser:
        """Get a parser for evaluating a large language model.

        Returns:
            `ConfigParser`: A parser for evaluating a large language model.
        """
        parser = ConfigParser("Evaluate a large language model")
        parser.add_config(cls)
        return parser
class LlmEvaluatorBase(ABC):
    """Abstract base class of language model evaluators.

    Stores the model and tokenizer; subclasses implement task filtering and
    the evaluation itself.
    """

    def __init__(self, model: PreTrainedModel, tokenizer: PreTrainedTokenizer):
        self.model = model
        self.tokenizer = tokenizer

    @abstractmethod
    def filter_tasks(self, tasks: list[str]) -> list[str]:
        """Filter the tasks to only include supported tasks."""

    @abstractmethod
    def evaluate(self, tasks: list[str], **kwargs) -> dict[str, dict[str, dict[str, float]]]:
        """Evaluate the model on the given tasks."""
@configclass
@dataclass
class LlmEvalConfig:
    """Large language model evaluation configuration.

    Attributes:
        num_gpus (`int`, *optional*, defaults to `1`):
            The number of GPUs to use.
        batch_size (`int`, *optional*, defaults to `1`):
            The batch size used for inference.
        tasks (`list[str]`, *optional*, defaults to `["zero-shot"]`):
            Task names, e.g. wikitext, hellaswag, piqa, winogrande.
            ``"zero-shot"`` expands to a fixed set of six tasks.
        max_seq_length (`int`, *optional*, defaults to `-4096`):
            Maximum sequence length.
            If negative, sequence lengths smaller than or equal to the absolute value are used
            (``-1`` means up to the model's own maximum). If zero, the model's maximum is used.
        evaluators (`list[str]`, *optional*, defaults to `["gptq"]`):
            Evaluators names, each one of ``"gptq"``, ``"lm_eval"``, ``"longbench"``.
        num_shot (`int`, *optional*, defaults to `None`):
            The number of shots for few-shot evaluation.
        fewshot_as_multiturn (`bool`, *optional*, defaults to `False`):
            Whether to treat few-shot evaluation as multi-turn.
        apply_chat_template (`bool`, *optional*, defaults to `False`):
            Whether to apply chat template for evaluation.
    """

    num_gpus: int = field(default=1, metadata={omniconfig.ARGPARSE_ARGS: ("--num-gpus", "-n")})
    batch_size: int = 1
    tasks: list[str] = field(
        default_factory=lambda: ["zero-shot"],
        metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": str}},
    )
    max_seq_length: int = -4096
    evaluators: list[str] = field(
        default_factory=lambda: ["gptq"], metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": str}}
    )
    num_shot: int | None = None
    fewshot_as_multiturn: bool = False
    apply_chat_template: bool = False

    def __post_init__(self):
        """Normalize task and evaluator names and validate the evaluators."""
        if "zero-shot" in self.tasks:
            self.tasks.remove("zero-shot")
            self.tasks.extend(("wikitext", "hellaswag", "piqa", "winogrande", "arc_easy", "arc_challenge"))
        self.tasks = sorted({task.lower() for task in self.tasks})
        self.evaluators = sorted({evaluator.lower() for evaluator in self.evaluators})
        for evaluator in self.evaluators:
            assert evaluator in ("lm_eval", "gptq", "longbench"), f"Invalid evaluator: {evaluator}"
        # With only the GPTQ-style evaluator, keep just the tasks it is meant for.
        # This compared against the misspelled "gpq" before, so the filter never
        # ran and unsupported tasks reached the perplexity evaluator.
        if len(self.evaluators) == 1 and self.evaluators[0] == "gptq":
            self.tasks = [task for task in self.tasks if task.startswith(("wikitext", "pile", "gsm8k"))]
            assert len(self.tasks) > 0, "No valid tasks for GPTQ evaluation"

    def evaluate(
        self,
        model: PreTrainedModel,
        /,
        tokenizer: PreTrainedTokenizer,
        model_name: str,
        eos_token_ids: tp.Sequence[int] = (),
        output_dirpath: str = "",
    ) -> dict[str, dict[int, dict[str, dict[tp.Any, dict[str, tp.Any]]]]]:
        """Evaluate the model.

        Args:
            model (`PreTrainedModel`):
                The model.
            tokenizer (`PreTrainedTokenizer`):
                The tokenizer.
            model_name (`str`):
                The name of the model.
            eos_token_ids (`Sequence[int]`, *optional*, defaults to `()`):
                The EOS token IDs. Only passed to the ``longbench`` evaluator.
            output_dirpath (`str`, *optional*, defaults to `""`):
                Output directory. Only passed to the ``longbench`` evaluator.

        Returns:
            `dict[str, dict[int, dict[str, dict[tp.Any, dict[str, tp.Any]]]]]`:
                The evaluation results.
                    - The first key is the evaluator name.
                    - The second key is the maximum sequence length.
                    - The third key is the content name, e.g., "results", "versions", "config".
                    - The fourth key is the task name for "results".
        """
        logger = tools.logging.getLogger(f"{__name__}.LlmEval")
        tools.logging.Formatter.indent_inc()
        tools.logging.Formatter.indent_dec()
        lm_max_seq_length = get_max_seq_length(model, tokenizer)
        max_seq_lengths = {2048, 4096, lm_max_seq_length}
        if self.max_seq_length < 0:
            # Negative: evaluate every candidate length up to the limit.
            if self.max_seq_length == -1:
                max_seq_length = lm_max_seq_length
            else:
                max_seq_length = min(lm_max_seq_length, -self.max_seq_length)
            max_seq_lengths = [length for length in sorted(max_seq_lengths) if length <= max_seq_length]
        elif self.max_seq_length == 0:
            max_seq_lengths = [lm_max_seq_length]
        else:
            max_seq_lengths = [self.max_seq_length]
        results = {}
        for evaluator_name in self.evaluators:
            logger.info(f"- Evaluator: {evaluator_name}")
            tasks = list(self.tasks)
            if evaluator_name == "gptq":
                evaluator = LlmCustomEvaluator(model=model, tokenizer=tokenizer)
            elif evaluator_name == "lm_eval":
                evaluator = LmevalEvaluator(model=model, tokenizer=tokenizer, batch_size=self.batch_size)
            elif evaluator_name == "longbench":
                evaluator = LongbenchEvaluator(
                    model=model,
                    tokenizer=tokenizer,
                    model_name=model_name,
                    eos_token_ids=eos_token_ids,
                    output_dirpath=output_dirpath,
                )
            else:
                raise ValueError(f"Invalid evaluator: {evaluator_name}")
            logger.info(f"- Tasks: {tasks}")
            logger.info(f"- Batch_size: {self.batch_size}")
            rsts = {}
            tools.logging.Formatter.indent_inc()
            for max_seq_length in max_seq_lengths:
                logger.info(f"+ Max_seq_length: {max_seq_length}")
                tools.logging.Formatter.indent_inc()
                tools.logging.Formatter.indent_inc()
                # Re-seed before every run so results do not depend on evaluation order.
                torch.manual_seed(42)
                torch.cuda.manual_seed(42)
                torch.cuda.manual_seed_all(42)
                np.random.seed(42)
                random.seed(42)
                # evaluate
                rst = evaluator.evaluate(
                    tasks=tasks,
                    max_length=max_seq_length,
                    num_shot=self.num_shot,
                    fewshot_as_multiturn=self.fewshot_as_multiturn,
                    apply_chat_template=self.apply_chat_template,
                )
                rst["model"] = model_name
                tools.logging.Formatter.indent_dec()
                logger.info("- Results:")
                tools.logging.Formatter.indent_inc()
                tools.logging.info(self.make_table(rst), logger=logger)
                tools.logging.Formatter.indent_dec()
                rsts[max_seq_length] = rst
                tools.logging.Formatter.indent_dec()
            tools.logging.Formatter.indent_dec()
            results[evaluator_name] = rsts
        return results

    @staticmethod
    def make_table(rst: dict[str, dict[tp.Any, dict[str, tp.Any]]]) -> str:
        """Generate table of results.

        Args:
            rst (`dict[str, dict[tp.Any, dict[str, tp.Any]]]`):
                The evaluation results; the ``"results"`` and ``"versions"`` entries are read.

        Returns:
            `str`: The string representation of the results in a table.
        """
        from pytablewriter import MarkdownTableWriter

        md_writer = MarkdownTableWriter()
        md_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]
        values = []
        for k, dic in rst["results"].items():
            version = rst["versions"][k]
            for m, v in dic.items():
                if "_stderr" in m:
                    continue
                # "acc,none" -> "acc_stderr,none"; "word_perplexity" -> "word_perplexity_stderr".
                # Joining on "," alone gave back the metric name itself when it had no
                # comma, so such a metric was printed as its own standard error.
                metric, sep, filter_name = m.partition(",")
                mse = f"{metric}_stderr{sep}{filter_name}"
                appended = False
                if mse in dic:
                    se = dic[mse]
                    if isinstance(se, (int, float)):
                        values.append([k, version, m, "%.4f" % v, "±", "%.4f" % se])
                        appended = True
                if not appended and isinstance(v, (int, float)):
                    values.append([k, version, m, "%.4f" % v, "", ""])
                # Only the first row of a task shows the task name and version.
                k = ""
                version = ""
        md_writer.value_matrix = values
        return md_writer.dumps()


def get_max_seq_length(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, default_seq_length: int = 2048) -> int:
    """Get the maximum sequence length of a model.

    The model config is checked first (``n_positions``, ``max_position_embeddings``,
    ``n_ctx``, in this order), then the tokenizer's ``model_max_length``.

    Args:
        model (`PreTrainedModel`):
            The model.
        tokenizer (`PreTrainedTokenizer`):
            The tokenizer.
        default_seq_length (`int`, *optional*, defaults to `2048`):
            Fallback when no usable length is found.

    Returns:
        `int`: The maximum sequence length.
    """
    seqlen_config_attrs = ("n_positions", "max_position_embeddings", "n_ctx")
    for attr in seqlen_config_attrs:
        if hasattr(model.config, attr):
            return getattr(model.config, attr)
    if hasattr(tokenizer, "model_max_length"):
        # This value equals int(1e30); it is treated as "no real limit set".
        if tokenizer.model_max_length == 1000000000000000019884624838656:
            return default_seq_length
        return tokenizer.model_max_length
    return default_seq_length
class LlmCustomEvaluator(LlmEvaluatorBase):
    """Evaluator that computes GPTQ-style perplexity."""

    def filter_tasks(self, tasks: list[str]) -> list[str]:
        """Filter the tasks to only include supported tasks."""
        return [task for task in tasks if task.startswith(("wikitext", "pile"))]

    def evaluate(
        self, tasks: list[str], max_length: int | None = None, **kwargs
    ) -> dict[str, dict[str, dict[str, float]]]:
        """Evaluate the model on the given tasks.

        Args:
            tasks (`list[str]`):
                List of tasks to evaluate on.
            max_length (`int`, optional, defaults to `None`):
                Maximum length for the model. If `None`, the default sequence
                length of `_eval_ppl_with_gptq_evaluator` is used.

        Returns:
            dict[str, dict[str, dict[str, float]]]:
                Evaluation results `{"results": {"task": {"metric": score}}}`.
        """
        # Forwarding None as seq_length would fail on `None > 0`; omit it instead.
        ppl_kwargs = {} if max_length is None else {"seq_length": max_length}
        result = {"results": {}, "versions": {}}
        for task in tasks:
            result["results"][task] = {
                "word_perplexity": _eval_ppl_with_gptq_evaluator(self.model, self.tokenizer, task=task, **ppl_kwargs)
            }
            result["versions"][task] = 1
        return result


def _eval_ppl_with_gptq_evaluator(
    model: PreTrainedModel,
    /,
    tokenizer: PreTrainedTokenizer,
    task: str,
    seq_length: int = 2048,
    max_num_samples: int = -1,
) -> float:
    """Evaluate the perplexity of a model on a task using GPTQ style evaluation.

    The test split is joined into one text, tokenized once, and cut into
    consecutive non-overlapping windows of ``seq_length`` tokens.

    Args:
        model (`PreTrainedModel`):
            The model.
        tokenizer (`PreTrainedTokenizer`):
            The tokenizer.
        task (`str`):
            The task name; must start with ``"wikitext"`` or ``"pile"``.
        seq_length (`int`, *optional*, defaults to `2048`):
            The sequence length.
        max_num_samples (`int`, *optional*, defaults to `-1`):
            The maximum number of samples to evaluate. Non-positive means no limit.

    Returns:
        float: The perplexity.

    Raises:
        ValueError: If the task is not supported, or the tokenized test set is
            shorter than one window of ``seq_length`` tokens.
    """
    assert seq_length > 0, "seq_length must be positive"
    if task.startswith("wikitext"):
        test_dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
        test_dataset = tokenizer("\n\n".join(test_dataset["text"]), return_tensors="pt")
    elif task.startswith("pile"):
        test_dataset = load_dataset("pile", task, split="test")
        test_dataset = tokenizer("\n\n".join(test_dataset["text"]), return_tensors="pt")
    else:
        raise ValueError(f"Invalid task: {task}")
    test_dataset = test_dataset.input_ids.to(model.device)
    num_samples = test_dataset.numel() // seq_length
    if max_num_samples > 0:
        num_samples = min(num_samples, max_num_samples)
    if num_samples == 0:
        # The final division would otherwise fail with a bare ZeroDivisionError.
        raise ValueError(f"Not enough tokens in {task} for a single window of seq_length {seq_length}")
    model = model.eval()
    criterion = nn.CrossEntropyLoss()
    nlls = []
    for i in tqdm(range(num_samples), desc=f"evaluating on {task} with seq_length {seq_length}", dynamic_ncols=True):
        batch = test_dataset[:, (i * seq_length) : ((i + 1) * seq_length)]
        with torch.inference_mode():
            shift_logits = model(batch.to(model.device)).logits[:, :-1, :].contiguous().float()
        shift_labels = batch[:, 1:]
        loss = criterion(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
        # GPTQ convention: scale the mean loss by the window length.
        neg_log_likelihood = loss.float() * seq_length
        nlls.append(neg_log_likelihood)
    return math.exp(sum(nlls) / (num_samples * seq_length))


class LmevalEvaluator(LlmEvaluatorBase):
    """Evaluator backed by `lm_eval`."""

    def __init__(self, model: PreTrainedModel, tokenizer: PreTrainedTokenizer, batch_size: int = 1):
        super().__init__(model=model, tokenizer=tokenizer)
        self.lm = lm_eval.models.huggingface.HFLM(pretrained=model, tokenizer=tokenizer, batch_size=batch_size)

    def filter_tasks(self, tasks: list[str]) -> list[str]:
        """Filter the tasks to only include supported tasks."""
        return tasks

    def evaluate(
        self,
        tasks: list[str],
        max_length: int | None = None,
        num_shot: int | None = None,
        fewshot_as_multiturn: bool = False,
        apply_chat_template: bool = False,
        **kwargs,
    ) -> dict[str, dict[str, dict[str, float]]]:
        """Evaluate the model on the given tasks.

        Args:
            tasks (`list[str]`):
                List of tasks to evaluate on.
            max_length (`int`, optional, defaults to `None`):
                Maximum length for the model.
            num_shot (`int`, optional, defaults to `None`):
                Passed to `simple_evaluate` as ``num_fewshot``.
            fewshot_as_multiturn (`bool`, optional, defaults to `False`):
                Passed through to `simple_evaluate`.
            apply_chat_template (`bool`, optional, defaults to `False`):
                Passed through to `simple_evaluate`.

        Returns:
            dict[str, dict[str, dict[str, float]]]:
                Evaluation results `{"results": {"task": {"metric": score}}}`,
                with the ``"samples"`` and ``"config"`` entries removed.
        """
        self.lm._max_length = max_length
        try:
            result = lm_eval.evaluator.simple_evaluate(
                model=self.lm,
                tasks=tasks,
                verbosity="ERROR",
                num_fewshot=num_shot,
                fewshot_as_multiturn=fewshot_as_multiturn,
                apply_chat_template=apply_chat_template,
                **kwargs,
            )
        finally:
            # Restore the wrapper even when the evaluation raises, so a failed
            # run does not leave the override in place for later calls.
            self.lm._max_length = None
        result.pop("samples", None)
        result.pop("config", None)
        return result
class LongbenchEvaluator(LlmEvaluatorBase):
    """Evaluator for the LongBench tasks, using greedy decoding.

    Attributes:
        task2maxlen (`dict[str, int]`):
            Maximum number of generated tokens per task.
        task2prompt (`dict[str, str]`):
            Prompt template per task.
    """

    task2maxlen: dict[str, int] = {
        "narrativeqa": 128,
        "qasper": 128,
        "multifieldqa_en": 64,
        "multifieldqa_zh": 64,
        "hotpotqa": 32,
        "2wikimqa": 32,
        "musique": 32,
        "dureader": 128,
        "gov_report": 512,
        "qmsum": 512,
        "multi_news": 512,
        "vcsum": 512,
        "trec": 64,
        "triviaqa": 32,
        "samsum": 128,
        "lsht": 64,
        "passage_count": 32,
        "passage_retrieval_en": 32,
        "passage_retrieval_zh": 32,
        "lcc": 64,
        "repobench-p": 64,
    }
    task2prompt: dict[str, str] = None

    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        model_name: str,
        eos_token_ids: tp.Sequence[int],
        output_dirpath: str = "",
        task2maxlen: dict[str, int] = None,
        task2prompt: dict[str, str] = None,
    ):
        """Initialize the evaluator.

        Args:
            model (`PreTrainedModel`):
                The model.
            tokenizer (`PreTrainedTokenizer`):
                The tokenizer.
            model_name (`str`):
                Model name; selects the chat wrapping and post-processing rules.
            eos_token_ids (`Sequence[int]`):
                Token IDs that stop generation.
            output_dirpath (`str`, *optional*, defaults to `""`):
                If non-empty, predictions are saved under ``<output_dirpath>/longbench``.
            task2maxlen (`dict[str, int]`, *optional*, defaults to `None`):
                Per-instance override of the class-level ``task2maxlen``.
            task2prompt (`dict[str, str]`, *optional*, defaults to `None`):
                Per-instance override of the class-level ``task2prompt``.
        """
        super().__init__(model=model, tokenizer=tokenizer)
        self.model_name = model_name
        self.eos_token_ids = eos_token_ids
        if task2maxlen is not None:
            self.task2maxlen = task2maxlen
        if task2prompt is not None:
            self.task2prompt = task2prompt
        self.output_dirpath = output_dirpath
        self.logger = tools.logging.getLogger(__name__)

    def filter_tasks(self, tasks: list[str]) -> list[str]:
        """Filter the tasks to only include supported tasks.

        ``"longbench-e"`` is kept as a single task, ``"longbench"`` expands to
        every known task; results are sorted by maximum generation length.
        """
        if "longbench-e" in tasks:
            return ["longbench-e"]
        if "longbench" in tasks:
            return sorted(self.task2maxlen.keys(), key=lambda x: self.task2maxlen[x])
        return sorted([task for task in tasks if task in self.task2maxlen], key=lambda x: self.task2maxlen[x])

    def evaluate(self, tasks: list[str], max_length: int, **kwargs) -> dict[str, dict[str, dict[str, float]]]:
        """Evaluate the model on the given tasks.

        Args:
            tasks (`list[str]`):
                Task names. ``["longbench-e"]`` selects the LongBench-E subset,
                scored per context-length bucket.
            max_length (`int`):
                Maximum prompt length in tokens; longer prompts are cut in the middle.

        Returns:
            dict[str, dict[str, dict[str, float]]]:
                ``{"results": {task: scores}, "versions": {task: 1}}``.
                Tasks that produce no predictions are skipped.
        """
        tools.logging.Formatter.indent_inc()
        longbench_e = False
        if "longbench-e" in tasks:
            assert len(tasks) == 1, "LongBench-E should be the only task"
            longbench_e = True
            tasks = [
                "hotpotqa",
                "2wikimqa",
                "triviaqa",
                "passage_count",
                "multifieldqa_en",
                "trec",
                "lcc",
                "repobench-p",
                "qasper",
                "samsum",
                "gov_report",
                "multi_news",
                "passage_retrieval_en",
            ]
        result = {"results": {}, "versions": {}}
        for task in tasks:
            self.logger.info(f"- Evaluating on {task}")
            tools.logging.Formatter.indent_inc()
            preds = self.predict(task=task, max_length=max_length)
            if not preds:
                self.logger.warning(f"No results for {task}")
                tools.logging.Formatter.indent_dec()
                continue
            if self.output_dirpath:
                self.logger.info(f"+ Saving results for {task} to {self.output_dirpath}")
                os.makedirs(os.path.join(self.output_dirpath, "longbench"), exist_ok=True)
                # One JSON object per line.
                with open(
                    os.path.join(self.output_dirpath, "longbench", f"{task}.json"),
                    "w",
                    encoding="utf-8",
                ) as f:
                    for pred in preds:
                        json.dump(pred, f, ensure_ascii=False)
                        f.write("\n")
            predictions, answers, lengths = [], [], []
            for pred in preds:
                predictions.append(pred["prediction"])
                answers.append(pred["answers"])
                lengths.append(pred["length"])
            all_classes = preds[0]["all_classes"]
            if longbench_e:
                scores = LongbenchScorer.scorer_e(
                    task=task,
                    predictions=predictions,
                    answers=answers,
                    lengths=lengths,
                    all_classes=all_classes,
                )
            else:
                scores = {
                    "score": LongbenchScorer.score(
                        task=task,
                        predictions=predictions,
                        answers=answers,
                        all_classes=all_classes,
                    )
                }
            tools.logging.debug(f"+ Scores: {scores}", self.logger)
            result["results"][task] = scores
            result["versions"][task] = 1
            tools.logging.Formatter.indent_dec()
        tools.logging.Formatter.indent_dec()
        return result

    def predict(
        self,
        task: str,
        max_length: int,
        max_gen_length: int | None = None,
        prompt_format: str = "",
    ) -> list[dict[str, tp.Any]]:
        """Generate predictions for every test sample of a task.

        Args:
            task (`str`):
                The LongBench task name.
            max_length (`int`):
                Maximum prompt length in tokens.
            max_gen_length (`int`, *optional*, defaults to `None`):
                Maximum number of generated tokens; `None` uses ``task2maxlen[task]``.
            prompt_format (`str`, *optional*, defaults to `""`):
                Prompt template; empty uses ``task2prompt[task]``.

        Returns:
            `list[dict[str, Any]]`: One dict per sample with the keys
            ``"prediction"``, ``"answers"``, ``"all_classes"`` and ``"length"``.
        """
        if max_gen_length is None:
            max_gen_length = self.task2maxlen[task]
        if prompt_format == "":
            prompt_format = self.task2prompt[task]
        dataset = load_dataset("THUDM/LongBench", task, split="test")
        preds = []
        pbar = tqdm(dataset)
        tools.logging.Formatter.indent_inc()
        for idx, data in enumerate(pbar):
            prompt = prompt_format.format(**data)
            # truncate to fit max_length
            # (we suggest truncate in the middle, since the left and right side may contain crucial instructions)
            tokenized_prompt = self.tokenizer(prompt, truncation=False, return_tensors="pt").input_ids[0]
            if len(tokenized_prompt) > max_length:
                half = int(max_length / 2)
                prompt = self.tokenizer.decode(
                    tokenized_prompt[:half], skip_special_tokens=True
                ) + self.tokenizer.decode(tokenized_prompt[-half:], skip_special_tokens=True)
            if task not in ("trec", "triviaqa", "samsum", "lsht", "lcc", "repobench-p"):
                # chat models are better off without build prompts on these tasks
                prompt = self.build_chat(prompt)
            # Follow the model's device instead of assuming "cuda".
            input = self.tokenizer(prompt, truncation=False, return_tensors="pt").to(self.model.device)
            pbar.set_description(f"Generating for {idx}, len={input.input_ids.shape[-1]}")
            with torch.no_grad():
                # Prefill with the whole prompt, then decode greedily one token at a
                # time while reusing the key/value cache.
                output = self.model(input_ids=input.input_ids, past_key_values=None, use_cache=True)
                past_key_values = output.past_key_values
                pred_token_idx = output.logits[:, -1, :].argmax(dim=-1).unsqueeze(1)
                generated_content = [pred_token_idx.item()]
                for _ in range(max_gen_length - 1):
                    outputs = self.model(input_ids=pred_token_idx, past_key_values=past_key_values, use_cache=True)
                    past_key_values = outputs.past_key_values
                    pred_token_idx = outputs.logits[:, -1, :].argmax(dim=-1).unsqueeze(1)
                    generated_content += [pred_token_idx.item()]
                    if pred_token_idx.item() in self.eos_token_ids:
                        break
            pred = self.tokenizer.decode(generated_content, skip_special_tokens=True)
            pred = self.post_process(pred)
            preds.append(
                {
                    "prediction": pred,
                    "answers": data["answers"],
                    "all_classes": data["all_classes"],
                    "length": data["length"],
                }
            )
        tools.logging.Formatter.indent_dec()
        return preds

    def build_chat(self, prompt):
        """Build chat prompt for chat models."""
        if "llama-2" in self.model_name:
            prompt = f"[INST]{prompt}[/INST]"
        return prompt

    def post_process(self, response: str) -> str:
        """Strip model-specific trailing text from a generated response.

        Args:
            response (`str`):
                The decoded generation.

        Returns:
            `str`: The cleaned response; unchanged for models without a rule.
        """
        if "xgen" in self.model_name:
            response = response.strip().replace("Assistant:", "")
        elif "internlm" in self.model_name:
            # NOTE(review): the separator was an empty string, which makes
            # `str.split` raise ValueError. "<eoa>" follows the LongBench
            # reference implementation — confirm against upstream.
            response = response.split("<eoa>")[0]
        elif "llama-3" in self.model_name:
            # NOTE(review): same empty-separator problem; "</s>" restored — confirm against upstream.
            response = response.split(".assistant")[0].split("\n\nQuestion")[0].split("</s>")[0].strip()
        elif "llama-2-7b" in self.model_name and "instruct" in self.model_name and "32k" in self.model_name:
            response = (
                response.split("(Document")[0]
                .split("\n\nQuestion")[0]
                .split("\n\nAnswer")[0]
                .split("(Passage")[0]
                .strip()
            )
        return response
class LongbenchScorer:
    """Scoring helpers for LongBench predictions.

    Attributes:
        task2metric (`dict[str, Callable]`):
            Metric function per task, called as
            ``metric(prediction, ground_truth, all_classes=...)``.
    """

    task2metric = {
        "narrativeqa": qa_f1_score,
        "qasper": qa_f1_score,
        "multifieldqa_en": qa_f1_score,
        "multifieldqa_zh": qa_f1_zh_score,
        "hotpotqa": qa_f1_score,
        "2wikimqa": qa_f1_score,
        "musique": qa_f1_score,
        "dureader": rouge_zh_score,
        "gov_report": rouge_score,
        "qmsum": rouge_score,
        "multi_news": rouge_score,
        "vcsum": rouge_zh_score,
        "trec": classification_score,
        "triviaqa": qa_f1_score,
        "samsum": rouge_score,
        "lsht": classification_score,
        "passage_retrieval_en": retrieval_score,
        "passage_count": count_score,
        "passage_retrieval_zh": retrieval_zh_score,
        "lcc": code_sim_score,
        "repobench-p": code_sim_score,
    }

    @staticmethod
    def score(
        task: str,
        predictions: tp.Sequence[str],
        answers: tp.Sequence[tp.Sequence[str]],
        all_classes: tp.Sequence[str],
        task2metric: tp.Mapping[str, tp.Callable[[str, str, tp.Any], float]] = None,
    ) -> float:
        """Score the predictions of one task.

        Every prediction is cleaned, then scored against each of its ground
        truths; the best score per prediction is averaged.

        Args:
            task (`str`):
                The task name.
            predictions (`Sequence[str]`):
                The model predictions.
            answers (`Sequence[Sequence[str]]`):
                The ground truths for each prediction.
            all_classes (`Sequence[str]`):
                Class names, forwarded to the metric.
            task2metric (`Mapping[str, Callable]`, *optional*, defaults to `None`):
                Metric per task; `None` uses `LongbenchScorer.task2metric`.

        Returns:
            `float`: The mean best score, in percent, rounded to two decimals.
        """
        if task2metric is None:
            task2metric = LongbenchScorer.task2metric
        total_score = 0.0
        for prediction, ground_truths in zip(predictions, answers, strict=True):
            score = 0.0
            # Cut the prediction at the first marker of a follow-up turn.
            # NOTE(review): one separator was an empty string, which makes
            # `str.split` raise ValueError; "</s>" restored — confirm against upstream.
            prediction = (
                prediction.split(".assistant")[0]
                .split("\n\nQuestion")[0]
                .split("</s>")[0]
                .split("(Document")[0]
                .split("\n\nAnswer")[0]
                .split("(Passage")[0]
                .strip()
            )
            if task in ["trec", "triviaqa", "samsum", "lsht"]:
                # Only the first non-empty line is scored for these tasks.
                prediction = prediction.lstrip("\n").split("\n")[0]
            if task in ["multifieldqa_zh", "dureader"]:
                prediction = prediction.split("问题:")[0].strip()
            if task in ["lsht"]:
                prediction = prediction.split("新闻内容:")[0].strip()
            if task in ["passage_retrieval_zh"]:
                prediction = prediction.split("请问")[0].split("提示")[0].strip()
            for ground_truth in ground_truths:
                score = max(
                    score,
                    task2metric[task](prediction, ground_truth, all_classes=all_classes),
                )
            total_score += score
        return round(100 * total_score / len(predictions), 2)

    @staticmethod
    def scorer_e(
        task: str,
        predictions: tp.Sequence[str],
        answers: tp.Sequence[tp.Sequence[str]],
        lengths: tp.Sequence[int],
        all_classes: tp.Sequence[str],
        task2metric: tp.Mapping[str, tp.Callable[[str, str, tp.Any], float]] = None,
    ) -> dict[str, float]:
        """Score the predictions of one task, grouped by context length.

        Args:
            task (`str`):
                The task name.
            predictions (`Sequence[str]`):
                The model predictions.
            answers (`Sequence[Sequence[str]]`):
                The ground truths for each prediction.
            lengths (`Sequence[int]`):
                The context length of each sample.
            all_classes (`Sequence[str]`):
                Class names, forwarded to the metric.
            task2metric (`Mapping[str, Callable]`, *optional*, defaults to `None`):
                Metric per task; `None` uses `LongbenchScorer.task2metric`.

        Returns:
            `dict[str, float]`: Mean best score in percent for the buckets
            ``"0-4k"``, ``"4-8k"`` and ``"8k+"``. An empty bucket yields NaN.
        """
        if task2metric is None:
            task2metric = LongbenchScorer.task2metric
        scores = {"0-4k": [], "4-8k": [], "8k+": []}
        for prediction, ground_truths, length in zip(predictions, answers, lengths, strict=True):
            score = 0.0
            if task in ["trec", "triviaqa", "samsum", "lsht"]:
                prediction = prediction.lstrip("\n").split("\n")[0]
            for ground_truth in ground_truths:
                score = max(
                    score,
                    task2metric[task](prediction, ground_truth, all_classes=all_classes),
                )
            if length < 4000:
                scores["0-4k"].append(score)
            elif length < 8000:
                scores["4-8k"].append(score)
            else:
                scores["8k+"].append(score)
        for key in scores.keys():
            scores[key] = round(100 * np.mean(scores[key]), 2)
        return scores


# Initialize the evaluator task2prompt by loading the json file.
# The prompts contain non-ASCII (Chinese) text, so the encoding is stated
# explicitly instead of depending on the platform default.
with open(os.path.join(os.path.dirname(__file__), "task2prompt.json"), encoding="utf-8") as f:
    LongbenchEvaluator.task2prompt = json.load(f)
"rouge_score", "rouge_zh_score", ] def normalize_answer(s: str) -> str: """Lower text and remove punctuation, articles and extra whitespace.""" def remove_articles(text: str) -> str: return re.sub(r"\b(a|an|the)\b", " ", text) def white_space_fix(text: str) -> str: return " ".join(text.split()) def remove_punc(text: str) -> str: exclude = set(string.punctuation) return "".join(ch for ch in text if ch not in exclude) return white_space_fix(remove_articles(remove_punc(s.lower()))) def normalize_zh_answer(s: str) -> str: """Lower text and remove punctuation, extra whitespace.""" def white_space_fix(text): return "".join(text.split()) def remove_punc(text): exclude = set( string.punctuation + "!?。。"#$%&'()*+,-/:;<=>@[\]^_`{|}~" "⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏." ) return "".join(ch for ch in text if ch not in exclude) return white_space_fix(remove_punc(s.lower())) def count_score(prediction: str, ground_truth: str, **kwargs) -> float: numbers = re.findall(r"\d+", prediction) right_num = 0 for number in numbers: if str(number) == str(ground_truth): right_num += 1 final_score = 0.0 if len(numbers) == 0 else right_num / len(numbers) return float(final_score) def retrieval_score(prediction: str, ground_truth: str, **kwargs) -> float: pattern = r"Paragraph (\d+)" matches = re.findall(pattern, ground_truth) ground_truth_id = matches[0] numbers = re.findall(r"\d+", prediction) right_num = 0 for number in numbers: if str(number) == str(ground_truth_id): right_num += 1 return 0.0 if len(numbers) == 0 else right_num / len(numbers) def retrieval_zh_score(prediction: str, ground_truth: str, **kwargs) -> float: pattern = r"段落(\d+)" matches = re.findall(pattern, ground_truth) ground_truth_id = matches[0] numbers = re.findall(r"\d+", prediction) right_num = 0 for number in numbers: if str(number) == str(ground_truth_id): right_num += 1 return 0.0 if len(numbers) == 0 else right_num / len(numbers) def code_sim_score(prediction: str, ground_truth: str, **kwargs) -> float: 
def f1_score(prediction: "list[str] | str", ground_truth: "list[str] | str", **kwargs) -> float:
    """Compute the multiset-overlap F1 score between two token sequences.

    Args:
        prediction (`list[str]` or `str`):
            Predicted tokens. Any iterable accepted by ``collections.Counter`` that
            also supports ``len`` works; a plain string is compared character-wise.
        ground_truth (`list[str]` or `str`):
            Reference tokens, same conventions as ``prediction``.
        **kwargs:
            Ignored; accepted so that all metrics share one call signature.

    Returns:
        `float`:
            Harmonic mean of precision and recall of the shared tokens (counted with
            multiplicity), or ``0.0`` when the sequences share no token.
    """
    overlap = Counter(prediction) & Counter(ground_truth)
    num_same = sum(overlap.values())
    if num_same == 0:
        # always hand back a float, as annotated (also covers empty inputs)
        return 0.0
    precision = num_same / len(prediction)
    recall = num_same / len(ground_truth)
    return (2 * precision * recall) / (precision + recall)
[normalize_zh_answer(token) for token in ground_truth_tokens] prediction_tokens = [token for token in prediction_tokens if len(token) > 0] ground_truth_tokens = [token for token in ground_truth_tokens if len(token) > 0] return f1_score(prediction_tokens, ground_truth_tokens) ================================================ FILE: deepcompressor/app/llm/eval/longbench/task2prompt.json ================================================ { "narrativeqa": "You are given a story, which can be either a novel or a movie script, and a question. Answer the question asconcisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story asconcisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:", "qasper": "You are given a scientific article and a question. Answer the question as concisely as you can, using a single phrase or sentence if possible. If the question cannot be answered based on the information in the article, write \"unanswerable\". If the question is a yes/no question, answer \"yes\", \"no\", or \"unanswerable\". Do not provide any explanation.\n\nArticle: {context}\n\n Answer the question based on the above article as concisely as you can, using a single phrase or sentence if possible. If the question cannot be answered based on the information in the article, write \"unanswerable\". If the question is a yes/no question, answer \"yes\", \"no\", or \"unanswerable\". 
Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:", "multifieldqa_en": "Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:", "multifieldqa_zh": "阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:", "hotpotqa": "Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:", "2wikimqa": "Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:", "musique": "Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:", "dureader": "请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:", "gov_report": "You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:", "qmsum": "You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:", "multi_news": "You are given several news passages. Write a one-page summary of all news. 
\n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:", "vcsum": "下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:", "trec": "Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}", "triviaqa": "Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}", "samsum": "Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}", "lsht": "请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}", "passage_count": "There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ", "passage_retrieval_en": "Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like \"Paragraph 1\", \"Paragraph 2\", etc.\n\nThe answer is: ", "passage_retrieval_zh": "以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是\"段落1\",\"段落2\"等格式\n\n答案是:", "lcc": "Please complete the code given below. \n{context}Next line of code:\n", "repobench-p": "Please complete the code given below. 
@configclass
@dataclass
class LlmModelConfig(BaseModelConfig):
    """Arguments for creating a large language model.

    Args:
        name (`str`):
            Name of the model. It must contain a ``-``-separated size part such as
            ``7b``, ``350m`` or ``8x7b``; the parts before it form the family and the
            parts after it form the variant.
        path (`str`, *optional*, defaults to `""`):
            Path of the model.
        root (`str`, *optional*, defaults to `""`):
            Root directory path for models.
        local_path (`str`, *optional*, defaults to `""`):
            Local path of the model.
        local_root (`str`, *optional*, defaults to `""`):
            Local root directory path for models.
        dtype (`torch.dtype`, *optional*, defaults to `None`):
            Data type of the model. If not specified, `torch.bfloat16` is used for
            models stored in bfloat16 and `torch.float16` for models stored in
            float16 or float32.
        use_flash_attn (`bool`, *optional*, defaults to `False`):
            Whether to request the ``flash_attention_2`` attention implementation.
        fast_tokenizer (`bool`, *optional*, defaults to `True`):
            Whether to use fast tokenizer.

    Attributes:
        size (`float`):
            Size of the model in billions of parameters, parsed from the name
            (``m`` sizes are divided by 1000, ``AxB`` sizes are multiplied out).
        variant (`str`):
            Variant of the model.
        orig_dtype (`torch.dtype`):
            ``torch_dtype`` recorded in the pretrained configuration.
    """

    _model_factories: tp.ClassVar[dict[str, tp.Callable[[str], tuple[PreTrainedModel, PreTrainedTokenizer]]]] = {}

    size: float = field(init=False)
    variant: str = field(init=False)
    dtype: torch.dtype = field(default_factory=lambda s=None: eval_dtype(s, with_quant_dtype=False))
    use_flash_attn: bool = False
    fast_tokenizer: bool = True
    orig_dtype: torch.dtype = field(init=False)

    def __post_init__(self):
        """Infer family, size and variant from the name and resolve the data type."""
        parts = self.name.split("-")
        # we first infer the size, it should be a string matching "$\d+[mb]$"
        family, size, variant = "", "", ""
        for i, part in enumerate(parts):
            part = part.lower()
            # ``endswith`` instead of ``part[-1]`` so that an empty part (e.g. from
            # "a--7b") is skipped rather than raising an IndexError
            if part.endswith(("m", "b")):
                _part = part[:-1].replace("x", "", 1)
                if _part.isdigit():
                    size = part
                    family = "-".join(parts[:i])
                    if len(parts) > i + 1:
                        variant = "-".join(parts[i + 1 :])
                    break
        assert size, f"Cannot infer size from {self.name}"
        assert family, f"Cannot infer family from {self.name}"
        if not self.family:
            self.family = family
        self.variant = variant
        if size[-1] == "m":
            size = float(size[:-1]) / 1000
        else:
            assert size[-1] == "b"
            size = size[:-1]
            if "x" in size:
                # mixture-of-experts naming: "<num_experts>x<size_per_expert>"
                num_experts, expert_gb = size.split("x")
                num_experts = int(num_experts)
                expert_size = float(expert_gb)
                size = num_experts * expert_size
            else:
                size = float(size)
        self.size = size
        super().__post_init__()
        self.name = self.name.lower()
        self.family = self.family.lower()
        self.variant = self.variant.lower()
        config = AutoConfig.from_pretrained(self.path)
        self.orig_dtype = config.torch_dtype
        if self.orig_dtype == torch.float32:
            self.dtype = self.dtype or torch.float16
        elif self.orig_dtype == torch.float16:
            self.dtype = self.dtype or torch.float16
        elif self.orig_dtype == torch.bfloat16:
            self.dtype = self.dtype or torch.bfloat16
        else:
            raise ValueError(f"Unsupported data type: {self.orig_dtype}")

    def build(self) -> tuple[PreTrainedModel, PreTrainedTokenizer]:
        """Build model and tokenizer.

        A factory registered for ``self.name`` takes precedence; otherwise the
        default Hugging Face loading path is used. Both paths receive the
        configured data type, tokenizer flavour and flash-attention switch.

        Returns:
            `tuple[PreTrainedModel, PreTrainedTokenizer]`:
                Model and tokenizer.
        """
        torch_dtype = self.dtype
        if self.name in self._model_factories:
            return self._model_factories[self.name](
                self.path, torch_dtype=torch_dtype, use_fast=self.fast_tokenizer, use_flash_attn=self.use_flash_attn
            )
        # forward the tokenizer/attention options as well: ``_default_build`` pops
        # them before loading, so without this they would be silently ignored
        kwargs = {
            "torch_dtype": torch_dtype,
            "use_fast": self.fast_tokenizer,
            "use_flash_attn": self.use_flash_attn,
        }
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            kwargs["device_map"] = "balanced"
        return self._default_build(self.path, **kwargs)

    @staticmethod
    def _default_build(path: str, **kwargs) -> tuple[PreTrainedModel, PreTrainedTokenizer]:
        """Build model and tokenizer with the Hugging Face auto classes.

        Args:
            path (`str`):
                Path of the pretrained model.
            **kwargs:
                ``use_fast`` (defaults to `True`) selects the tokenizer flavour and
                ``use_flash_attn`` requests ``flash_attention_2``; both are consumed
                here. Everything else is passed to
                ``AutoModelForCausalLM.from_pretrained``.

        Returns:
            `tuple[PreTrainedModel, PreTrainedTokenizer]`:
                Model (patched and switched to eval mode) and tokenizer.
        """
        config = AutoConfig.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=kwargs.pop("use_fast", True))
        if kwargs.pop("use_flash_attn", False):
            kwargs["attn_implementation"] = "flash_attention_2"
        model = AutoModelForCausalLM.from_pretrained(path, config=config, **kwargs)
        patch_attention(model)
        patch_gemma_rms_norm(model)
        model.eval()
        return model, tokenizer

    @classmethod
    def register_model_factory(
        cls,
        names: str | tuple[str, ...],
        /,
        factory: tp.Callable[[str, torch.dtype], tuple[PreTrainedModel, PreTrainedTokenizer]],
        *,
        overwrite: bool = False,
    ) -> None:
        """Register a model factory.

        Args:
            names (`str` or `tuple[str, ...]`):
                Names of the model.
            factory (`Callable[[str, torch.dtype], tuple[PreTrainedModel, PreTrainedTokenizer]]`):
                Factory function. ``build`` calls it with the model path and the
                keyword arguments ``torch_dtype``, ``use_fast`` and ``use_flash_attn``.
            overwrite (`bool`, *optional*, defaults to `False`):
                Whether to overwrite the existing factory for the model.

        Raises:
            ValueError: If a factory already exists for a name and ``overwrite`` is `False`.
        """
        if isinstance(names, str):
            names = (names,)
        for name in names:
            if not overwrite and name in cls._model_factories:
                raise ValueError(f"Factory for {name} already exists")
            cls._model_factories[name] = factory
class RotaryEmbedding(nn.Module):
    """Rotary embedding for attention.

    The rotation is wrapped in a parameter-free module so that it appears as a
    named sub-module (``q_rotary_emb`` / ``k_rotary_emb``) of a patched attention.
    """

    def __init__(self) -> None:
        """Initialize the class."""
        super().__init__()

    def forward(
        self, states: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor, unsqueeze_dim: int = 1
    ) -> torch.Tensor:
        """Apply rotary embedding to states.

        Args:
            states (torch.Tensor):
                States, shaped ``[batch, heads, seq_len, head_dim]`` when
                ``unsqueeze_dim == 1`` and ``[batch, seq_len, heads, head_dim]`` otherwise.
            cos (torch.Tensor): Cosine tensor, broadcastable to ``states``.
            sin (torch.Tensor): Sine tensor, broadcastable to ``states``.
            unsqueeze_dim (int, optional):
                The dimension along which to unsqueeze cos and sin. Defaults to ``1``.

        Returns:
            torch.Tensor:
                States with rotary embedding, flattened to
                ``[batch, seq_len, heads * head_dim]``.
        """
        states = (states * cos) + (rotate_half(states) * sin)
        if unsqueeze_dim == 1:
            batch_size, num_heads, seq_len, head_dim = states.shape
            states = states.transpose(1, 2)
        else:
            batch_size, seq_len, num_heads, head_dim = states.shape
        # ``reshape`` is a zero-copy view whenever the strides allow it and copies
        # otherwise; ``view`` would raise for a transposed, head-major tensor
        return states.reshape(batch_size, seq_len, num_heads * head_dim)
def patch_attention(model: nn.Module) -> nn.Module:
    """Patch attention modules so that rotary embedding runs through sub-modules.

    For every module whose class name ends with ``attention`` and whose forward
    function has ``apply_rotary_pos_emb`` in its globals, two ``RotaryEmbedding``
    modules (``q_rotary_emb`` and ``k_rotary_emb``) are attached and the forward
    function is replaced by a copy whose ``apply_rotary_pos_emb`` global points to
    the module-bound replacement. The function the module had before patching is
    kept in ``_deepcompressor_orig_forward``.

    Modules that already carry ``_deepcompressor_orig_forward`` are skipped, so the
    function is safe to call repeatedly on the same model.

    Args:
        model (`nn.Module`):
            Model to patch in place.

    Returns:
        `nn.Module`:
            The same model instance.
    """
    logger = tools.logging.getLogger(f"{__name__}.ModelPatcher")
    for module_name, module in model.named_modules():
        classname = type(module).__name__
        if not classname.lower().endswith("attention"):
            continue
        # check the marker first, whatever the kind of ``forward``: re-patching would
        # overwrite the saved original forward and replace the rotary modules
        if hasattr(module, "_deepcompressor_orig_forward"):
            logger.info(f"- Attention in {module_name} has already been patched")
            continue
        forward_name = ""
        if isinstance(module.forward, functools.partial):
            # this module has been wrapped in ``accelerate`` package
            assert hasattr(module, "_old_forward")
            assert module._old_forward is module.forward.__wrapped__
            if "apply_rotary_pos_emb" in module._old_forward.__func__.__globals__:
                forward_name = "_old_forward"
        elif "apply_rotary_pos_emb" in module.forward.__func__.__globals__:
            forward_name = "forward"
        if not forward_name:
            # the forward function does not use ``apply_rotary_pos_emb``
            continue
        logger.info(f"- Patching {classname}.{forward_name} in {module_name}")
        module.q_rotary_emb = RotaryEmbedding()
        module.k_rotary_emb = RotaryEmbedding()
        module.apply_rotary_pos_emb = functools.partial(apply_rotary_pos_emb, module)
        module._deepcompressor_orig_forward = getattr(module, forward_name)

        # rebuild the forward function with a private copy of its globals so that only
        # this module sees the replaced ``apply_rotary_pos_emb``
        orig_forward = module._deepcompressor_orig_forward.__func__
        new_globals = dict(orig_forward.__globals__)
        new_globals["apply_rotary_pos_emb"] = module.apply_rotary_pos_emb
        new_forward = copy_func(orig_forward, new_globals)
        setattr(module, forward_name, new_forward.__get__(module))
    return model
"_old_forward") assert module._old_forward is module.forward.__wrapped__ forward_name = "_old_forward" else: forward_name = "forward" if forward_name: logger.info(f"- Patching {classname}.{forward_name} in {module_name}") module.weight.data.add_(1.0) module._deepcompressor_orig_forward = getattr(module, forward_name) setattr(module, forward_name, functools.partial(gemma_rms_norm_forward, module)) return model ================================================ FILE: deepcompressor/app/llm/nn/struct.py ================================================ # -*- coding: utf-8 -*- """Utility functions for Large Language Models.""" # region imports import typing as tp from dataclasses import dataclass, field import torch.nn as nn from transformers import PreTrainedModel from transformers.models.gemma2.modeling_gemma2 import ( Gemma2Attention, Gemma2Config, Gemma2DecoderLayer, Gemma2ForCausalLM, Gemma2ForSequenceClassification, Gemma2MLP, Gemma2Model, ) from transformers.models.llama.modeling_llama import ( LlamaAttention, LlamaConfig, LlamaDecoderLayer, LlamaForCausalLM, LlamaForSequenceClassification, LlamaMLP, LlamaModel, ) from transformers.models.mistral.modeling_mistral import ( MistralAttention, MistralConfig, MistralDecoderLayer, MistralForCausalLM, MistralForSequenceClassification, MistralMLP, MistralModel, ) from transformers.models.mixtral.modeling_mixtral import ( MixtralAttention, MixtralConfig, MixtralDecoderLayer, MixtralForCausalLM, MixtralForSequenceClassification, MixtralModel, MixtralSparseMoeBlock, ) from transformers.models.qwen2.modeling_qwen2 import ( Qwen2Attention, Qwen2Config, Qwen2DecoderLayer, Qwen2ForCausalLM, Qwen2ForSequenceClassification, Qwen2MLP, Qwen2Model, ) from transformers.models.t5.modeling_t5 import ( T5Attention, T5Block, T5Config, T5DenseActDense, T5DenseGatedActDense, T5EncoderModel, T5LayerFF, T5LayerSelfAttention, T5Stack, ) from deepcompressor.nn.struct.attn import ( AttentionConfigStruct, BaseTransformerStruct, 
@dataclass(kw_only=True)
class LlmTransformerBlockConfigStruct(FeedForwardConfigStruct, AttentionConfigStruct):
    """Configuration of a single transformer block of a large language model.

    The class adds no field of its own; it merges the feed-forward and the
    attention configuration into one structure.

    Args:
        hidden_size (`int`):
            Number of input channels, i.e., the size of the input/output activations.
        inner_size (`int`):
            Number of **query** channels in the attention block, i.e., the size of
            the inner activations.
        intermediate_size (`int`):
            Number of intermediate channels in the feedforward network.
        intermediate_act_type (`str`):
            Activation function applied to the intermediate activations of the
            feedforward network.
        num_query_heads (`int`):
            Number of query heads.
        num_key_value_heads (`int`):
            Number of key-value heads.
        num_experts (`int`):
            Number of experts of the feedforward network.
        with_qk_norm (`bool`, *optional*, defaults to `False`):
            Whether queries and keys are normalized.
        with_rope (`bool`):
            Whether Rotary Positional Encoding (RoPE) is used.

    Attributes:
        head_size (`int`):
            Size of one head, equal to `num_query_channels // num_query_heads`.
        num_key_value_groups (`int`):
            Number of key-value groups, equal to `num_query_heads // num_key_value_heads`.
        intermediate_lowerbound (`float` or `None`):
            Lowerbound of the intermediate activations in the feedforward network.
    """
""" vocab_size: int num_hidden_layers: int @dataclass(kw_only=True) class LlmConfigStruct(LlmTransformerConfigStruct): """Large Language Model Configuration. Args: hidden_size (`int`): The size of the input/output activations, i.e., the number of input channels. inner_size (`int`): The size of the inner activations, i.e., the number of **query** channels in the attention block. intermediate_size (`int`): The number of intermediate channels in the feedforward network. intermediate_act_type (`str`): The activation function for the intermediate activations in the feedforward network. num_query_heads (`int`): The number of query heads. num_key_value_heads (`int`): The number of key-value heads. num_experts (`int`): The number of experts (for the feedforward network). with_qk_norm (`bool`, *optional*, defaults to `False`): Whether to apply normalization to queries and keys. with_rope (`bool`): Whether to use Rotary Positional Encoding (RoPE). vocab_size (`int`): The size of the vocabulary. num_hidden_layers (`int`): The number of hidden layers. tie_word_embeddings (`bool`): Whether to tie the word embeddings with the head weights. Attributes: head_size (`int`): The size of the head, equal to `num_query_channels // num_query_heads`. num_key_value_groups (`int`): The number of key-value groups, equal to `num_query_heads // num_key_value_heads`. intermediate_lowerbound (`float` or `None`): The lowerbound of the intermediate activations in feedforward network. """ tie_word_embeddings: bool = False @dataclass(kw_only=True) class LlmSelfAttentionStruct(SelfAttentionStruct): """Large Language Model Attention Block.""" # region relative keys q_rkey: tp.ClassVar[str] = "attn_q" k_rkey: tp.ClassVar[str] = "attn_k" v_rkey: tp.ClassVar[str] = "attn_v" # endregion parent: tp.Optional["LlmTransformerBlockStruct"] = field(repr=False) kwargs: tuple[str, ...] 
def filter_kwargs(self, kwargs: dict) -> dict: """Filter layer kwargs to attn kwargs.""" return {k: v for k, v in kwargs.items() if k in self.kwargs} @staticmethod def _default_construct( module: ATTENTION_CLS, /, parent: tp.Optional["LlmTransformerBlockStruct"] = None, fname: str = "", rname: str = "", rkey: str = "", idx: int = 0, **kwargs, ) -> "LlmSelfAttentionStruct": if isinstance(module, T5Attention): with_rope, num_query_heads, num_key_value_heads = False, module.n_heads, module.n_heads q_proj, k_proj, v_proj, o_proj = module.q, module.k, module.v, module.o q_proj_rname, k_proj_rname, v_proj_rname, o_proj_rname = "q", "k", "v", "o" q, k, v = module.q, module.k, module.v q_rname, k_rname, v_rname = "q", "k", "v" kwargs = ( "mask", "key_value_states", "position_bias", "past_key_value", "layer_head_mask", "query_length", "use_cache", "output_attentions", ) elif isinstance(module, (LlamaAttention, MistralAttention, MixtralAttention, Qwen2Attention, Gemma2Attention)): with_rope = True num_query_heads = module.config.num_attention_heads num_key_value_heads = module.config.num_key_value_heads q_proj, k_proj, v_proj, o_proj = module.q_proj, module.k_proj, module.v_proj, module.o_proj q_proj_rname, k_proj_rname, v_proj_rname, o_proj_rname = "q_proj", "k_proj", "v_proj", "o_proj" if hasattr(module, "q_rotary_emb"): q, k = module.q_rotary_emb, module.k_rotary_emb q_rname, k_rname = "q_rotary_emb", "k_rotary_emb" assert isinstance(q, RotaryEmbedding) assert isinstance(k, RotaryEmbedding) else: q, k = module.q_proj, module.k_proj q_rname, k_rname = "q_proj", "k_proj" v, v_rname = module.v_proj, "v_proj" kwargs = ( "attention_mask", "position_ids", "past_key_value", "output_attentions", "use_cache", "position_embeddings", "cache_position", ) else: raise ValueError(f"Unsupported attention type: {type(module)}") config = AttentionConfigStruct( hidden_size=q_proj.weight.shape[1], inner_size=q_proj.weight.shape[0], num_query_heads=num_query_heads, 
@dataclass(kw_only=True)
class LlmFeedForwardStruct(FeedForwardStruct):
    """Large Language Model Feedforward Network.

    Wraps a dense MLP (Llama/Mistral/Qwen2/Gemma2), a Mixtral sparse
    mixture-of-experts block, or a T5 dense / gated-dense feed-forward module and
    records its up/down projections, experts and optional MoE gate together with
    their relative attribute names.
    """

    parent: tp.Optional["LlmTransformerBlockStruct"] = field(repr=False)

    @staticmethod
    def _resolve_act_type(
        parent: tp.Optional["LlmTransformerBlockStruct"], act_fn: nn.Module, *, gated: bool
    ) -> str:
        """Determine the intermediate activation type of a feed-forward module.

        Args:
            parent (`LlmTransformerBlockStruct` or `None`):
                The enclosing block struct. Its config is used when available.
            act_fn (`nn.Module`):
                The activation module, used to derive a name when no parent config exists.
            gated (`bool`):
                Whether the feed-forward network is gated (type must end with ``"_glu"``).

        Returns:
            `str`: The activation type, suffixed with ``"_glu"`` for gated networks.
        """
        # The parent's config may still be None while the parent is constructing its
        # children, so both must be checked before dereferencing.
        if parent is not None and parent.config is not None:
            act_type = parent.config.intermediate_act_type
            assert act_type.endswith("_glu") == gated
            return act_type
        # Lower-case first: class names end with "Activation" (capital A), which a
        # case-sensitive removesuffix("activation") on the raw name would never strip.
        act_type = type(act_fn).__name__.lower().removesuffix("activation")
        return f"{act_type}_glu" if gated else act_type

    @staticmethod
    def _default_construct(
        module: FEEDFORWARD_CLS,
        /,
        parent: tp.Optional["LlmTransformerBlockStruct"] = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "LlmFeedForwardStruct":
        """Build the struct for a supported feed-forward module.

        Args:
            module: The feed-forward module to wrap.
            parent: The enclosing transformer block struct, if any.
            fname: Field name of this struct inside its parent.
            rname: Relative name of the module inside its parent module.
            rkey: Relative key of this struct.
            idx: Index of this struct inside its parent.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            `LlmFeedForwardStruct`: The constructed struct.

        Raises:
            ValueError: If ``module`` is not a supported feed-forward type.
        """
        resolve = LlmFeedForwardStruct._resolve_act_type
        if isinstance(module, (LlamaMLP, MistralMLP, Qwen2MLP, Gemma2MLP)):
            act_type = resolve(parent, module.act_fn, gated=True)
            up_projs, down_projs = [module.up_proj, module.gate_proj], [module.down_proj]
            experts = [module]
            moe_gate = None
            up_proj_rnames = ["up_proj", "gate_proj"]
            down_proj_rnames = ["down_proj"]
            experts_rname = ""
            moe_gate_rname = ""
        elif isinstance(module, MixtralSparseMoeBlock):
            act_type = resolve(parent, module.experts[0].act_fn, gated=True)
            # All w3 projections first, then all w1 projections (one pair per expert).
            up_projs = [expert.w3 for expert in module.experts] + [expert.w1 for expert in module.experts]
            down_projs = [expert.w2 for expert in module.experts]
            experts = list(module.experts)
            moe_gate = module.gate
            up_proj_rnames = ["w3", "w1"]
            down_proj_rnames = ["w2"]
            experts_rname = "experts"
            moe_gate_rname = "gate"
        elif isinstance(module, T5DenseActDense):
            act_type = resolve(parent, module.act, gated=False)
            up_projs = [module.wi]
            down_projs = [module.wo]
            experts = [module]
            moe_gate = None
            up_proj_rnames = ["wi"]
            down_proj_rnames = ["wo"]
            experts_rname = ""
            moe_gate_rname = ""
        elif isinstance(module, T5DenseGatedActDense):
            act_type = resolve(parent, module.act, gated=True)
            up_projs = [module.wi_1, module.wi_0]
            down_projs = [module.wo]
            experts = [module]
            moe_gate = None
            up_proj_rnames = ["wi_1", "wi_0"]
            down_proj_rnames = ["wo"]
            experts_rname = ""
            moe_gate_rname = ""
        else:
            raise ValueError(f"Unsupported feed forward network type: {type(module)}")
        config = FeedForwardConfigStruct(
            hidden_size=up_projs[0].weight.shape[1],
            intermediate_size=up_projs[0].weight.shape[0],
            intermediate_act_type=act_type,
            num_experts=len(experts),
        )
        # Cross-check the sizes read from the weights against the parent's configuration.
        if parent is not None and parent.config is not None:
            assert parent.config.hidden_size == config.hidden_size
            assert parent.config.intermediate_size == config.intermediate_size
            assert parent.config.intermediate_act_type == config.intermediate_act_type
            assert parent.config.num_experts == config.num_experts
        return LlmFeedForwardStruct(
            module=module,
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            config=config,
            up_projs=up_projs,
            down_projs=down_projs,
            moe_gate=moe_gate,
            experts=experts,
            up_proj_rnames=up_proj_rnames,
            down_proj_rnames=down_proj_rnames,
            moe_gate_rname=moe_gate_rname,
            experts_rname=experts_rname,
        )
self.pre_attn_norms[0] if self.pre_attn_norms else None @property def attn(self) -> nn.Module: return self.attns[0] @property def post_attn_norm(self) -> nn.LayerNorm | None: return self.post_attn_norms[0] if self.post_attn_norms else None @property def pre_attn_norm_rname(self) -> str: return self.pre_attn_norm_rnames[0] if self.pre_attn_norm_rnames else "" @property def attn_rname(self) -> str: return self.attn_rnames[0] @property def post_attn_norm_rname(self) -> str: return self.post_attn_norm_rnames[0] if self.post_attn_norm_rnames else "" @property def pre_attn_norm_name(self) -> str: return self.pre_attn_norm_names[0] if self.pre_attn_norm_names else "" @property def attn_name(self) -> str: return self.attn_names[0] @property def post_attn_norm_name(self) -> str: return self.post_attn_norm_names[0] if self.post_attn_norm_names else "" @property def attn_struct(self) -> LlmSelfAttentionStruct: return self.attn_structs[0] # endregion def __post_init__(self): super().__post_init__() assert len(self.attn_structs) == 1 if self.config is None: self.config = LlmTransformerBlockConfigStruct( hidden_size=self.attn_struct.config.hidden_size, inner_size=self.attn_struct.config.inner_size, num_query_heads=self.attn_struct.config.num_query_heads, num_key_value_heads=self.attn_struct.config.num_key_value_heads, with_qk_norm=self.attn_struct.config.with_qk_norm, with_rope=self.attn_struct.config.with_rope, intermediate_size=self.ffn_struct.config.intermediate_size, intermediate_act_type=self.ffn_struct.config.intermediate_act_type, num_experts=self.ffn_struct.config.num_experts, ) @staticmethod def _default_construct( module: TRANSFORMER_BLOCK_CLS, /, parent: tp.Optional["LlmTransformerStruct"] = None, fname: str = "", rname: str = "", rkey: str = "", idx: int = 0, **kwargs, ) -> "LlmTransformerBlockStruct": if isinstance( module, (LlamaDecoderLayer, MistralDecoderLayer, Qwen2DecoderLayer, MixtralDecoderLayer, Gemma2DecoderLayer) ): pre_attn_norms, attns = 
[module.input_layernorm], [module.self_attn] pre_attn_norm_rnames, attn_rnames = ["input_layernorm"], ["self_attn"] if isinstance(module, Gemma2DecoderLayer): post_attn_norms, post_attn_norm_rnames = [module.post_attention_layernorm], ["post_attention_layernorm"] pre_ffn_norm, pre_ffn_norm_rname = (module.pre_feedforward_layernorm, "pre_feedforward_layernorm") post_ffn_norm, post_ffn_norm_rname = module.post_feedforward_layernorm, "post_feedforward_layernorm" else: post_attn_norms, post_attn_norm_rnames = [], [] pre_ffn_norm, pre_ffn_norm_rname = module.post_attention_layernorm, "post_attention_layernorm" post_ffn_norm, post_ffn_norm_rname = None, "" if isinstance(module, MixtralDecoderLayer): ffn, ffn_rname = module.block_sparse_moe, "block_sparse_moe" else: ffn, ffn_rname = module.mlp, "mlp" elif isinstance(module, T5Block): pre_attn_norms, attns, pre_attn_norm_rnames, attn_rnames = [], [], [], [] post_attn_norms, post_attn_norm_rnames = [], [] post_ffn_norm, post_ffn_norm_rname = None, "" for i, layer in enumerate(module.layer): if isinstance(layer, T5LayerSelfAttention): pre_attn_norms.append(layer.layer_norm) attns.append(layer.SelfAttention) pre_attn_norm_rnames.append(f"layer.{i}.layer_norm") attn_rnames.append(f"layer.{i}.SelfAttention") else: assert isinstance(layer, T5LayerFF) pre_ffn_norm, ffn = layer.layer_norm, layer.DenseReluDense pre_ffn_norm_rname, ffn_rname = f"layer.{i}.layer_norm", f"layer.{i}.DenseReluDense" else: raise ValueError(f"Unsupported layer type: {type(module)}") config = parent.config if parent is not None and parent.config is not None else None return LlmTransformerBlockStruct( module=module, parent=parent, fname=fname, idx=idx, rname=rname, rkey=rkey, config=config, pre_attn_norms=pre_attn_norms, attns=attns, post_attn_norms=post_attn_norms, pre_ffn_norm=pre_ffn_norm, ffn=ffn, post_ffn_norm=post_ffn_norm, pre_attn_norm_rnames=pre_attn_norm_rnames, attn_rnames=attn_rnames, post_attn_norm_rnames=post_attn_norm_rnames, 
@dataclass(kw_only=True)
class LlmTransformerStruct(BaseTransformerStruct):
    """Structural description of an LLM backbone.

    Holds the token (and optional position) embedding modules and the list of
    transformer layers, and builds one block struct per layer in ``__post_init__``.
    """

    # region relative keys
    layer_rkey: tp.ClassVar[str] = ""
    layer_struct_cls: tp.ClassVar[tp.Type[LlmTransformerBlockStruct]] = LlmTransformerBlockStruct
    # endregion

    parent: tp.Optional["LlmModelStruct"] = field(repr=False)
    config: LlmTransformerConfigStruct = field(default=None)

    # region child modules
    embed_tokens: nn.Embedding  # token embedding module
    embed_positions: nn.Embedding | None  # position embedding module, if any
    layers: nn.ModuleList
    # endregion
    # region relative names
    embed_tokens_rname: str
    embed_positions_rname: str
    layers_rname: str
    # endregion
    # region absolute names
    embed_tokens_name: str = field(init=False, repr=False)
    embed_positions_name: str = field(init=False, repr=False)
    layers_name: str = field(init=False, repr=False)
    layer_names: list[str] = field(init=False, repr=False)
    # endregion
    # region child structs
    layer_structs: list[LlmTransformerBlockStruct] = field(init=False, repr=False)
    # endregion

    # region abstractmethod implementations

    @property
    def num_blocks(self) -> int:
        """Number of transformer blocks (length of ``layers``)."""
        return len(self.layers)

    @property
    def block_structs(self) -> list[LlmTransformerBlockStruct]:
        """Block structs, one per layer."""
        return self.layer_structs

    @property
    def block_names(self) -> list[str]:
        """Absolute names of the transformer blocks."""
        return self.layer_names

    # endregion

    def __post_init__(self) -> None:
        """Resolve absolute names, build the per-layer structs and settle the config."""
        super().__post_init__()
        prefix = self.name
        self.embed_tokens_name = join_name(prefix, self.embed_tokens_rname)
        self.embed_positions_name = (
            "" if self.embed_positions is None else join_name(prefix, self.embed_positions_rname)
        )
        self.layers_name = join_name(prefix, self.layers_rname)
        relative_names = [f"{self.layers_rname}.{position}" for position, _ in enumerate(self.layers)]
        self.layer_names = [join_name(prefix, relative) for relative in relative_names]
        structs = []
        for position, (block, relative) in enumerate(zip(self.layers, relative_names, strict=True)):
            structs.append(
                self.layer_struct_cls.construct(
                    block, parent=self, fname="layer", rname=relative, rkey=self.layer_rkey, idx=position
                )
            )
        self.layer_structs = structs
        if self.config is not None:
            # A config was supplied: it must agree with the actual modules.
            assert self.config.vocab_size == self.embed_tokens.num_embeddings
            assert self.config.num_hidden_layers == self.num_blocks
            return
        # No config supplied: every block must share one config, which is then extended.
        assert all(struct.config == self.block_structs[0].config for struct in self.block_structs)
        reference = self.block_structs[0].config
        inherited = (
            "hidden_size",
            "inner_size",
            "num_query_heads",
            "num_key_value_heads",
            "with_qk_norm",
            "with_rope",
            "intermediate_size",
            "intermediate_act_type",
            "num_experts",
        )
        self.config = LlmTransformerConfigStruct(
            **{key: getattr(reference, key) for key in inherited},
            vocab_size=self.embed_tokens.num_embeddings,
            num_hidden_layers=self.num_blocks,
        )

    def get_iter_layer_activations_args(
        self, **kwargs
    ) -> tuple[list[nn.Module], list[LlmTransformerBlockStruct], list[bool], list[bool]]:
        """
        Get the arguments for iterating over the layers and their activations.

        Args:
            skip_pre_modules (`bool`):
                Whether to skip the pre-modules
            skip_post_modules (`bool`):
                Whether to skip the post-modules

        Returns:
            `tuple[list[nn.Module], list[LlmTransformerBlockStruct], list[bool], list[bool]]`:
                the layers, the layer structs, the recomputes (all `False`),
                and the use_prev_layer_outputs (all `True`)
        """
        count = len(self.layers)
        return self.layers, self.layer_structs, [False] * count, [True] * count

    @staticmethod
    def _default_construct(
        module: TRANSFORMER_CLS,
        /,
        parent: tp.Optional["LlmModelStruct"] = None,
        fname: str = "",
        rname: str = "",
        rkey: str = "",
        idx: int = 0,
        **kwargs,
    ) -> "LlmTransformerStruct":
        """Build the struct for a supported backbone module.

        Raises:
            ValueError: If ``module`` is not a supported backbone type.
        """
        # The supported backbones differ only in the attribute names of the layer
        # list and of the final normalization module.
        if isinstance(module, (LlamaModel, MistralModel, MixtralModel, Qwen2Model, Gemma2Model)):
            layers_rname, norm_out_rname = "layers", "norm"
        elif isinstance(module, T5Stack):
            layers_rname, norm_out_rname = "block", "final_layer_norm"
        else:
            raise ValueError(f"Unsupported backbone type: {type(module)}")
        inherited_config = None if parent is None else parent.config
        return LlmTransformerStruct(
            module=module,
            parent=parent,
            fname=fname,
            idx=idx,
            rname=rname,
            rkey=rkey,
            config=inherited_config,
            embed_tokens=module.embed_tokens,
            embed_positions=None,
            norm_in=None,
            proj_in=None,
            layers=getattr(module, layers_rname),
            norm_out=getattr(module, norm_out_rname),
            proj_out=None,
            embed_tokens_rname="embed_tokens",
            embed_positions_rname="",
            norm_in_rname="",
            proj_in_rname="",
            layers_rname=layers_rname,
            norm_out_rname=norm_out_rname,
            proj_out_rname="",
        )
**kwargs ) -> tuple[list[nn.Module], list[LlmTransformerBlockStruct], list[bool], list[bool]]: """ Get the arguments for iterating over the layers and their activations. Args: skip_pre_modules (`bool`): Whether to skip the pre-modules skip_post_modules (`bool`): Whether to skip the post-modules Returns: `tuple[list[nn.Module], list[LlmTransformerBlockStruct], list[bool], list[bool]]`: the layers, the layer structs, the recomputes, and the use_prev_layer_outputs """ return self.backbone_struct.get_iter_layer_activations_args(**kwargs) @staticmethod def _default_construct( model: nn.Module, /, parent: tp.Optional[BaseModuleStruct] = None, fname: str = "", rname: str = "", rkey: str = "", idx: int = 0, **kwargs, ) -> "LlmModelStruct": """Build the Large Language Model Structure.""" if isinstance(model, CASUALLM_CLS) or isinstance(model, SEQCLSLM_CLS): backbone = model.model backbone_rname = "model" elif isinstance(model, T5EncoderModel): backbone = model.encoder backbone_rname = "encoder" elif isinstance(model, TRANSFORMER_CLS): backbone = model backbone_rname = "" else: raise ValueError(f"Unsupported model type: {type(model)}") if isinstance(model, CASUALLM_CLS): head = model.lm_head head_rname = "lm_head" elif isinstance(model, SEQCLSLM_CLS): head = model.score head_rname = "score" elif isinstance(model, T5EncoderModel): head = None head_rname = "" elif isinstance(model, TRANSFORMER_CLS): head = None head_rname = "" else: raise ValueError(f"Unsupported model type: {type(model)}") config = backbone.config if isinstance(config, (LlamaConfig, MistralConfig, MixtralConfig, Qwen2Config, Gemma2Config)): config_struct = LlmConfigStruct( hidden_size=config.hidden_size, inner_size=config.num_attention_heads * config.head_dim if isinstance(config, Gemma2Config) else config.hidden_size, num_query_heads=config.num_attention_heads, num_key_value_heads=config.num_key_value_heads, with_qk_norm=False, with_rope=True, intermediate_size=config.intermediate_size, 
intermediate_act_type=f"{config.hidden_act}_glu".lower(), num_experts=getattr(config, "num_local_experts", 1), vocab_size=config.vocab_size, num_hidden_layers=config.num_hidden_layers, tie_word_embeddings=config.tie_word_embeddings, ) elif isinstance(config, T5Config): config_struct = LlmConfigStruct( hidden_size=config.d_model, inner_size=config.d_kv * config.num_heads, num_query_heads=config.num_heads, num_key_value_heads=config.num_heads, with_rope=False, intermediate_size=config.d_ff, intermediate_act_type=config.dense_act_fn.lower(), num_experts=1, vocab_size=config.vocab_size, num_hidden_layers=config.num_layers, tie_word_embeddings=False, ) if config.is_gated_act: config_struct.intermediate_act_type += "_glu" else: raise ValueError(f"Unsupported config type: {type(config)}") return LlmModelStruct( module=model, parent=parent, fname=fname, idx=idx, rname=rname, rkey=rkey, config=config_struct, backbone=backbone, head=head, backbone_rname=backbone_rname, head_rname=head_rname, ) LlmSelfAttentionStruct.register_factory(ATTENTION_CLS, LlmSelfAttentionStruct._default_construct) LlmFeedForwardStruct.register_factory(FEEDFORWARD_CLS, LlmFeedForwardStruct._default_construct) LlmTransformerBlockStruct.register_factory(TRANSFORMER_BLOCK_CLS, LlmTransformerBlockStruct._default_construct) LlmTransformerStruct.register_factory(TRANSFORMER_CLS, LlmTransformerStruct._default_construct) LlmModelStruct.register_factory( tp.Union[TRANSFORMER_CLS, CASUALLM_CLS, SEQCLSLM_CLS, T5EncoderModel], LlmModelStruct._default_construct ) ================================================ FILE: deepcompressor/app/llm/ptq.py ================================================ # -*- coding: utf-8 -*- """Evaluate a large language model.""" import gc import json import os import pprint import traceback import torch from transformers import GenerationConfig, PreTrainedModel, PreTrainedTokenizer from deepcompressor.utils import tools from .config import LlmCacheConfig, LlmPtqRunConfig, 
LlmQuantCacheConfig, LlmQuantConfig from .nn import LlmModelStruct from .quant import quantize_llm_activations, quantize_llm_weights, reorder_llm, rotate_llm, smooth_llm __all__ = ["ptq"] def ptq( # noqa: C901 model: PreTrainedModel | LlmModelStruct, /, tokenizer: PreTrainedTokenizer, config: LlmQuantConfig, cache: LlmCacheConfig | None = None, load_dirpath: str = "", save_dirpath: str = "", copy_on_save: bool = False, save_model: bool = False, ) -> PreTrainedModel: """Post-training quantization of a large language model. Args: model (`PreTrainedModel` or `LlmStruct`): The large language model. tokenizer (`PreTrainedTokenizer`): The large language model tokenizer. config (`LlmQuantConfig`): The large language model post-training quantization configuration. cache (`LlmCacheConfig`, *optional*, defaults to `None`): The large language model quantization cache path configuration. load_dirpath (`str`, *optional*, defaults to `""`): The directory path to load the quantization checkpoint. save_dirpath (`str`, *optional*, defaults to `""`): The directory path to save the quantization checkpoint. copy_on_save (`bool`, *optional*, defaults to `False`): Whether to copy the cache to the save directory. save_model (`bool`, *optional*, defaults to `False`): Whether to save the quantized model checkpoint. Returns: `PreTrainedModel`: The quantized model. 
""" logger = tools.logging.getLogger(__name__) if not isinstance(model, LlmModelStruct): model = LlmModelStruct.construct(model) assert isinstance(model, LlmModelStruct) quant_wgts = config.enabled_wgts quant_ipts = config.enabled_ipts quant_opts = config.enabled_opts quant_acts = quant_ipts or quant_opts quant = quant_wgts or quant_acts needs_rotation = quant and config.enabled_rotation needs_reorder = quant and config.enabled_reorder needs_smooth = quant and config.enabled_smooth load_model_path, load_path, save_path = "", None, None if load_dirpath: load_path = LlmQuantCacheConfig( rotation=os.path.join(load_dirpath, "rotation.pt"), reorder=os.path.join(load_dirpath, "reorder.pt"), smooth=os.path.join(load_dirpath, "smooth.pt"), wgts=os.path.join(load_dirpath, "wgts.pt"), acts=os.path.join(load_dirpath, "acts.pt"), ) load_model_path = os.path.join(load_dirpath, "model.pt") if os.path.exists(load_model_path): logger.info(f"* Found the model from {load_model_path}") load_model = True save_dirpath = "" # do not save the model if loading if needs_reorder and not config.reorder.dynamic: needs_reorder = False logger.info("* Safe to skip reordering the model") if needs_smooth: needs_smooth = False logger.info("* Safe to skip smoothing the model") else: logger.warning(f"Model checkpoint {load_model_path} does not exist") load_model, load_model_path = False, "" else: load_model = False if save_dirpath: os.makedirs(save_dirpath, exist_ok=True) save_path = LlmQuantCacheConfig( rotation=os.path.join(save_dirpath, "rotation.pt"), reorder=os.path.join(save_dirpath, "reorder.pt"), smooth=os.path.join(save_dirpath, "smooth.pt"), wgts=os.path.join(save_dirpath, "wgts.pt"), acts=os.path.join(save_dirpath, "acts.pt"), ) else: save_model = False # region rotate model if needs_rotation: logger.info("* Rotating model") tools.logging.Formatter.indent_inc() load_from = "" if load_path and os.path.exists(load_path.rotation): load_from = load_path.rotation elif cache and 
cache.path.rotation and os.path.exists(cache.path.rotation): load_from = cache.path.rotation elif os.path.exists(config.rotation.path): load_from = config.rotation.path if load_from: logger.info(f"- Loading rotation from {load_from}") rotation = torch.load(load_from).to(dtype=torch.float64) rotate_llm(model, config.rotation, rotation=rotation) else: logger.info("- Generating rotation") rotation = rotate_llm(model, config.rotation) if cache and cache.path.rotation: logger.info(f"- Saving rotation to {cache.path.rotation}") os.makedirs(cache.dirpath.rotation, exist_ok=True) torch.save(rotation, cache.path.rotation) load_from = cache.path.rotation if save_path: if not copy_on_save and load_from: logger.info(f"- Linking rotation to {save_path.rotation}") os.symlink(os.path.relpath(load_from, save_dirpath), save_path.rotation) else: logger.info(f"- Saving rotation to {save_path.rotation}") torch.save(rotation, save_path.rotation) del rotation tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() logger.info(f"* Development dtype is {config.develop_dtype}") # endregion # region reorder channels if needs_reorder: logger.info("* Reordering channels") tools.logging.Formatter.indent_inc() load_from = "" if load_path and os.path.exists(load_path.reorder): load_from = load_path.reorder elif cache and cache.path.reorder and os.path.exists(cache.path.reorder): load_from = cache.path.reorder if load_from: logger.info(f"- Loading reorder indices from {load_from}") reorder_cache = torch.load(load_from) reorder_llm(model, config, tokenizer, reorder_cache=reorder_cache) else: logger.info("- Generating reorder indices") reorder_cache = reorder_llm(model, config, tokenizer) if cache and cache.path.reorder: logger.info(f"- Saving reorder indices to {cache.path.reorder}") os.makedirs(cache.dirpath.reorder, exist_ok=True) torch.save(reorder_cache, cache.path.reorder) load_from = cache.path.reorder if save_path: if not copy_on_save and load_from: logger.info(f"- 
Linking reorder indices to {save_path.reorder}") os.symlink(os.path.relpath(load_from, save_dirpath), save_path.reorder) else: logger.info(f"- Saving reorder indices to {save_path.reorder}") torch.save(reorder_cache, save_path.reorder) del reorder_cache tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() # endregion # region smooth quantization if needs_smooth: logger.info("* Smoothing model for quantization") tools.logging.Formatter.indent_inc() load_from = "" if load_path and os.path.exists(load_path.smooth): load_from = load_path.smooth elif cache and cache.path.smooth and os.path.exists(cache.path.smooth): load_from = cache.path.smooth if load_from: logger.info(f"- Loading smooth scales from {load_from}") smooth_cache = torch.load(load_from) smooth_llm(model, config, smooth_cache=smooth_cache) else: logger.info("- Generating smooth scales") smooth_cache = smooth_llm(model, config, tokenizer=tokenizer) if cache and cache.path.smooth: logger.info(f"- Saving smooth scales to {cache.path.smooth}") os.makedirs(cache.dirpath.smooth, exist_ok=True) torch.save(smooth_cache, cache.path.smooth) load_from = cache.path.smooth if save_path: if not copy_on_save and load_from: logger.info(f"- Linking smooth scales to {save_path.smooth}") os.symlink(os.path.relpath(load_from, save_dirpath), save_path.smooth) else: logger.info(f"- Saving smooth scales to {save_path.smooth}") torch.save(smooth_cache, save_path.smooth) del smooth_cache tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() # endregion # region collect original state dict if config.needs_acts_quantizer_cache: if load_path and os.path.exists(load_path.acts): orig_state_dict = None elif cache and cache.path.acts and os.path.exists(cache.path.acts): orig_state_dict = None else: orig_state_dict: dict[str, torch.Tensor] = { name: param.detach().clone() for name, param in model.module.named_parameters() if param.ndim > 1 } else: orig_state_dict = None # endregion if load_model: 
logger.info(f"* Loading model checkpoint from {load_model_path}") model.module.load_state_dict(torch.load(load_model_path)) gc.collect() torch.cuda.empty_cache() elif quant_wgts: logger.info("* Quantizing weights") tools.logging.Formatter.indent_inc() load_from = "" if load_path and os.path.exists(load_path.wgts): load_from = load_path.wgts elif cache and cache.path.wgts and os.path.exists(cache.path.wgts): load_from = cache.path.wgts if load_from: logger.info(f"- Loading weight quantizer settings from {load_from}") quantizer_state_dict = torch.load(load_from) _, scale_state_dict = quantize_llm_weights( model, config, tokenizer=tokenizer, quantizer_state_dict=quantizer_state_dict, return_with_scale_state_dict=save_model, ) else: logger.info("- Generating weight quantizer settings") quantizer_state_dict, scale_state_dict = quantize_llm_weights( model, config, tokenizer=tokenizer, return_with_scale_state_dict=save_model ) if cache and cache.dirpath.wgts: logger.info(f"- Saving weight quantizer settings to {cache.path.wgts}") os.makedirs(cache.dirpath.wgts, exist_ok=True) torch.save(quantizer_state_dict, cache.path.wgts) load_from = cache.path.wgts if save_path: if not copy_on_save and load_from: logger.info(f"- Linking weight quantizer settings to {save_path.wgts}") os.symlink(os.path.relpath(load_from, save_dirpath), save_path.wgts) else: logger.info(f"- Saving weight quantizer settings to {save_path.wgts}") torch.save(quantizer_state_dict, save_path.wgts) if save_model: logger.info(f"- Saving model checkpoint to {save_dirpath}") torch.save(scale_state_dict, os.path.join(save_dirpath, "scale.pt")) torch.save(model.module.state_dict(), os.path.join(save_dirpath, "model.pt")) del quantizer_state_dict, scale_state_dict tools.logging.Formatter.indent_dec() gc.collect() torch.cuda.empty_cache() if quant_acts: logger.info(" * Quantizing activations") tools.logging.Formatter.indent_inc() if config.needs_acts_quantizer_cache: load_from = "" if load_path and 
def main(config: LlmPtqRunConfig, logging_level: int = tools.logging.DEBUG) -> None:  # noqa: C901
    """Quantize a large language model post-training and optionally evaluate it.

    The job output directory is locked for the duration of the run; the
    configuration and a run log are written into it, the model is built and
    passed through ``ptq``, and, unless ``config.skip_eval`` is set, the
    evaluation results are dumped to ``results.json``.

    Args:
        config (`LlmPtqRunConfig`):
            The large language model post-training quantization run configuration.
        logging_level (`int`, *optional*, defaults to `logging.DEBUG`):
            The logging level.
    """
    output = config.output
    output.lock()
    config.dump(path=output.get_running_job_path("config.yaml"))
    tools.logging.setup(path=output.get_running_job_path("run.log"), level=logging_level)
    logger = tools.logging.getLogger(__name__)

    # region log configurations
    logger.info("=== Configurations ===")
    tools.logging.info(config.formatted_str(), logger=logger)
    logger.info("=== Dumped Configurations ===")
    dumped_config = pprint.pformat(config.dump(), indent=2, width=120)
    tools.logging.info(dumped_config, logger=logger)
    logger.info("=== Output Directory ===")
    logger.info(output.job_dirpath)
    # endregion

    logger.info("=== Start Evaluating ===")
    logger.info(f"* Building model {config.model.name} from {config.model.path}")
    tools.logging.Formatter.indent_inc()
    model, tokenizer = config.model.build()
    tools.logging.Formatter.indent_dec()

    # Resolve where quantization artifacts go and whether the checkpoint itself is saved.
    # ``config.save_model`` is either empty, a boolean-like keyword, or a target directory.
    save_dirpath = os.path.join(output.running_job_dirpath, "cache")
    save_model = False
    requested = config.save_model
    if requested:
        keyword = requested.lower()
        if keyword in ("true", "default"):
            save_dirpath, save_model = os.path.join(output.running_job_dirpath, "model"), True
        elif keyword not in ("false", "none", "null", "nil"):
            save_dirpath, save_model = requested, True

    model = ptq(
        model,
        tokenizer=tokenizer,
        config=config.quant,
        cache=config.cache,
        load_dirpath=config.load_from,
        save_dirpath=save_dirpath,
        copy_on_save=config.copy_on_save,
        save_model=save_model,
    )

    # region evaluate model
    if not config.skip_eval:
        logger.info("* Evaluating model")
        generation_config = GenerationConfig.from_pretrained(config.model.path)
        eos_token_ids = generation_config.eos_token_id
        if not isinstance(eos_token_ids, list):
            # a single id is wrapped so the evaluator always receives a list
            eos_token_ids = [eos_token_ids]
        tools.logging.Formatter.indent_inc()
        results = config.eval.evaluate(
            model,
            tokenizer,
            model_name=config.model.name,
            eos_token_ids=eos_token_ids,
            output_dirpath=output.get_running_job_path("eval"),
        )
        tools.logging.Formatter.indent_dec()
        logger.info(f"* Saving results to {output.job_dirpath}")
        # dump results
        results_path = os.path.join(output.get_running_job_path("results.json"))
        with open(results_path, "w") as f:
            json.dump(results, f, indent=2)
    # endregion
    output.unlock()
@torch.inference_mode()
def quantize_llm_layer_activations(  # noqa: C901
    layer: LlmTransformerBlockStruct,
    config: LlmQuantConfig,
    quantizer_state_dict: dict[str, tp.Any],
    layer_cache: dict[str, IOTensorsCache] | None = None,
    layer_kwargs: dict[str, tp.Any] | None = None,
    orig_state_dict: dict[str, torch.Tensor] | None = None,
) -> None:
    """Calibrate the activation quantization ranges of modules in a layer.

    For every quantization target in the layer (the shared q/k/v projection input,
    the q/k/v attention outputs, the attention output-projection input and the
    per-expert feed-forward projection inputs) an activation quantizer is built.
    Enabled quantizers are either calibrated (when their key is missing from
    ``quantizer_state_dict``) or restored from it, and are then registered as
    input/output hooks on the matching modules of the layer.

    Args:
        layer (`LlmTransformerBlockStruct`):
            Layer.
        config (`LlmQuantConfig`):
            Quantization configuration.
        quantizer_state_dict (`dict[str, Any]`):
            Activation quantizer state dict. Updated in place with the state of
            every quantizer that is calibrated here.
        layer_cache (`dict[str, IOTensorsCache]` or `None`, *optional*, defaults to `None`):
            Layer activations cache.
        layer_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`):
            Keyword arguments for the layer.
        orig_state_dict (`dict[str, torch.Tensor]` or `None`, *optional*, defaults to `None`):
            Original weight state dict.
    """
    logger = tools.logging.getLogger(f"{__name__}.ActivationQuant")
    logger.debug("- Quantizing layer %s", layer.name)
    # Normalize the optional arguments so the truthiness checks below are uniform.
    layer_cache = layer_cache or {}
    layer_kwargs = layer_kwargs or {}
    orig_state_dict = orig_state_dict or {}
    # One tuple per quantizer to build; consumed in order by the calibration loop below.
    args_caches: list[
        tuple[
            str,  # key
            TensorType,
            list[nn.Linear],  # modules
            str,  # module name
            nn.Module,  # eval module
            str,  # eval name
            dict[str, tp.Any],  # eval kwargs
            list[tuple[nn.Parameter, torch.Tensor]],  # original wgts
        ]
    ] = []
    In, Out = TensorType.Inputs, TensorType.Outputs
    attn, ffn = layer.attn_struct, layer.ffn_struct
    # region attn
    attn_kwargs = attn.filter_kwargs(layer_kwargs)
    if orig_state_dict:
        # Pair each q/k/v projection weight, then the output projection weight,
        # with its entry in the original state dict.
        orig_wgts = [
            (module.weight, orig_state_dict[f"{module_name}.weight"])
            for module_name, module in zip(attn.qkv_proj_names, attn.qkv_proj, strict=True)
        ] + [(attn.out_proj.weight, orig_state_dict[f"{attn.out_proj_name}.weight"])]
    else:
        orig_wgts = None
    # region qkv_proj (Inputs)
    # The q/k/v projections are registered once under the v_proj name; the
    # calibration loop later aliases the resulting quantizer to q_proj and k_proj.
    # NOTE(review): `cache_key` assigned in this setup section is never read here;
    # the calibration loop recomputes it from `module_name` and the tensor type.
    module_name = attn.v_proj_name
    module_key, cache_key, modules = attn.qkv_proj_key, f"{module_name}.input", attn.qkv_proj
    args_caches.append((module_key, In, modules, module_name, attn, attn.name, attn_kwargs, orig_wgts))
    # endregion
    # region qkv_attn (Outputs)
    # Doubling the list lets the `idx : idx + 4` slice below wrap around, so each
    # slice starts at entry `idx`. Presumably the list has four pairs (q, k, v, out),
    # making every slice a rotation of the full list — TODO confirm.
    orig_proj_wgts = (orig_wgts + orig_wgts) if orig_wgts else None
    for idx, module_key in enumerate((attn.q_key, attn.k_key, attn.v_key)):
        module = getattr(attn, "qkv"[idx])
        module_name = getattr(attn, f"{'qkv'[idx]}_name")
        cache_key = f"{module_name}.output"
        orig_wgts = orig_proj_wgts[idx : idx + 4] if orig_proj_wgts else None
        args_caches.append((module_key, Out, [module], module_name, attn, attn.name, attn_kwargs, orig_wgts))
    # endregion
    # region out_proj (Inputs)
    module_name, module = attn.out_proj_name, attn.out_proj
    module_key, cache_key = attn.out_proj_key, f"{module_name}.input"
    orig_wgts = [(module.weight, orig_state_dict[f"{module_name}.weight"])] if orig_state_dict else None
    # The output projection is evaluated on its own (eval module is the projection itself).
    args_caches.append((module_key, In, [module], module_name, module, module_name, None, orig_wgts))
    # endregion
    del orig_wgts
    # endregion
    # region ffn
    # region ffn block projections
    for expert_idx in range(ffn.config.num_experts):
        expert = ffn.experts[expert_idx]
        expert_name = ffn.expert_names[expert_idx]
        # region proj 1st in expert (Inputs)
        module_name = ffn.up_proj_names[expert_idx]
        # Strided slice: takes every `num_experts`-th entry of `up_projs` starting at this expert.
        modules = ffn.up_projs[expert_idx :: ffn.config.num_experts]
        module_key, cache_key = ffn.up_proj_key, f"{module_name}.input"
        if orig_state_dict:
            orig_wgts = [
                (module.weight, orig_state_dict[f"{expert_name}.{ffn.up_proj_rnames[module_idx]}.weight"])
                for module_idx, module in enumerate(modules)
            ]
        else:
            orig_wgts = None
        # The expert is the eval module, but its eval inputs are looked up under the
        # first up-projection's name.
        args_caches.append((module_key, In, modules, module_name, expert, module_name, None, orig_wgts))
        # endregion
        # region proj 2nd in expert (Inputs)
        module_name, module = ffn.down_proj_names[expert_idx], ffn.down_projs[expert_idx]
        module_key, cache_key = ffn.down_proj_key, f"{module_name}.input"
        if orig_state_dict:
            orig_wgts = [(module.weight, orig_state_dict[f"{module_name}.weight"])]
        else:
            orig_wgts = None
        args_caches.append((module_key, In, [module], module_name, module, module_name, None, orig_wgts))
        # endregion
    # endregion
    # endregion
    # Maps "<module name>.input" / "<module name>.output" to the quantizer to hook.
    quantizers: dict[str, LlmActivationQuantizer] = {}
    tools.logging.Formatter.indent_inc()
    for module_key, tensor_type, modules, module_name, eval_module, eval_name, eval_kwargs, orig_wgts in args_caches:
        if tensor_type == TensorType.Inputs:
            cache_key = f"{module_name}.input"
            quantizer_config = config.ipts
            activations = layer_cache.get(module_name, IOTensorsCache()).inputs
            device = modules[0].weight.device
        else:
            cache_key = f"{module_name}.output"
            quantizer_config = config.opts
            activations = layer_cache.get(module_name, IOTensorsCache()).outputs
            # Output quantizers are loaded onto the device of the out_proj weight.
            device = attn.out_proj.weight.device
        quantizer = LlmActivationQuantizer(
            quantizer_config,
            channels_dim=-1,
            develop_dtype=config.develop_dtype,
            key=module_key,
            tensor_type=tensor_type,
        )
        if quantizer.is_enabled():
            quantizers[cache_key] = quantizer
            if cache_key not in quantizer_state_dict:
                # No cached state for this key: calibrate and record the result.
                logger.debug("- Calibrating %s", cache_key)
                quantizer.calibrate_dynamic_range(
                    modules=modules,
                    activations=activations,
                    eval_module=eval_module,
                    eval_inputs=layer_cache[eval_name].inputs if layer_cache else None,
                    eval_kwargs=eval_kwargs,
                    orig_weights=orig_wgts,
                )
                quantizer_state_dict[cache_key] = quantizer.state_dict()
                gc.collect()
                torch.cuda.empty_cache()
            else:
                quantizer.load_state_dict(quantizer_state_dict[cache_key], device=device)
            if tensor_type == TensorType.Inputs:
                # Share the v_proj input quantizer with q_proj and k_proj.
                if attn.v_proj_rname in cache_key:
                    for proj_name in [attn.q_proj_rname, attn.k_proj_rname]:
                        quantizers[cache_key.replace(attn.v_proj_rname, proj_name)] = quantizer
                # Share the first up-projection's input quantizer with the remaining up-projections.
                if ffn.up_proj_rnames[0] in cache_key:
                    for proj_name in ffn.up_proj_rnames[1:]:
                        quantizers[cache_key.replace(ffn.up_proj_rnames[0], proj_name)] = quantizer
        del quantizer
    # Register the quantizers as input/output hooks on the matching modules of the layer.
    for name, module in layer.module.named_modules():
        module_name = f"{layer.name}.{name}"
        ipts_quantizer = quantizers.get(f"{module_name}.input", None)
        opts_quantizer = quantizers.get(f"{module_name}.output", None)
        needs_quant_ipts = ipts_quantizer is not None and ipts_quantizer.is_enabled()
        needs_quant_opts = opts_quantizer is not None and opts_quantizer.is_enabled()
        if needs_quant_ipts or needs_quant_opts:
            logger.debug(
                "- Quantizing %s (%s)",
                module_name,
                ("inputs" if needs_quant_ipts else "")
                + (" and " if needs_quant_ipts and needs_quant_opts else "")
                + ("outputs" if needs_quant_opts else ""),
            )
            if needs_quant_ipts:
                ipts_quantizer.as_hook(is_output=False).register(module)
            if needs_quant_opts:
                opts_quantizer.as_hook(is_output=True).register(module)
    tools.logging.Formatter.indent_dec()
def _smooth_branch_dirname(branch, other_tag: str) -> str:
    """Format the granularity, strategy, alpha/beta and spans of one smoothing branch.

    Args:
        branch: A smoothing branch configuration exposing ``granularity``, ``strategy``,
            ``alpha``, ``beta`` and ``spans`` (pairs of span enums).
        other_tag (`str`): Tag used for the second span of each pair (``"w"`` or ``"y"``).

    Returns:
        `str`: The name fragment (starts with ``"."``).
    """
    name = ""
    if branch.granularity != SearchBasedCalibGranularity.Layer:
        name += f".{branch.granularity.name}"
    if branch.strategy != SearchBasedCalibStrategy.Manual:
        name += f".{branch.strategy.name}"
        # with a search strategy, alpha/beta are only recorded when non-positive
        if branch.alpha <= 0:
            name += f".a{num2str(branch.alpha)}"
        if branch.beta <= 0:
            name += f".b{num2str(branch.beta)}"
    else:
        name += f".a{num2str(branch.alpha)}"
        name += f".b{num2str(branch.beta)}"
    if all(xspan == ospan for xspan, ospan in branch.spans):
        # both spans of every pair agree: list each span once
        name += ".[{}]".format("+".join(xspan.name for xspan, _ in branch.spans))
    else:
        name += ".[{}]".format(
            "+".join(f"x.{xspan.name}.{other_tag}.{ospan.name}" for xspan, ospan in branch.spans)
        )
    return name


def _calib_range_dirname(tag: str, enabled: bool, quantizer, names: dict[str, str]) -> str:
    """Format the dynamic-range calibration settings of one quantizer.

    Args:
        tag (`str`): Tensor tag (``"w"``, ``"x"`` or ``"y"``).
        enabled (`bool`): Whether the quantizer is enabled; ``quantizer`` is not
            touched when this is ``False``.
        quantizer: Quantizer configuration exposing ``enabled_calib_range`` and ``calib_range``.
        names (`dict[str, str]`): Mapping from skip keys to their short names.

    Returns:
        `str`: ``""`` when no range calibration needs to be recorded, otherwise a
        fragment starting with ``"-{tag}.range"``.
    """
    if not (enabled and quantizer.enabled_calib_range):
        return ""
    calib_range = quantizer.calib_range
    if not (calib_range.needs_search or calib_range.ratio != 1):
        return ""
    name = f"-{tag}.range"
    if calib_range.needs_search:
        if calib_range.granularity != SearchBasedCalibGranularity.Group:
            name += f".{calib_range.granularity.name}"
        if calib_range.objective != SearchBasedCalibObjective.OutputsError:
            name += f".{calib_range.objective.name}"
        if calib_range.degree != 2:
            name += f".d{num2str(calib_range.degree)}"
        name += f".[{num2str(calib_range.max_shrink)}"
        name += f".{num2str(calib_range.max_expand)}"
        name += f".g{calib_range.num_grids}]"
    else:
        name += f".r{num2str(calib_range.ratio)}"
    if calib_range.skips:
        name += ".skip.[{}]".format("+".join(names[skip] for skip in calib_range.skips))
    return name


@configclass
@dataclass
class LlmQuantConfig(LlmModuleQuantizerConfig):
    """Large Language Model Module quantization configuration.

    Args:
        wgts (`LlmWeightQuantizerConfig`):
            The weight quantization configuration.
        ipts (`LlmActivationQuantizerConfig`):
            The input activation quantization configuration.
        opts (`LlmActivationQuantizerConfig`):
            The output activation quantization configuration.
        calib (`LlmCalibDataLoaderConfig`):
            The calibration dataset configuration.
        rotation (`QuantRotationConfig` or `None`, *optional*, defaults to `None`):
            The quantization rotation configuration.
        reorder (`SkipBasedChannelOrderConfig` or `None`, *optional*, defaults to `None`):
            The quantization reordering configuration.
        smooth (`SmoothTransfomerConfig`, *optional*, defaults to `None`):
            The quantization smoothing configuration.
        develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            The development data type during quantization.
    """

    calib: LlmCalibDataLoaderConfig
    rotation: QuantRotationConfig | None = None
    reorder: SkipBasedChannelOrderConfig | None = None
    smooth: SmoothTransfomerConfig | None = None
    develop_dtype: torch.dtype = field(default_factory=lambda s=torch.float32: eval_dtype(s, with_quant_dtype=False))

    def __post_init__(self) -> None:  # noqa: C901
        """Normalize the configuration after initialization.

        - Drops ``smooth`` when neither of its branches is enabled.
        - When both rotation and reorder are configured, adds ``"residual"`` and the
          rotation transforms to the reorder skips.
        - Promotes ``Group`` range-calibration granularity of activations to
          ``ChannelGroup`` and checks the batch group size of static activation quantization.
        - For non-dynamic reorder, replaces skips that refer to the qkv / up
          projections by a single ``"residual"`` skip.
        """
        if self.smooth is not None:
            if not self.smooth.enabled_proj and not self.smooth.enabled_attn:
                self.smooth = None
        if self.rotation is not None and self.reorder is not None:
            self.reorder.skips.append("residual")
            if self.rotation.transforms:
                self.reorder.skips.extend(self.rotation.transforms)
            self.reorder.skips = sorted(set(self.reorder.skips))
        if self.enabled_ipts:
            if self.ipts.enabled_calib_range and self.ipts.calib_range.granularity == SearchBasedCalibGranularity.Group:
                self.ipts.calib_range.granularity = SearchBasedCalibGranularity.ChannelGroup
            if self.ipts.static:
                assert self.ipts.smallest_group_shape[0] == -1, "static quantization requires batch group size to be -1"
        if self.enabled_opts:
            if self.opts.enabled_calib_range and self.opts.calib_range.granularity == SearchBasedCalibGranularity.Group:
                self.opts.calib_range.granularity = SearchBasedCalibGranularity.ChannelGroup
            if self.opts.static:
                assert self.opts.smallest_group_shape[0] == -1, "static quantization requires batch group size to be -1"
        if self.enabled_reorder:
            # Work on a set: the previous implementation appended to the list while
            # iterating over it and raised ``ValueError`` on duplicated skip entries
            # (the list was de-duplicated before each collected entry was removed).
            skips = set(self.reorder.skips)
            if not self.reorder.dynamic:
                qkv_proj_rkey, up_proj_rkey = LlmSelfAttentionStruct.qkv_proj_rkey, LlmFeedForwardStruct.up_proj_rkey
                prefixes = (qkv_proj_rkey, up_proj_rkey)
                suffixes = (f"_{qkv_proj_rkey}", f"_{up_proj_rkey}")
                projection_skips = {skip for skip in skips if skip.startswith(prefixes) or skip.endswith(suffixes)}
                if projection_skips:
                    # skipping the qkv / up projections is expressed as skipping the residual
                    skips -= projection_skips
                    skips.add("residual")
            self.reorder.skips = sorted(skips)

    @property
    def enabled_smooth(self) -> bool:
        """Whether to enable smooth quantization."""
        return self.smooth is not None

    @property
    def enabled_smooth_proj(self) -> bool:
        """Whether to enable xw smooth quantization."""
        return self.enabled_smooth and self.smooth.enabled_proj

    @property
    def enabled_smooth_attn(self) -> bool:
        """Whether to enable yy smooth quantization."""
        return self.enabled_smooth and self.smooth.enabled_attn

    @property
    def enabled_reorder(self) -> bool:
        """Whether to enable channel reorder."""
        return self.reorder is not None and self.reorder.is_enabled()

    @property
    def enabled_rotation(self) -> bool:
        """Whether to enable rotation."""
        return self.rotation is not None

    @property
    def needs_acts_quantizer_cache(self) -> bool:
        """Whether to cache the activations quantizer settings."""
        if self.enabled_ipts and self.ipts.needs_calib_data:
            return True
        if self.enabled_opts and self.opts.needs_calib_data:
            return True
        return False

    def generate_calib_dirname(self) -> str:
        """Generate the calibration directory name.

        The rotation / reorder / smooth tags are prepended to the name produced by
        the parent class; the leading ``"-"`` is stripped.
        """
        name = ""
        if self.enabled_rotation:
            name += "-rotate"
            if self.rotation.random:
                name += ".rnd"
        if self.enabled_reorder:
            name += "-reorder"
            if self.reorder.dynamic:
                name += ".dyn"
        if self.enabled_smooth:
            name += "-smooth"
            if self.enabled_smooth_proj:
                name += ".proj"
            if self.enabled_smooth_attn:
                name += ".attn"
        calib_name = super().generate_calib_dirname()
        if calib_name:
            name += f"-{calib_name}"
        return name[1:] if name else name

    def generate_default_dirname(self) -> str:  # noqa: C901
        """Generate directory name for a large language model quantization configuration.

        The name concatenates the skip, rotation, reorder, smooth and range
        fragments (``"default"`` when all are empty) followed by the first
        calibration dataset directory name.
        """
        w_names = x_names = {"qkv_proj": "qkv", "out_proj": "out", "up_proj": "fc1", "down_proj": "fc2"}
        y_names = {"attn_q": "q", "attn_k": "k", "attn_v": "v"}
        # region skips
        skip_name = ""
        if self.enabled_opts:
            skip_y_name = "+".join(y_names[y] for y in self.opts.skips if y in y_names)
            if skip_y_name:
                skip_name += f".y.[{skip_y_name}]"
        if self.enabled_wgts:
            skip_w_name = "+".join(w_names[w] for w in self.wgts.skips if w in w_names)
            if skip_w_name:
                skip_name += f".w.[{skip_w_name}]"
        if self.enabled_ipts:
            skip_x_name = "+".join(x_names[x] for x in self.ipts.skips if x in x_names)
            if skip_x_name:
                skip_name += f".x.[{skip_x_name}]"
        if skip_name:
            skip_name = "-skip" + skip_name
        if self.enabled_wgts and self.wgts.enabled_gptq:
            skip_name += "-gptq"
        # endregion
        # region rotation
        rotation_name = ""
        if self.enabled_rotation:
            rotation_name = "-rot"
            if self.rotation.path:
                rotation_name += f".{self.rotation.name}"
            elif self.rotation.random:
                rotation_name += ".rnd"
            if self.rotation.transforms:
                rotation_name += ".[+{}]".format("+".join(w_names[w] for w in self.rotation.transforms))
        # endregion
        # region reorder
        reorder_name = ""
        if self.enabled_reorder:
            reorder_name = "-rodr"
            if self.reorder.strategy == SearchBasedCalibStrategy.Manual:
                if self.reorder.channel_metric.value != "xMax":
                    reorder_name += f".{self.reorder.channel_metric.value}"
                if self.reorder.channel_index.value != "Seq":
                    reorder_name += f".{self.reorder.channel_index.value}"
            else:
                reorder_name += f".{self.reorder.strategy.name}"
            reorders, skips = [], []
            for k in w_names.keys() if self.reorder.dynamic else ("residual", "out_proj", "down_proj"):
                v = w_names.get(k, "res")
                if k in self.reorder.skips:
                    skips.append(v)
                else:
                    reorders.append(v)
            # record whichever of the two lists is shorter
            if len(reorders) <= len(skips):
                reorder_name += ".[{}]".format("+".join(reorders))
            elif skips:
                reorder_name += ".skip.[{}]".format("+".join(skips))
        # endregion
        # region smooth
        smooth_name = ""
        if self.enabled_smooth:
            smooth_name = "-smth"
            if self.smooth.enabled_proj:
                smooth_name += ".proj" + _smooth_branch_dirname(self.smooth.proj, "w")
                smooths, skips = [], []
                for k, v in w_names.items():
                    if k in self.smooth.proj.skips:
                        skips.append(v)
                    else:
                        smooths.append(v)
                if len(smooths) <= len(skips):
                    smooth_name += ".[{}]".format("+".join(smooths))
                elif skips:
                    smooth_name += ".skip.[{}]".format("+".join(skips))
            if self.smooth.enabled_attn:
                smooth_name += ".attn" + _smooth_branch_dirname(self.smooth.attn, "y")
        # endregion
        # region dynamic range calibration
        wrange_name = _calib_range_dirname("w", self.enabled_wgts, self.wgts, w_names)
        xrange_name = _calib_range_dirname("x", self.enabled_ipts, self.ipts, x_names)
        yrange_name = _calib_range_dirname("y", self.enabled_opts, self.opts, y_names)
        # endregion
        name = skip_name + rotation_name + reorder_name + smooth_name + wrange_name + xrange_name + yrange_name
        name = name[1:] if name else "default"
        name += f"-{self.calib.generate_dirnames()[0]}"
        return name

    def generate_cache_dirpath(
        self, *, root: str, seed: int, default_dtype: torch.dtype = torch.float16
    ) -> LlmQuantCacheConfig:  # noqa: C901
        """Generate the cache paths for the module quantization configuration.

        Directory names accumulate in pipeline order (rotation, reorder, smooth,
        weight kernel, weight range, activation ranges), so every stage's cache
        path contains the names of all stages before it.

        Args:
            root (`str`): Root directory of the cache.
            seed (`int`): Random seed, used in the rotation cache path for random rotations.
            default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
                Default data type forwarded to ``generate_dirnames``.

        Returns:
            `LlmQuantCacheConfig`: The cache directory paths.
        """
        quant_names = self.generate_dirnames(default_dtype=default_dtype)
        w_kernel_names = []
        if self.enabled_wgts and self.wgts.enabled_gptq:
            w_kernel_names = self.wgts.kernel_gptq.generate_dirnames(prefix="w.kernel")
        if self.enabled_rotation:
            quant_names.extend(self.rotation.generate_dirnames(prefix="rotate"))
        reorder_dirpath = ""
        if self.enabled_reorder:
            reorder_names = self.reorder.generate_dirnames(prefix="reorder")
            quant_names.extend(reorder_names)
            reorder_dirpath = os.path.join("reorder", *quant_names)
        smooth_dirpath = ""
        if self.enabled_smooth:
            smooth_names = self.smooth.generate_dirnames(prefix="smooth")
            quant_names.extend(smooth_names)
            smooth_dirpath = os.path.join("smooth", *quant_names)
        # the weight kernel names only affect the weight / activation caches
        quant_names.extend(w_kernel_names)
        wgts_dirpath = ""
        if self.enabled_wgts and self.wgts.enabled_calib_range:
            quant_names.extend(self.wgts.calib_range.generate_dirnames(prefix="w.range"))
            wgts_dirpath = os.path.join("wgts", *quant_names)
        acts_dirpath = ""
        if self.needs_acts_quantizer_cache:
            if self.enabled_ipts and self.ipts.enabled_calib_range:
                quant_names.extend(self.ipts.calib_range.generate_dirnames(prefix="x.range"))
            if self.enabled_opts and self.opts.enabled_calib_range:
                quant_names.extend(self.opts.calib_range.generate_dirnames(prefix="y.range"))
            acts_dirpath = os.path.join("acts", *quant_names)
        cache_dirpath = LlmQuantCacheConfig(
            reorder=reorder_dirpath,
            smooth=smooth_dirpath,
            wgts=wgts_dirpath,
            acts=acts_dirpath,
        ).add_parent_dirs(*self.calib.generate_dirnames())
        if self.enabled_rotation:
            if self.rotation.path:
                # a rotation loaded from an explicit path is not cached
                cache_dirpath.rotation = ""
            else:
                cache_dirpath.rotation = os.path.join(
                    "rotation",
                    f"seed.{seed}" if self.rotation.random else "hadamard",
                )
        cache_dirpath.add_parent_dirs(root, "llm", "cache", "quant")
        return cache_dirpath
@configclass
@dataclass(kw_only=True)
class LlmCalibDataLoaderConfig(BaseDataLoaderConfig):
    """Settings that describe the calibration dataset used for quantization.

    Args:
        data (`str`):
            Dataset name.
        num_samples (`int`):
            Number of dataset samples.
        path (`str`):
            Path to the dataset.
        seq_length (`int`):
            Sequence length of each sample.
        min_seq_length (`int`, *optional*, defaults to `0`):
            Minimum sequence length of each sample.
        max_seq_length (`int`, *optional*, defaults to `0`):
            Maximum sequence length of each sample.
        local_path (`str`, *optional*, defaults to `""`):
            Local path to the dataset.
    """

    path: str
    seq_length: int
    min_seq_length: int = 0
    max_seq_length: int = 0
    local_path: str = ""
    batch_size: int = field(init=False, default=1)

    def __post_init__(self) -> None:
        """Clamp the length bounds at zero and resolve the dataset path."""
        self.min_seq_length, self.max_seq_length = max(0, self.min_seq_length), max(0, self.max_seq_length)
        remote_path = os.path.expanduser(self.path)
        local_path = os.path.expanduser(self.local_path)
        self.local_path = local_path
        # an existing local copy takes precedence over the configured path
        self.path = local_path if os.path.exists(local_path) else remote_path

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Get the names of the configuration fields."""
        dirname = f"{self.data}.{self.num_samples}x{self.seq_length}.[{self.min_seq_length}-{self.max_seq_length}]"
        if prefix:
            dirname = f"{prefix}.{dirname}"
        return [dirname]

    def build_dataset(self, tokenizer: PreTrainedTokenizer) -> "LlmCalibDataset":
        """Build calibration dataset.

        Args:
            tokenizer (`PreTrainedTokenizer`):
                Tokenizer for encoding text.

        Returns:
            `LlmCalibDataset`:
                Calibration dataset.
        """
        dataset_kwargs = dict(
            data=self.data,
            path=self.path,
            num_samples=self.num_samples,
            seq_length=self.seq_length,
            max_seq_length=self.max_seq_length,
            min_seq_length=self.min_seq_length,
        )
        return LlmCalibDataset(tokenizer, **dataset_kwargs)

    def build_loader(self, tokenizer: PreTrainedTokenizer) -> "LlmCalibCacheLoader":
        """Build calibration data cache loader.

        Args:
            tokenizer (`PreTrainedTokenizer`):
                Tokenizer for encoding text.

        Returns:
            `LlmCalibCacheLoader`:
                Calibration data cache loader.
        """
        return LlmCalibCacheLoader(config=self, tokenizer=tokenizer)


class LlmCalibDataset(torch.utils.data.Dataset):
    """Tokenized calibration samples cut into chunks of ``seq_length`` tokens."""

    data: list[torch.Tensor]

    def __init__(
        self,
        tokenizer: PreTrainedTokenizer,
        data: str,
        path: str,
        num_samples: int,
        seq_length: int,
        max_seq_length: int = -1,
        min_seq_length: int = -1,
        seed: int = 42,
    ) -> None:
        """Load, shuffle and tokenize the dataset, then split it into samples.

        Args:
            tokenizer (`PreTrainedTokenizer`): Tokenizer for encoding text.
            data (`str`): Dataset name; only ``"pileval"`` is supported.
            path (`str`): Path passed to ``load_dataset``.
            num_samples (`int`): Number of samples to keep.
            seq_length (`int`): Number of tokens per sample.
            max_seq_length (`int`, *optional*, defaults to `-1`):
                Texts with more tokens are dropped when positive.
            min_seq_length (`int`, *optional*, defaults to `-1`):
                Texts with fewer tokens are dropped when positive.
            seed (`int`, *optional*, defaults to `42`):
                Seed for shuffling and for picking crop offsets.
        """
        assert num_samples > 0, "num_samples should be positive"
        assert seq_length > 0, "seq_length should be positive"
        num_tokens = num_samples * seq_length
        assert tokenizer is not None, "tokenizer is required"
        if data != "pileval":
            raise NotImplementedError(f"Calibration dataset {data} is not supported")
        dataset = load_dataset(path, split="validation").shuffle(seed=seed)
        rng = random.Random(seed)
        pieces: list[torch.Tensor] = []
        collected = 0
        for sample in dataset:
            token_ids = tokenizer.encode(sample["text"].strip())
            length = len(token_ids)
            too_short = min_seq_length > 0 and length < min_seq_length
            too_long = max_seq_length > 0 and length > max_seq_length
            if length == 0 or too_short or too_long:
                continue
            # piece is a tensor of shape (length, ), cropped to at most seq_length tokens
            piece = torch.tensor(token_ids)
            if length > seq_length:
                start = rng.randint(0, length - seq_length)
                piece = piece[start : start + seq_length]
            pieces.append(piece)
            collected += piece.numel()
            if len(pieces) >= num_samples and collected >= num_tokens:
                break
        # now concatenate all samples and split according to seq_length
        chunks = torch.cat(pieces).split(seq_length)
        if collected > num_tokens:
            chunks = chunks[:-1]
        self.data = chunks[:num_samples]

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx: int) -> torch.Tensor:
        """Return the sample at position ``idx``."""
        return self.data[idx]
Args: config (`LlmCalibDataLoaderConfig`): Configuration for loading calibration dataset. tokenizer (`PreTrainedTokenizer`): Tokenizer for encoding text. """ super().__init__(dataset=config.build_dataset(tokenizer=tokenizer), batch_size=config.batch_size) self.batch_size = min(self.batch_size, len(self.dataset)) self.config = config def _init_cache(self, name: str, module: nn.Module) -> IOTensorsCache: """Initialize cache. Args: name (`str`): Module name. module (`nn.Module`): Module. Returns: `IOTensorsCache`: Input and output tensors cache. """ if isinstance( module, (nn.Linear, RotaryEmbedding, MixtralSparseMoeBlock, T5DenseActDense, T5DenseGatedActDense) ) or module.__class__.__name__.endswith(("DecoderLayer", "Attention", "MLP")): return IOTensorsCache( inputs=TensorCache(channels_dim=-1, reshape=LinearReshapeFn()), outputs=TensorCache(channels_dim=-1, reshape=LinearReshapeFn()), ) else: super()._init_cache(name, module) def _convert_layer_inputs( self, m: nn.Module, args: tuple[tp.Any, ...], kwargs: dict[str, tp.Any], save_all: bool = False ) -> ModuleForwardInput: """Convert layer inputs to module forward input. Args: m (`nn.Module`): Layer. args (`tuple[Any, ...]`): Layer input arguments. kwargs (`dict[str, Any]`): Layer input keyword arguments. save_all (`bool`, *optional*, defaults to `False`): Whether to save all inputs. Returns: `ModuleForwardInput`: Module forward input. """ x = args[0].detach().cpu() if save_all else MISSING return ModuleForwardInput( args=[x, *args[1:]], kwargs={k: None if isinstance(v, Cache) else v for k, v in kwargs.items()} ) def iter_samples(self) -> tp.Generator[ModuleForwardInput, None, None]: """Iterate over model input samples. Args: tokenizer (`nn.Module`): Tokenizer for encoding text. Yields: `ModuleForwardInput`: Module forward input. 
""" dataloader = torch.utils.data.DataLoader( self.dataset, batch_size=self.batch_size, shuffle=False, drop_last=True ) for data in dataloader: yield ModuleForwardInput(args=(data,)) def iter_layer_activations( # noqa: C901 self, model: nn.Module | LlmModelStruct, *args, action: CacheAction | None = None, needs_inputs_fn: tp.Callable[[str, nn.Module], bool] | bool | None = True, needs_outputs_fn: tp.Callable[[str, nn.Module], bool] | bool | None = None, **kwargs, ) -> tp.Generator[ tuple[ str, tuple[ LlmTransformerBlockStruct, dict[str, IOTensorsCache], dict[str, tp.Any], ], ], None, None, ]: """Iterate over model activations for each layer. Args: model (`nn.Module`): Model. action (`CacheAction`, *optional*, defaults to `None`): Action for caching activations. If ``None``, ``ConcatCacheAction("cpu")`` is used. needs_inputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `True`): Function for determining whether to cache inputs for a module given its name and itself. needs_outputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `None`): Function for determining whether to cache outputs for a module given its name and itself. *args: Arguments for ``_iter_samples``. **kwargs: Keyword arguments for ``_iter_samples``. Yields: Generator[ tuple[str, tuple[LlmTransformerBlockStruct, dict[str, IOTensorsCache], dict[str, Any]]], None, None ]: Generator of tuple of - layer name - a tuple of - layer struct, - input and output caches for each module in the layer, - layer input keyword arguments. 
""" if isinstance(model, LlmModelStruct): model_struct = model model = model_struct.module else: model_struct = LlmModelStruct.construct(model) layers, layer_structs, recomputes, use_prev_layer_outputs = model_struct.get_iter_layer_activations_args() action = ConcatCacheAction("cpu") if action is None else action for layer_idx, (layer_name, (layer, layer_cache, layer_inputs)) in enumerate( self._iter_layer_activations( model, *args, action=action, layers=layers, needs_inputs_fn=needs_inputs_fn, needs_outputs_fn=needs_outputs_fn, recomputes=recomputes, use_prev_layer_outputs=use_prev_layer_outputs, **kwargs, ) ): layer_kwargs = layer_inputs[0].kwargs for layer_input in layer_inputs: for key, value in layer_input.kwargs.items(): if isinstance(value, torch.Tensor): assert torch.equal(value, layer_kwargs[key]) else: assert value == layer_kwargs[key] layer_struct = layer_structs[layer_idx] assert layer_name == layer_struct.name, f"Expected {layer_struct.name}, got {layer_name}" assert layer is layer_struct.module for transformer_block_struct in layer_struct.iter_transformer_block_structs(): for attn_struct in transformer_block_struct.iter_attention_structs(): if attn_struct.v_proj_name in layer_cache: cache = layer_cache[attn_struct.v_proj_name] layer_cache[attn_struct.q_proj_name] = cache layer_cache[attn_struct.k_proj_name] = cache ffn_struct = transformer_block_struct.ffn_struct up_proj_names = ffn_struct.up_proj_names if up_proj_names[0] in layer_cache: for expert_idx in range(ffn_struct.config.num_experts): cache = layer_cache[up_proj_names[expert_idx]] for name in up_proj_names[expert_idx :: ffn_struct.config.num_experts]: layer_cache[name] = cache if ffn_struct.config.num_experts == 1 and ffn_struct.name not in layer_cache: layer_cache[ffn_struct.name] = layer_cache[up_proj_names[0]] if ffn_struct.config.num_experts > 1 and ffn_struct.name in layer_cache: layer_cache[ffn_struct.moe_gate_name] = layer_cache[ffn_struct.name] yield layer_name, (layer_struct, 
layer_cache, layer_kwargs) ================================================ FILE: deepcompressor/app/llm/quant/quantizer/__init__.py ================================================ # -*- coding: utf-8 -*- from .config import LlmModuleQuantizerConfig from .quantizer import LlmActivationQuantizer, LlmWeightQuantizer ================================================ FILE: deepcompressor/app/llm/quant/quantizer/config.py ================================================ # -*- coding: utf-8 -*- """Quantizatizer config.""" import typing as tp from dataclasses import dataclass, field import torch from omniconfig import configclass from deepcompressor.calib.config import SkipBasedDynamicRangeCalibConfig from deepcompressor.data.dtype import QuantDataType from deepcompressor.quantizer.config import ProgressiveQuantizerConfig from deepcompressor.quantizer.kernel import QuantGptqConfig from deepcompressor.utils.config import EnableConfig, SkipBasedConfig __all__ = ["LlmQuantizerConfig", "LlmWeightQuantizerConfig", "LlmActivationQuantizerConfig", "LlmModuleQuantizerConfig"] @configclass @dataclass class LlmQuantizerConfig(SkipBasedConfig, ProgressiveQuantizerConfig): """Llm Quantizer Configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. intermediate_dtypes (`Sequence[QuantDataType]`, *optional*, defaults to `()`): The intermediate quantization data types. intermediate_levels (Sequence[int], *optional*, defaults to `()`): The intermediate quantization levels. 
needs_dequant_saturation (`bool`, *optional*, defaults to `False`): Whether the dequantization needs saturation. skips (`Sequence[str]`, *optional*, defaults to `[]`): The keys of the modules to skip. static (`bool`, *optional*, defaults to `False`): Whether to use static quantization. kernel_gptq (`QuantGptqConfig` or `None`, *optional*, defaults to `None`): The GPTQ kernel configuration. calib_range (`SkipBasedDynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The dynamic range calibration configuration. """ static: bool = False kernel_gptq: QuantGptqConfig | None = None calib_range: SkipBasedDynamicRangeCalibConfig | None = None def __post_init__(self) -> None: super().__post_init__() if self.quant_dtype is None: self.static = False self.kernel_gptq = None self.calib_range = None if self.static and self.calib_range is None: self.calib_range = SkipBasedDynamicRangeCalibConfig() @property def enabled_gptq(self) -> bool: """Whether quantization kernel calibration is enabled.""" return self.kernel_gptq is not None @property def enabled_calib_range(self) -> bool: """Whether quantization dynamic range calibration is enabled.""" return self.calib_range is not None @property def needs_calib_data(self) -> bool: return self.enabled_calib_range and (self.calib_range.needs_search or self.static) def generate_calib_dirname(self) -> str: """Generate the name for quantization calibration. Returns: str: The name. """ name = "" if self.static: name += ".static" if self.enabled_gptq: name += ".gptq" if self.enabled_calib_range and (self.calib_range.needs_search or self.calib_range.ratio != 1): name += ".range" return name[1:] if name else "" @configclass @dataclass class LlmWeightQuantizerConfig(LlmQuantizerConfig): """Llm Weight Quantizer Configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. 
group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. intermediate_dtypes (`Sequence[QuantDataType]`, *optional*, defaults to `()`): The intermediate quantization data types. intermediate_levels (Sequence[int], *optional*, defaults to `()`): The intermediate quantization levels. needs_dequant_saturation (`bool`, *optional*, defaults to `False`): Whether the dequantization needs saturation. skips (`Sequence[str]`, *optional*, defaults to `[]`): The keys of the modules to skip. kernel_gptq (`QuantGptqConfig` or `None`, *optional*, defaults to `None`): The GPTQ kernel configuration. calib_range (`SkipBasedDynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The dynamic range calibration configuration. """ static: bool = field(init=False, default=True) @configclass @dataclass class LlmActivationQuantizerConfig(LlmQuantizerConfig): """Llm Activation quantization configuration. Args: dtype (`QuantDataType` or `None`, *optional*, defaults to `None`): The quantization data type. zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`): The zero-point domain. group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`): The shapes for per-group quantization. scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`): The quantization scale data type for per-group quantization. skips (`Sequence[str]`, *optional*, defaults to `[]`): The keys of the modules to skip. static (`bool`, *optional*, defaults to `False`): Whether to use static quantization. calib_range (`SkipBasedDynamicRangeCalibConfig` or `None`, *optional*, defaults to `None`): The dynamic range calibration configuration. 
""" intermediate_dtypes: tp.Sequence[QuantDataType] = field(init=False, default=()) intermediate_levels: tp.Sequence[int] = field(init=False, default=()) needs_dequant_saturation: bool = field(init=False, default=False) kernel_gptq: None = field(init=False, default=None) @configclass @dataclass class LlmModuleQuantizerConfig(EnableConfig): """Llm Module quantization configuration. Args: wgts (`LlmWeightQuantizerConfig`): The weight quantization configuration. ipts (`LlmActivationQuantizerConfig`): The input activation quantization configuration. opts (`LlmActivationQuantizerConfig`): The output activation quantization configuration. """ wgts: LlmWeightQuantizerConfig ipts: LlmActivationQuantizerConfig opts: LlmActivationQuantizerConfig def is_enabled(self) -> bool: """Whether the quantization is enabled.""" return self.enabled_wgts or self.enabled_ipts or self.enabled_opts @property def enabled_wgts(self) -> bool: """Whether to enable weight quantization.""" return self.wgts is not None and self.wgts.is_enabled() @property def enabled_ipts(self) -> bool: """Whether to enable activation quantization.""" return self.ipts is not None and self.ipts.is_enabled() @property def enabled_opts(self) -> bool: """Whether to enable activation quantization.""" return self.opts is not None and self.opts.is_enabled() def generate_dirnames( self, *, prefix: str = "", shape: torch.Size | tuple[int, ...] = (1024, 1024, 16, 16), default_dtype: torch.dtype = torch.float16, **kwargs, ) -> list[str]: """Get the directory names of the quantization configuration. Args: prefix (`str`, *optional*, defaults to `""`): The prefix for the directory names. shape (`torch.Size` or `tuple[int, ...]`, *optional*, defaults to `(1024, 1024, 16, 16)`): The shape of the tensor to be quantized. default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`): The dtype of the tensor to be quantized. Returns: `list[str]`: The directory names of the quantization configuration. 
- The number of effective bits. - The name of the quantization data type. - The name of the group shapes. - The name of the modules to skip. """ wgts_names = self.wgts.generate_dirnames(prefix="w", shape=shape, default_dtype=default_dtype) ipts_names = self.ipts.generate_dirnames(prefix="x", shape=shape, default_dtype=default_dtype) opts_names = self.opts.generate_dirnames(prefix="y", shape=shape, default_dtype=default_dtype) names = [ f"{wgts_name}-{ipts_name}-{opts_name}" for wgts_name, ipts_name, opts_name in zip(wgts_names, ipts_names, opts_names, strict=True) ] if prefix: names = [f"{prefix}.[{name}]" for name in names] return names def generate_calib_dirname(self) -> str: """Generate the name for quantization calibration. Returns: `str`: The name. """ name = "" if self.enabled_wgts: calib_name = self.wgts.generate_calib_dirname() if calib_name: name += f"-w.{calib_name}" if self.enabled_ipts: calib_name = self.ipts.generate_calib_dirname() if calib_name: name += f"-x.{calib_name}" if self.enabled_opts: calib_name = self.opts.generate_calib_dirname() if calib_name: name += f"-y.{calib_name}" return name[1:] if name else name ================================================ FILE: deepcompressor/app/llm/quant/quantizer/quantizer.py ================================================ # -*- coding: utf-8 -*- """Tensor Quantizer module.""" import typing as tp from dataclasses import dataclass, field import torch import torch.nn as nn from deepcompressor.calib.range import calibrate_dynamic_range from deepcompressor.data.cache import TensorsCache from deepcompressor.data.common import TensorType from deepcompressor.data.range import DynamicRange from deepcompressor.quantizer.kernel import QuantGptqConfig from deepcompressor.quantizer.processor import Quantizer from .config import LlmActivationQuantizerConfig, LlmQuantizerConfig, LlmWeightQuantizerConfig __all__ = ["LlmQuantizer", "LlmWeightQuantizer", "LlmActivationQuantizer"] @dataclass class LlmQuantizer(Quantizer): 
"""Llm quantizer class. Args: config (`LlmQuantizerConfig`): The quantizer configuration. key (`str`, *optional*, defaults to `""`): The key of the quantizer. tensor_type (`TensorType`, *optional*, defaults to `TensorType.Weights`): The type of the tensor to quantize. channels_dim (`int` or `None`, *optional*, defaults to `None`): The dimension of channels. scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`): The scale tensor. zero (`torch.Tensor` or `None`, *optional*, defaults to `None`): The zero point tensor. dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`): The dynamic range. range_bound (`RangeBound` or `None`, *optional*, defaults to `None`): The dynamic range bound. quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The quantization range. default_dtype (`torch.dtype` or `None`, *optional*, defaults to `None`): The default scale dtype develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The quantization development dtype. low_rank (`QuantLowRankConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch configuration. input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`): The input packager, used for unpacking and repacking the input tensor(s). output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`): The output packager, used for unpacking and repacking the output tensor(s). 
""" config: LlmQuantizerConfig kernel: QuantGptqConfig | None = field(init=False) tensor_type: TensorType = TensorType.Weights def __post_init__(self) -> None: self.kernel = self.config.kernel_gptq def calibrate_dynamic_range( self, modules: tp.Sequence[nn.Module], activations: TensorsCache, weights: tp.Sequence[nn.Parameter] = None, eval_inputs: TensorsCache | None = None, eval_module: nn.Module | None = None, eval_kwargs: dict[str, tp.Any] | None = None, orig_weights: tp.Sequence[tuple[nn.Parameter, torch.Tensor]] | None = None, orig_activations: TensorsCache | None = None, orig_eval_inputs: TensorsCache | None = None, ) -> tp.Sequence[DynamicRange] | None: """Calibrate the dynamic range. Args: modules (`Sequence[nn.Module]`): The modules to calibrate. activations (`TensorsCache`): The inputs cache if the tensor type is not outputs, or the outputs cache if the tensor type is outputs. weights (`Sequence[nn.Parameter]` or `None`, *optional*, defaults to `None`): The weights to calibrate. If not provided, the weights of the modules will be used. eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The cache of the inputs for evaluation. If not provided, the `activations` cache will be used. eval_module (`nn.Module` or `None`, *optional*, defaults to `None`): The module to evaluate the quantization error. If not provided, the module to calibrate will be used. eval_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`): The keyword arguments for evaluation. orig_weights (`Sequence[tuple[nn.Parameter, torch.Tensor]]` or `None`, *optional*, defaults to `None`): The original weights. orig_activations (`TensorsCache` or `None`, *optional*, defaults to `None`): The original activations. orig_eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The original evaluation inputs. Returns: `Sequence[DynamicRange]` or `None`: The dynamic ranges of each quantization step. 
""" if ( not self.is_enabled() or self.config.calib_range is None or not self.config.calib_range.is_enabled_for(self.key) ): self.dynamic_range = None else: self.dynamic_range = calibrate_dynamic_range( tensor_type=self.tensor_type, config=self.config.calib_range, static=self.config.static, quantizer=self, modules=modules, activations=activations, weights=weights, eval_inputs=eval_inputs, eval_module=eval_module, eval_kwargs=eval_kwargs, orig_weights=orig_weights, orig_activations=orig_activations, orig_eval_inputs=orig_eval_inputs, ) return self.dynamic_range @dataclass class LlmWeightQuantizer(LlmQuantizer): """Llm Weight Quantizer class. Args: config (`LlmWeightQuantizerConfig`): The quantizer configuration. key (`str`, *optional*, defaults to `""`): The key of the quantizer. scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`): The scale tensor. zero (`torch.Tensor` or `None`, *optional*, defaults to `None`): The zero point tensor. dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`): The dynamic range. range_bound (`RangeBound` or `None`, *optional*, defaults to `None`): The dynamic range bound. quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The quantization range. default_dtype (`torch.dtype` or `None`, *optional*, defaults to `None`): The default scale dtype develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The quantization development dtype. low_rank (`QuantLowRankConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch configuration. input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`): The input packager, used for unpacking and repacking the input tensor(s). output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`): The output packager, used for unpacking and repacking the output tensor(s). 
""" config: LlmWeightQuantizerConfig channels_dim: None = field(init=False, default=None) tensor_type: TensorType = field(init=False, default=TensorType.Weights) def calibrate_dynamic_range( self, module: nn.Module, inputs: TensorsCache, weight: nn.Parameter | None = None, eval_inputs: TensorsCache | None = None, eval_module: nn.Module | None = None, eval_kwargs: dict[str, tp.Any] | None = None, orig_inputs: TensorsCache | None = None, orig_eval_inputs: TensorsCache | None = None, ) -> DynamicRange | tuple[DynamicRange, ...]: """Calibrate the dynamic range. Args: module (`nn.Module`): The module to calibrate. inputs (`TensorsCache`): The inputs cache. weight (`nn.Parameter` or `None`, *optional*, defaults to `None`): The weight parameter to calibrate. If not provided, the weight of the `module` will be used. eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The cache of the inputs for evaluation. If not provided, the `activations` cache will be used. eval_module (`nn.Module` or `None`, *optional*, defaults to `None`): The module to evaluate the quantization error. If not provided, the module to calibrate will be used. eval_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`): The keyword arguments for evaluation. orig_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The original inputs. orig_eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`): The original evaluation inputs. Returns: `Sequence[DynamicRange]` or `None`: The dynamic ranges of each quantization step. """ return super().calibrate_dynamic_range( modules=[module], weights=[weight] if weight is not None else [module.weight], activations=inputs, eval_inputs=eval_inputs, eval_module=eval_module, eval_kwargs=eval_kwargs, orig_activations=orig_inputs, orig_eval_inputs=orig_eval_inputs, ) @dataclass class LlmActivationQuantizer(LlmQuantizer): """Llm Activation Quantizer class. 
Args: config (`LlmActivationQuantizerConfig`): The quantizer configuration. key (`str`, *optional*, defaults to `""`): The key of the quantizer. tensor_type (`TensorType`, *optional*, defaults to `TensorType.Inputs`): The type of the tensor to quantize. channels_dim (`int` or `None`, *optional*, defaults to `None`): The dimension of channels. scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`): The scale tensor. zero (`torch.Tensor` or `None`, *optional*, defaults to `None`): The zero point tensor. dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`): The dynamic range. range_bound (`RangeBound` or `None`, *optional*, defaults to `None`): The dynamic range bound. quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The quantization range. default_dtype (`torch.dtype` or `None`, *optional*, defaults to `None`): The default scale dtype develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The quantization development dtype. low_rank (`QuantLowRankConfig` or `None`, *optional*, defaults to `None`): The quantization low-rank branch configuration. input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`): The input packager, used for unpacking and repacking the input tensor(s). output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`): The output packager, used for unpacking and repacking the output tensor(s). """ config: LlmActivationQuantizerConfig tensor_type: TensorType = TensorType.Inputs def __post_init__(self) -> None: super().__post_init__() assert self.tensor_type != TensorType.Weights, "The tensor type cannot be weights." assert isinstance(self.channels_dim, int), "The channels dimension must be provided." 
================================================ FILE: deepcompressor/app/llm/quant/reorder.py ================================================ # -*- coding: utf-8 -*- """LLM quantization channel reordering module.""" import gc import typing as tp import torch import torch.nn as nn import torch.utils from tqdm import tqdm from transformers import PreTrainedTokenizer from deepcompressor.calib.reorder import ChannelOrderCalibrator, ChannelReorderer from deepcompressor.data.cache import IOTensorsCache, TensorCache, TensorsCache from deepcompressor.quantizer.processor import Quantizer from deepcompressor.utils import tools from ..nn import LlmModelStruct, LlmTransformerBlockStruct from .config import LlmQuantConfig from .utils import get_needs_inputs_fn __all__ = ["reorder_llm"] def _extend_params_( params: list[tuple[nn.Parameter, int]], modules: list[nn.Linear | nn.Embedding, nn.LayerNorm], out_channels_dim: int | None = None, in_channels_dim: int | None = None, ) -> list[tuple[nn.Parameter, int]]: """Extend the parameters to be reordered.""" if out_channels_dim is not None: assert in_channels_dim is None else: assert in_channels_dim is not None for module in modules: if module is None: continue if out_channels_dim is not None: params.append((module.weight, out_channels_dim)) if hasattr(module, "bias") and module.bias is not None: params.append((module.bias, 0)) else: params.append((module.weight, in_channels_dim)) return params @torch.inference_mode() def reorder_llm_layer( # noqa: C901 layer: LlmTransformerBlockStruct, config: LlmQuantConfig, reorder_cache: dict[str, torch.Tensor], residual_calibrator: ChannelOrderCalibrator | None = None, layer_cache: dict[str, IOTensorsCache] | None = None, layer_kwargs: dict[str, tp.Any] | None = None, ) -> ChannelOrderCalibrator | None: """Calibrate the channel order in a layer. Args: layer (`LlmTransformerBlockStruct`): Large language model layer to be reordered. config (`LlmQuantConfig`): Quantization config. 
reorder_cache (`dict[str, torch.Tensor]`): Reorder indexes cache. residual_calibrator (`ChannelOrderCalibrator` or `None`, *optional*, defaults to `None`): Channel order calibrator for residual modules. layer_cache (`dict[str, IOTensorsCache]`, *optional*, defaults to `None`): Layer activations cache. layer_kwargs (`dict[str, tp.Any]`, *optional*, defaults to `None`): Layer keyword arguments. Returns: `ChannelOrderCalibrator` or `None`: Channel order calibrator for residual modules. """ logger = tools.logging.getLogger(f"{__name__}.Reorder") layer_cache = layer_cache or {} attn = layer.attn_struct qkv_proj, out_proj = attn.qkv_proj, attn.out_proj num_heads, num_head_repeats = attn.config.num_query_heads, attn.config.num_head_repeats # region reorder in attention module if config.reorder.dynamic and config.reorder.is_enabled_for(attn.qkv_proj_key): logger.debug("- Reordering %s", attn.qkv_proj_names) cache_key = attn.name if cache_key not in reorder_cache: index = ChannelOrderCalibrator( config=config.reorder, weight_quantizer=Quantizer(config.wgts, key=attn.qkv_proj_key), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=attn.qkv_proj_key), develop_dtype=config.develop_dtype, ).calibrate( x_wgts=[m.weight for m in qkv_proj], x_acts=layer_cache[attn.v_proj_name].inputs if layer_cache else None, x_mods=qkv_proj, eval_inputs=layer_cache[cache_key].inputs if layer_cache else None, eval_module=attn.module, eval_kwargs=attn.filter_kwargs(layer_kwargs), reorder_wgts=[(m.weight, 1) for m in qkv_proj], reorder_ipt_mods=[(attn.module, -1, None)], reorder_opt_mods=[], ) reorder_cache[cache_key] = index.to(device=torch.device("cpu")) index = reorder_cache[cache_key] for proj in qkv_proj: index = index.to(proj.weight.device) proj.weight.data = proj.weight.data.index_select(1, index) ChannelReorderer(index, channels_dim=-1).as_hook().register(attn.module) gc.collect() torch.cuda.empty_cache() if config.reorder.is_enabled_for(attn.out_proj_key): logger.debug("- 
Reordering %s", attn.out_proj_name) cache_key = attn.out_proj_name if cache_key not in reorder_cache: index = ChannelOrderCalibrator( config=config.reorder, weight_quantizer=Quantizer(config.wgts, key=attn.out_proj_key), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=attn.out_proj_key), num_heads=num_heads, num_head_repeats=num_head_repeats, develop_dtype=config.develop_dtype, ).calibrate( x_wgts=[out_proj.weight], x_acts=layer_cache[cache_key].inputs if layer_cache else None, x_mods=[out_proj], eval_inputs=layer_cache[cache_key].inputs if layer_cache else None, eval_module=out_proj, reorder_wgts=[(out_proj.weight, 1)], reorder_ipt_mods=[(out_proj, -1, None)], reorder_opt_mods=[], ) reorder_cache[cache_key] = index.to(device=torch.device("cpu")) index = reorder_cache[cache_key] index = index.to(out_proj.weight.device) out_proj.weight.data = out_proj.weight.data.index_select(1, index) v_proj = qkv_proj[2] if num_heads > 1 and num_head_repeats > 1: num_channels = index.numel() head_channels = num_channels // num_heads index = index.view(num_heads, head_channels) delta = torch.arange(0, num_channels, head_channels, device=index.device).view(num_heads, 1) index = index - delta num_v_channels = num_channels // num_head_repeats num_v_heads = num_heads // num_head_repeats index = index.view(num_v_heads, num_head_repeats, head_channels)[:, 0, :] delta = torch.arange(0, num_v_channels, head_channels, device=index.device).view(num_v_heads, 1) index = index + delta index = index.view(-1) v_proj.weight.data = v_proj.weight.data.index_select(0, index.to(v_proj.weight.device)) if v_proj.bias is not None: v_proj.bias.data = v_proj.bias.data[index.to(v_proj.bias.device)].contiguous() gc.collect() torch.cuda.empty_cache() # endregion ffn = layer.ffn_struct num_experts = ffn.config.num_experts up_proj, down_proj = ffn.up_projs, ffn.down_projs # region reorder in feed-forward module if config.reorder.dynamic and config.reorder.is_enabled_for(ffn.up_proj_key): 
logger.debug("- Reordering %s", ffn.name) cache_key = ffn.name if cache_key not in reorder_cache: index = ChannelOrderCalibrator( config=config.reorder, weight_quantizer=Quantizer(config.wgts, key=ffn.up_proj_key), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=ffn.up_proj_key), develop_dtype=config.develop_dtype, ).calibrate( x_wgts=[m.weight for m in up_proj], x_acts=layer_cache[cache_key].inputs if layer_cache else None, x_mods=up_proj, eval_inputs=layer_cache[cache_key].inputs if layer_cache else None, eval_module=ffn.module, reorder_wgts=[(m.weight, 1) for m in up_proj], reorder_ipt_mods=[(ffn.module, -1, None)], reorder_opt_mods=[], ) reorder_cache[cache_key] = index.to(device=torch.device("cpu")) index = reorder_cache[cache_key] index = index.to(device=up_proj[0].weight.device) for fc in up_proj: fc.weight.data = fc.weight.data.index_select(1, index.to(fc.weight.device)) moe_gate = ffn.moe_gate if moe_gate is not None: moe_gate.weight.data = moe_gate.weight.data.index_select(1, index.to(moe_gate.weight.device)) ChannelReorderer(index, channels_dim=-1).as_hook().register(ffn.module) if config.reorder.is_enabled_for(ffn.down_proj_key): for expert_idx, (fc2_name, fc2) in enumerate(zip(ffn.down_proj_names, down_proj, strict=True)): logger.debug("- Reordering module %s", fc2_name) cache_key = fc2_name if cache_key not in reorder_cache: index = ChannelOrderCalibrator( config=config.reorder, weight_quantizer=Quantizer(config.wgts, key=ffn.down_proj_key), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=ffn.down_proj_key), develop_dtype=config.develop_dtype, ).calibrate( x_wgts=[fc2.weight], x_acts=layer_cache[cache_key].inputs if layer_cache else None, x_mods=[fc2], eval_inputs=layer_cache[cache_key].inputs if layer_cache else None, eval_module=fc2, reorder_wgts=[(fc2.weight, 1)], reorder_ipt_mods=[(fc2, -1, None)], reorder_opt_mods=[], ) reorder_cache[cache_key] = index.to(device=torch.device("cpu")) index = reorder_cache[cache_key] index = 
@torch.inference_mode()
def reorder_llm(  # noqa: C901
    model: nn.Module | LlmModelStruct,
    config: LlmQuantConfig,
    tokenizer: PreTrainedTokenizer | None = None,
    reorder_cache: dict[str, torch.Tensor] | None = None,
) -> dict[str, torch.Tensor]:
    """Reorder the channels of the large language model.

    Every transformer layer is first processed by ``reorder_llm_layer``. When
    static (non-dynamic) residual reordering is enabled, one shared channel
    permutation is then applied to every parameter collected below
    (norms, output/down projections, embeddings, qkv/up projections, ...).
    The permutation is calibrated when ``reorder_cache`` has no ``"residual"``
    entry, and taken from the cache otherwise.

    Args:
        model (`nn.Module` or `LlmStruct`):
            Model to be reordered.
        config (`LlmQuantConfig`):
            Quantization config.
        tokenizer (`PreTrainedTokenizer` or `None`, *optional*, defaults to `None`):
            Tokenizer, used to build the calibration loader when calibration is needed.
        reorder_cache (`dict[str, torch.Tensor]`, *optional*, defaults to `None`):
            Reorder indexes cache. It is updated in place with any newly calibrated index.

    Returns:
        `dict[str, torch.Tensor]`:
            Reorder indexes cache.
    """
    if not isinstance(model, LlmModelStruct):
        model = LlmModelStruct.construct(model)
    assert isinstance(model, LlmModelStruct)
    logger = tools.logging.getLogger(f"{__name__}.Reorder")
    reorder_cache = {} if reorder_cache is None else reorder_cache
    residual_enabled = not config.reorder.dynamic and config.reorder.is_enabled_for("residual")
    residual_calibrator = None
    # A calibrator is only needed when the residual index still has to be computed.
    if residual_enabled and "residual" not in reorder_cache:
        residual_calibrator = ChannelOrderCalibrator(
            config=config.reorder,
            weight_quantizer=Quantizer(config.wgts),
            input_quantizer=Quantizer(config.ipts, channels_dim=-1),
            develop_dtype=config.develop_dtype,
        )
    with tools.logging.redirect_tqdm():
        if not reorder_cache:
            # Nothing cached: iterate over calibration activations layer by layer.
            calib_cache = config.calib.build_loader(tokenizer)
            for _, (layer, layer_cache, layer_kwargs) in tqdm(
                calib_cache.iter_layer_activations(
                    model,
                    needs_inputs_fn=get_needs_inputs_fn(model=model, config=config),
                ),
                desc="reordering",
                leave=False,
                total=len(model.backbone_struct.layer_structs),
                dynamic_ncols=True,
            ):
                residual_calibrator = reorder_llm_layer(
                    layer=layer,
                    config=config,
                    reorder_cache=reorder_cache,
                    residual_calibrator=residual_calibrator,
                    layer_cache=layer_cache,
                    layer_kwargs=layer_kwargs,
                )
                gc.collect()
                torch.cuda.empty_cache()
        else:
            # Cached indexes available: no activations are collected for the layers.
            calib_cache = None
            for layer in tqdm(
                model.backbone_struct.layer_structs,
                desc="reordering",
                leave=False,
                dynamic_ncols=True,
            ):
                residual_calibrator = reorder_llm_layer(
                    layer=layer,
                    config=config,
                    reorder_cache=reorder_cache,
                    residual_calibrator=residual_calibrator,
                )
    # Stop only when there is neither a calibrator to run nor a cached residual
    # index to apply. Returning whenever the calibrator is None would silently
    # skip applying a cached "residual" permutation.
    residual_cached = residual_enabled and "residual" in reorder_cache
    if residual_calibrator is None and not residual_cached:
        return reorder_cache
    # region add extra params to be reordered
    backbone = model.backbone_struct
    x_mods: list[nn.Linear] = []
    reorder_wgts: list[tuple[nn.Parameter, int]] = []
    for layer in backbone.layer_structs:
        x_mods.extend(layer.attn_struct.qkv_proj)
        x_mods.extend(layer.ffn_struct.up_projs)
        _extend_params_(
            reorder_wgts,
            [
                layer.pre_attn_norm,
                layer.attn_struct.out_proj,
                layer.post_attn_norm,
                layer.pre_ffn_norm,
                *layer.ffn_struct.down_projs,
                layer.post_ffn_norm,
            ],
            out_channels_dim=0,
        )
        _extend_params_(reorder_wgts, [layer.ffn_struct.moe_gate], in_channels_dim=1)
    need_reorder_head = model.head is not None
    if backbone.proj_in is not None:
        _extend_params_(reorder_wgts, [backbone.proj_in], out_channels_dim=0)
        _extend_params_(reorder_wgts, [backbone.embed_positions], out_channels_dim=1)
    else:
        _extend_params_(reorder_wgts, [backbone.embed_tokens, backbone.embed_positions], out_channels_dim=1)
    _extend_params_(reorder_wgts, [backbone.norm_in, backbone.norm_out], out_channels_dim=0)
    if backbone.proj_out is not None:
        _extend_params_(reorder_wgts, [backbone.proj_out], in_channels_dim=1)
        # The head is not fed by the reordered channels when proj_out sits in between.
        need_reorder_head = False
    logger.debug("- Reordering residual modules")
    _extend_params_(reorder_wgts, x_mods, in_channels_dim=1)
    if "residual" not in reorder_cache:
        # Reaching this branch implies a calibrator was created above.
        calib_cache = calib_cache or config.calib.build_loader(tokenizer)
        residual_calibrator.init_channel_indexes()
        index = residual_calibrator.calibrate(
            x_wgts=[m.weight for m in x_mods],
            x_acts=None,
            eval_inputs=TensorsCache(TensorCache(calib_cache.dataset.data, channels_dim=-1, orig_device="cuda")),
            eval_module=model.backbone,
            x_mods=x_mods,
            reorder_wgts=reorder_wgts,
            reorder_ipt_mods=[],
            reorder_opt_mods=[(model.backbone, -1, None)] if need_reorder_head else [],
        )
        reorder_cache["residual"] = index.to(device=torch.device("cpu"))
        del x_mods, residual_calibrator, calib_cache
        gc.collect()
        torch.cuda.empty_cache()
    index = reorder_cache["residual"]
    for wgt, dim in reorder_wgts:
        wgt.data = wgt.data.index_select(dim=dim, index=index.to(wgt.data.device))
    # With tied embeddings the head weight is not permuted separately here.
    if need_reorder_head and not model.config.tie_word_embeddings:
        model.head.weight.data = model.head.weight.data.index_select(dim=1, index=index.to(model.head.weight.device))
    gc.collect()
    torch.cuda.empty_cache()
    return reorder_cache
@torch.inference_mode()
def rotate_llm(  # noqa: C901
    model: PreTrainedModel | LlmModelStruct,
    /,
    config: QuantRotationConfig,
    rotation: torch.Tensor | None = None,
) -> torch.Tensor:
    """Rotate the weights of the large language model.

    The function works in place on the model's parameters:

    1. every ``torch.nn.Linear`` is temporarily cast to float32 (and moved to
       CPU when the linear weights total more than 30e9 elements);
    2. ``transform_norm_and_linear`` is called for each pre-attention norm,
       pre-FFN norm and the final norm, together with their neighbouring modules;
    3. the embeddings / input projection, each layer's projections, and finally
       the output projection or head are rotated with ``rotation``;
    4. the linears are restored to their original device and dtype.

    Args:
        model (`PreTrainedModel` or `LlmStruct`):
            Model to be rotated.
        config (`QuantRotationConfig`):
            Rotation configuration.
        rotation (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            Rotation matrix. When `None`, one is created with ``get_rotation_matrix``
            for ``backbone.config.num_channels`` channels.

    Returns:
        `torch.Tensor`:
            The rotation matrix, on CPU.
    """
    if not isinstance(model, LlmModelStruct):
        model = LlmModelStruct.construct(model)
    assert isinstance(model, LlmModelStruct)
    # Remember where each linear lives so it can be restored at the end.
    devices: list[torch.device] = []
    dtypes: list[torch.dtype] = []
    linears: list[torch.nn.Linear] = []
    size: float = 0  # total number of linear weight elements, in units of 1e9
    for m in model.module.modules():
        if isinstance(m, torch.nn.Linear):
            devices.append(m.weight.device)
            dtypes.append(m.weight.dtype)
            linears.append(m)
            size += m.weight.numel() / 1e9
    for linear in linears:
        # Work in float32; above the 30e9-element threshold the linears are moved to CPU.
        linear.to(dtype=torch.float32, device="cpu" if size > 30 else None)
    for block in model.iter_transformer_block_structs():
        assert not block.post_attn_norms, "Rotation is only supported for models without post-attention norms."
        assert not block.post_ffn_norm, "Rotation is only supported for models without post-FFN norms."
    logger = tools.logging.getLogger(f"{__name__}.Rotate")
    backbone = model.backbone_struct
    layers = backbone.layer_structs
    # region transform norm and linear
    # Pick the modules that write into the residual stream before the first layer,
    # together with the dimension index of their output channels.
    if backbone.norm_in is None:
        if backbone.proj_in is None:
            prev_modules = [backbone.embed_tokens]
            prev_out_channels_dims = 1
            if backbone.embed_positions is not None:
                prev_modules.append(backbone.embed_positions)
        elif backbone.embed_positions is None:
            prev_modules = [backbone.proj_in]
            prev_out_channels_dims = 0
        else:
            prev_modules = [backbone.proj_in, backbone.embed_positions]
            prev_out_channels_dims = [0, 1]
    else:
        prev_modules = [backbone.norm_in]
        prev_out_channels_dims = 0
    with tools.logging.redirect_tqdm():
        for layer in tqdm(layers, desc="Transforming norm and linear", dynamic_ncols=True):
            logger.debug(f"- Transforming norm and linear in {layer.name}")
            # NOTE(review): presumably folds the norm's affine parameters into the
            # following linears; confirm against transform_norm_and_linear.
            transform_norm_and_linear(
                parent=layer.module,
                norm_name=layer.pre_attn_norm_rname,
                next_modules=layer.attn_struct.qkv_proj,
                prev_modules=prev_modules,
                prev_out_channels_dims=prev_out_channels_dims,
            )
            # The attention output projection precedes the pre-FFN norm.
            prev_modules = [layer.attn_struct.out_proj]
            prev_out_channels_dims = 0
            transform_norm_and_linear(
                parent=layer.module,
                norm_name=layer.pre_ffn_norm_rname,
                next_modules=layer.ffn_struct.up_projs
                + ([layer.ffn_struct.moe_gate] if layer.ffn_struct.moe_gate is not None else []),
                prev_modules=prev_modules,
                prev_out_channels_dims=prev_out_channels_dims,
            )
            # The down projections precede the next layer's pre-attention norm.
            prev_modules = layer.ffn_struct.down_projs
            prev_out_channels_dims = 0
            gc.collect()
            torch.cuda.empty_cache()
    logger.debug(f"- Transforming {backbone.norm_out_name}")
    transform_norm_and_linear(
        parent=backbone.module,
        norm_name=backbone.norm_out_rname,
        next_modules=[model.head if backbone.proj_out is None else backbone.proj_out],
        prev_modules=prev_modules,
        prev_out_channels_dims=prev_out_channels_dims,
    )
    # endregion
    if rotation is None:
        rotation = get_rotation_matrix(backbone.config.num_channels, random=config.random)
    # region rotate embeddings
    if backbone.proj_in is None:
        logger.debug(f"- Rotating {backbone.embed_tokens_name}")
        weight = backbone.embed_tokens.weight
        rotation = rotation.to(weight.device)
        rotate_in_channels(weight, rotation=rotation)
    else:
        logger.debug(f"- Rotating {backbone.proj_in_name} (out)")
        weight = backbone.proj_in.weight
        rotation = rotation.to(weight.device)
        rotate_out_channels(weight, rotation=rotation, bias=backbone.proj_in.bias)
    if backbone.embed_positions is not None:
        logger.debug(f"- Rotating {backbone.embed_positions_name}")
        weight = backbone.embed_positions.weight
        rotation = rotation.to(weight.device)
        rotate_in_channels(weight, rotation=rotation)
    # endregion
    # Down projections that additionally receive a Hadamard transform after the layer loop.
    down_proj = []
    # region rotate backbone layers
    # Separate rotation sized by the per-head channel count, used for v_proj / o_proj.
    head_rotation = get_rotation_matrix(model.config.num_head_channels, random=config.random)
    with tools.logging.redirect_tqdm():
        for layer in tqdm(layers, desc="Rotating backbone layers", dynamic_ncols=True):
            logger.debug(f"- Rotating {layer.name}")
            tools.logging.Formatter.indent_inc()
            attn, ffn = layer.attn_struct, layer.ffn_struct
            for proj_name, proj in zip(attn.qkv_proj_names, attn.qkv_proj, strict=True):
                logger.debug(f"- Rotating {proj_name} (in)")
                rotation = rotation.to(proj.weight.device)
                rotate_in_channels(proj.weight, rotation=rotation)
            logger.debug(f"- Rotating {attn.out_proj_name} (out)")
            rotation = rotation.to(attn.out_proj.weight.device)
            rotate_out_channels(attn.out_proj.weight, rotation=rotation, bias=attn.out_proj.bias)
            if attn.out_proj_key in config.transforms:
                # Optional extra rotation between v_proj (output side) and o_proj (input side).
                logger.debug(f"- Rotating {attn.v_proj_name} (out)")
                rotate_out_channels(attn.v_proj.weight, rotation=head_rotation, bias=attn.v_proj.bias)
                logger.debug(f"- Rotating {attn.o_proj_name} (in)")
                rotate_in_channels(attn.o_proj.weight, rotation=head_rotation)
            for fc_name, fc in zip(ffn.up_proj_names, ffn.up_projs, strict=True):
                logger.debug(f"- Rotating {fc_name} (in)")
                rotation = rotation.to(fc.weight.device)
                rotate_in_channels(fc.weight, rotation=rotation)
            if ffn.moe_gate is not None:
                logger.debug(f"- Rotating {ffn.moe_gate_name} (in)")
                rotation = rotation.to(ffn.moe_gate.weight.device)
                rotate_in_channels(ffn.moe_gate.weight, rotation=rotation)
            for fc_name, fc in zip(ffn.down_proj_names, ffn.down_projs, strict=True):
                logger.debug(f"- Rotating {fc_name} (out)")
                rotation = rotation.to(fc.weight.device)
                rotate_out_channels(fc.weight, rotation=rotation, bias=fc.bias)
            if ffn.down_proj_key in config.transforms:
                down_proj.extend(ffn.down_projs)
            tools.logging.Formatter.indent_dec()
            gc.collect()
            torch.cuda.empty_cache()
    # NOTE(review): when proj_out exists, its input channels are rotated here and
    # again after the Hadamard step below, and its output channels are rotated with
    # the same matrix. Confirm that this double rotation is intended.
    if backbone.proj_out is not None:
        logger.debug(f"- Rotating {backbone.proj_out_name} (in)")
        weight = backbone.proj_out.weight
        rotation = rotation.to(weight.device)
        rotate_in_channels(weight, rotation=rotation)
        logger.debug(f"- Rotating {backbone.proj_out_name} (out)")
        rotation = rotation.to(weight.device)
        rotate_out_channels(weight, rotation=rotation, bias=backbone.proj_out.bias)
    # endregion
    if down_proj:
        logger.debug(f"- Applying Hadamard transform on {backbone.name}.down_proj (in)")
        hadamard_in_channels(down_proj)
    # Rotate the input channels of whichever module reads the final hidden states.
    if backbone.proj_out is not None:
        logger.debug(f"- Rotating {backbone.proj_out_name} (in)")
        weight = backbone.proj_out.weight
    else:
        logger.debug(f"- Rotating {model.head_name} (in)")
        weight = model.head.weight
    rotation = rotation.to(weight.device)
    rotate_in_channels(weight, rotation=rotation)
    # Restore every linear to the device and dtype recorded at the start.
    for device, dtype, linear in zip(devices, dtypes, linears, strict=True):
        linear.to(device=device, dtype=dtype)
    return rotation.cpu()
from ..nn.struct import LlmModelStruct, LlmTransformerBlockStruct from .config import LlmQuantConfig from .utils import get_needs_inputs_fn, get_needs_outputs_fn __all__ = ["smooth_llm"] @torch.inference_mode() def smooth_llm_layer( # noqa: C901 layer: LlmTransformerBlockStruct, config: LlmQuantConfig, smooth_cache: dict[str, torch.Tensor], layer_cache: dict[str, IOTensorsCache] | None = None, layer_kwargs: dict[str, tp.Any] | None = None, ) -> None: """Smooth a large language model layer. Args: layer (`LlmTransformerBlockStruct`): Large language model layer to smooth. config (`LlmQuantConfig`): Quantization configuration. smooth_cache (`dict[str, torch.Tensor]`): Smoothing scale caches. layer_caches (`dict[str, IOTensorsCache]` or `None`, *optional*, defaults to `None`): Activation caches of the layer. layer_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`): Keyword arguments for the layer. """ logger = tools.logging.getLogger(f"{__name__}.SmoothQuant") logger.debug("- Smoothing %s", layer.name) tools.logging.Formatter.indent_inc() layer_cache = layer_cache or {} layer_kwargs = layer_kwargs or {} attn, ffn = layer.attn_struct, layer.ffn_struct # region attention qk needs_quant = config.enabled_opts needs_quant = needs_quant and (config.opts.is_enabled_for(attn.q_key) or config.opts.is_enabled_for(attn.k_key)) if config.smooth.enabled_attn and needs_quant: logger.debug("- %s.%s", attn.name, attn.k_rkey) cache_key = f"{attn.name}.{attn.k_rkey}" smooth_cache[cache_key] = smooth_attention( k_proj=attn.k_proj, q_proj=attn.q_proj, scale=smooth_cache.get(cache_key, None), config=config.smooth.attn, query_quantizer=Quantizer(config.opts, channels_dim=-1, key=attn.q_key), key_quantizer=Quantizer(config.opts, channels_dim=-1, key=attn.k_key), queries=layer_cache[attn.q_name].outputs if layer_cache else None, keys=layer_cache[attn.k_name].outputs if layer_cache else None, attn_q=attn.q, attn_k=attn.k, eval_inputs=layer_cache[attn.name].inputs if 
layer_cache else None, eval_module=attn, eval_kwargs=attn.filter_kwargs(layer_kwargs), num_heads=attn.config.num_query_heads, num_head_repeats=attn.config.num_head_repeats, with_rope=attn.config.with_rope, develop_dtype=config.develop_dtype, ) # endregion # region qkv projection needs_quant = config.enabled_ipts and config.ipts.is_enabled_for(attn.qkv_proj_key) needs_quant = needs_quant or (config.enabled_wgts and config.wgts.is_enabled_for(attn.qkv_proj_key)) if config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(attn.qkv_proj_key) and needs_quant: logger.debug("- %s.%s", attn.name, attn.qkv_proj_rkey) cache_key = attn.v_proj_name smooth_cache[cache_key] = smooth_linear_modules( attn.parent.pre_attn_norm, attn.qkv_proj, scale=smooth_cache.get(cache_key, None), config=config.smooth.proj, weight_quantizer=Quantizer(config.wgts, key=attn.qkv_proj_key), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=attn.qkv_proj_key), inputs=layer_cache[attn.q_proj_name].inputs if layer_cache else None, eval_inputs=layer_cache[attn.name].inputs if layer_cache else None, eval_module=attn, eval_kwargs=attn.filter_kwargs(layer_kwargs), develop_dtype=config.develop_dtype, ) if not attn.parent.pre_attn_norm: ActivationSmoother(smooth_cache[cache_key], channels_dim=-1).as_hook().register(attn.qkv_proj) # endregion # region output projection needs_quant = config.enabled_ipts and config.ipts.is_enabled_for(attn.out_proj_key) needs_quant = needs_quant or (config.enabled_wgts and config.wgts.is_enabled_for(attn.out_proj_key)) if config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(attn.out_proj_key) and needs_quant: logger.debug("- %s.%s", attn.name, attn.out_proj_rkey) cache_key = attn.o_proj_name smooth_cache[cache_key] = smooth_linear_modules( None if attn.config.linear_attn else attn.v_proj, attn.o_proj, scale=smooth_cache.get(cache_key, None), config=config.smooth.proj, weight_quantizer=Quantizer(config.wgts, key=attn.out_proj_key), 
input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=attn.out_proj_key), inputs=layer_cache[cache_key].inputs if layer_cache else None, eval_inputs=layer_cache[cache_key].inputs if layer_cache else None, eval_module=attn.o_proj, num_heads=attn.config.num_query_heads, num_head_repeats=attn.config.num_head_repeats, develop_dtype=config.develop_dtype, ) if attn.config.linear_attn: ActivationSmoother(smooth_cache[cache_key], channels_dim=-1).as_hook().register(attn.o_proj) # endregion num_experts = ffn.config.num_experts # region up projection needs_quant = config.enabled_ipts and config.ipts.is_enabled_for(ffn.up_proj_key) needs_quant = needs_quant or (config.enabled_wgts and config.wgts.is_enabled_for(ffn.up_proj_key)) if config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(ffn.up_proj_key) and needs_quant: logger.debug("- %s.%s", ffn.name, ffn.up_proj_rkey) cache_key = ffn.name smooth_cache[cache_key] = smooth_linear_modules( ffn.parent.pre_ffn_norm, ffn.up_projs, scale=smooth_cache.get(cache_key, None), config=config.smooth.proj, weight_quantizer=Quantizer(config.wgts, key=ffn.up_proj_key), input_quantizer=Quantizer(config.ipts, channels_dim=-1, key=ffn.up_proj_key), inputs=layer_cache[ffn.name].inputs if layer_cache else None, eval_inputs=layer_cache[ffn.name].inputs if layer_cache else None, eval_module=ffn, extra_modules=[ffn.moe_gate] if num_experts > 1 else None, develop_dtype=config.develop_dtype, ) if not ffn.parent.pre_ffn_norm: hook = ActivationSmoother(smooth_cache[cache_key], channels_dim=-1).as_hook().register(ffn.up_projs) if num_experts > 1: hook.register(ffn.moe_gate) # endregion # region down projection needs_quant = config.enabled_ipts and config.ipts.is_enabled_for(ffn.down_proj_key) needs_quant = needs_quant or (config.enabled_wgts and config.wgts.is_enabled_for(ffn.down_proj_key)) if config.smooth.enabled_proj and config.smooth.proj.is_enabled_for(ffn.down_proj_key) and needs_quant: for expert_idx in range(num_experts): 
@torch.inference_mode()
def smooth_llm(
    model: nn.Module | LlmModelStruct,
    /,
    config: LlmQuantConfig,
    tokenizer: PreTrainedTokenizer | None = None,
    smooth_cache: dict[str, torch.Tensor] | None = None,
) -> dict[str, torch.Tensor]:
    """Smooth the large language model.

    When ``smooth_cache`` is empty (or `None`), calibration activations are
    collected layer by layer and the smoothing scales are computed and stored in
    the cache. Otherwise each layer is smoothed using the provided cache, without
    collecting activations.

    Args:
        model (`nn.Module` or `LlmStruct`):
            Model to be smoothed.
        config (`LlmQuantConfig`):
            Quantization configuration.
        tokenizer (`PreTrainedTokenizer`, *optional*, defaults to `None`):
            Tokenizer, used to build the calibration loader when calibration is needed.
        smooth_cache (`dict[str, torch.Tensor]`, *optional*, defaults to `None`):
            Smoothing scale caches. A caller-supplied dict (even an empty one) is
            updated in place and returned.

    Returns:
        `dict[str, torch.Tensor]`:
            Dictionary mapping module names to smoothing scales.
    """
    if not isinstance(model, LlmModelStruct):
        model = LlmModelStruct.construct(model)
    assert isinstance(model, LlmModelStruct)
    # Only replace a missing cache. `smooth_cache or {}` would also discard an
    # empty dict passed by the caller, so the caller's dict would never be filled.
    smooth_cache = {} if smooth_cache is None else smooth_cache
    if not smooth_cache:
        with tools.logging.redirect_tqdm():
            for _, (layer, layer_cache, layer_kwargs) in tqdm(
                config.calib.build_loader(tokenizer).iter_layer_activations(
                    model,
                    needs_inputs_fn=get_needs_inputs_fn(model=model, config=config),
                    needs_outputs_fn=get_needs_outputs_fn(model=model, config=config),
                ),
                desc="smoothing",
                leave=False,
                total=len(model.backbone_struct.layer_structs),
                dynamic_ncols=True,
            ):
                smooth_llm_layer(
                    layer=layer,
                    config=config,
                    smooth_cache=smooth_cache,
                    layer_cache=layer_cache,
                    layer_kwargs=layer_kwargs,
                )
    else:
        # Cached scales available: no calibration data is loaded.
        for layer in model.backbone_struct.layer_structs:
            smooth_llm_layer(layer=layer, config=config, smooth_cache=smooth_cache)
    return smooth_cache
def get_needs_outputs_fn(
    model: LlmModelStruct, config: LlmModuleQuantizerConfig
) -> tp.Callable[[str, nn.Module], bool]:
    """Build a predicate telling whether a module's outputs must be cached.

    The predicate matches module names by suffix. The suffixes are the relative
    names of the query, key and value modules of the first layer's attention
    struct, and they are only registered when ``config.enabled_opts`` is truthy.

    Args:
        model (`LlmStruct`):
            Model struct.
        config (`LlmModuleQuantizerConfig`):
            Module quantization config.

    Returns:
        `Callable[[str, nn.Module], bool]`:
            Function to check if the module needs to cache the outputs.
    """
    first_attn = model.backbone_struct.layer_structs[0].attn_struct
    if config.enabled_opts:
        # Going through a set removes duplicate suffixes.
        suffixes = tuple({first_attn.q_rname, first_attn.k_rname, first_attn.v_rname})
    else:
        # str.endswith with an empty tuple never matches.
        suffixes = ()

    def needs_outputs(name: str, module: nn.Module) -> bool:
        return name.endswith(suffixes)

    return needs_outputs
return_with_scale_state_dict (bool, *optional*, defaults to `False`): Whether to return with scale state dict. Returns: `dict[str, torch.Tensor | float | None]`: Scale state dict. """ logger = tools.logging.getLogger(f"{__name__}.WeightQuant") logger.debug("- Quantizing layer %s", layer.name) tools.logging.Formatter.indent_inc() layer_cache = layer_cache or {} layer_kwargs = layer_kwargs or {} for module_key, module_name, module, parent, field_name in layer.named_key_modules(): assert isinstance(module, nn.Linear) if field_name in ("q_proj", "k_proj"): assert isinstance(parent, LlmSelfAttentionStruct) eval_name, eval_module, eval_kwargs = parent.name, parent.module, parent.filter_kwargs(layer_kwargs) else: eval_name, eval_module, eval_kwargs = module_name, module, None quantizer = LlmWeightQuantizer(config.wgts, develop_dtype=config.develop_dtype, key=module_key) if quantizer.is_enabled(): if module_name not in quantizer_state_dict: logger.debug("- Calibrating %s.weight", module_name) quantizer.calibrate_dynamic_range( module=module, inputs=layer_cache[module_name].inputs if layer_cache else None, eval_inputs=layer_cache[eval_name].inputs if layer_cache else None, eval_module=eval_module, eval_kwargs=eval_kwargs, ) quantizer_state_dict[module_name] = quantizer.state_dict() gc.collect() torch.cuda.empty_cache() scale_state_dict: dict[str, torch.Tensor | float | None] = {} for module_key, module_name, module, _, _ in layer.named_key_modules(): assert isinstance(module, nn.Linear) quantizer = LlmWeightQuantizer(config.wgts, develop_dtype=config.develop_dtype, key=module_key) param_name = f"{module_name}.weight" if quantizer.is_enabled(): logger.debug("- Quantizing %s", param_name) quantizer.load_state_dict(quantizer_state_dict[module_name], device=module.weight.device) result = quantizer.quantize( module.weight.data, inputs=layer_cache[module_name].inputs.front() if layer_cache else None, return_with_dequant=True, return_with_quant=return_with_scale_state_dict, ) 
@torch.inference_mode()
def quantize_llm_weights(
    model: nn.Module | LlmModelStruct,
    config: LlmQuantConfig,
    tokenizer: PreTrainedTokenizer | None = None,
    quantizer_state_dict: dict[str, tp.Any] | None = None,
    return_with_scale_state_dict: bool = False,
) -> tuple[dict[str, tp.Any], dict[str, torch.Tensor | float | None]]:
    """Quantize the large language model weights.

    Calibration activations are collected layer by layer when GPTQ is enabled,
    or when no quantizer state is available and the weight quantizer needs
    calibration data. Otherwise the layers are quantized without activations.

    Args:
        model (`nn.Module` or `LlmStruct`):
            Model to be quantized.
        config (`LlmQuantConfig`):
            Quantization configuration.
        tokenizer (`PreTrainedTokenizer`, *optional*, defaults to `None`):
            Tokenizer, used to build the calibration loader when calibration is needed.
        quantizer_state_dict (`dict[str, Any]`, *optional*, defaults to `None`):
            Weight quantizer state dict. A caller-supplied dict (even an empty one)
            is updated in place and returned.
        return_with_scale_state_dict (bool, *optional*, defaults to `False`):
            Whether to return with scale state dict.

    Returns:
        `tuple[dict[str, Any], dict[str, torch.Tensor | float | None]]`:
            Weight quantizer cache and scale state dict.
    """
    if not isinstance(model, LlmModelStruct):
        model = LlmModelStruct.construct(model)
    assert isinstance(model, LlmModelStruct)
    # Only replace a missing state dict. `quantizer_state_dict or {}` would also
    # discard an empty dict passed by the caller, which would then never be filled.
    quantizer_state_dict = {} if quantizer_state_dict is None else quantizer_state_dict
    scale_state_dict: dict[str, torch.Tensor | float | None] = {}
    with tools.logging.redirect_tqdm():
        if config.wgts.enabled_gptq or (not quantizer_state_dict and config.wgts.needs_calib_data):
            for _, (layer, layer_cache, layer_kwargs) in tqdm(
                config.calib.build_loader(tokenizer).iter_layer_activations(
                    model,
                    needs_inputs_fn=get_needs_inputs_fn(model=model, config=config),
                ),
                desc="quantizing weights",
                leave=False,
                total=len(model.backbone_struct.layer_structs),
                dynamic_ncols=True,
            ):
                scale_state_dict.update(
                    quantize_llm_layer_weights(
                        layer=layer,
                        config=config,
                        quantizer_state_dict=quantizer_state_dict,
                        layer_cache=layer_cache,
                        layer_kwargs=layer_kwargs,
                        return_with_scale_state_dict=return_with_scale_state_dict,
                    )
                )
        else:
            # No calibration data required: quantize each layer directly.
            for layer in tqdm(
                model.backbone_struct.layer_structs, desc="quantizing weights", leave=False, dynamic_ncols=True
            ):
                scale_state_dict.update(
                    quantize_llm_layer_weights(
                        layer=layer,
                        config=config,
                        quantizer_state_dict=quantizer_state_dict,
                        return_with_scale_state_dict=return_with_scale_state_dict,
                    )
                )
    return quantizer_state_dict, scale_state_dict
weight: torch.Tensor, scale: torch.Tensor, bias: torch.Tensor | None = None, smooth: torch.Tensor | None = None, lora: tuple[torch.Tensor, torch.Tensor] | None = None, shift: torch.Tensor | None = None, smooth_fused: bool = False, float_point: bool = False, subscale: torch.Tensor | None = None, ) -> dict[str, torch.Tensor]: if weight.ndim > 2: # pointwise conv assert weight.numel() == weight.shape[0] * weight.shape[1] weight = weight.view(weight.shape[0], weight.shape[1]) if scale.numel() > 1: assert scale.ndim == weight.ndim * 2 assert scale.numel() == scale.shape[0] * scale.shape[2] scale = scale.view(scale.shape[0], 1, scale.shape[2], 1) scale_key = "wcscales" if scale.shape[2] == 1 else "wscales" else: scale_key = "wtscale" if subscale is None: subscale_key = "" else: assert subscale.ndim == weight.ndim * 2 assert subscale.numel() == subscale.shape[0] * subscale.shape[2] assert subscale.numel() > 1 subscale = subscale.view(subscale.shape[0], 1, subscale.shape[2], 1) subscale_key = "wcscales" if subscale.shape[2] == 1 else "wscales" if lora is not None and (smooth is not None or shift is not None): # unsmooth lora down projection dtype = weight.dtype lora_down, lora_up = lora lora_down = lora_down.to(dtype=torch.float64) if smooth is not None and not smooth_fused: lora_down = lora_down.div_(smooth.to(torch.float64).unsqueeze(0)) if shift is not None: bias = torch.zeros([lora_up.shape[0]], dtype=torch.float64) if bias is None else bias.to(torch.float64) if shift.numel() == 1: shift = shift.view(1, 1).expand(lora_down.shape[1], 1).to(torch.float64) else: shift = shift.view(-1, 1).to(torch.float64) bias = bias.add_((lora_up.to(dtype=torch.float64) @ lora_down @ shift).view(-1)) bias = bias.to(dtype=dtype) lora = (lora_down.to(dtype=dtype), lora_up) weight, scale, bias, smooth, lora, subscale = convert_to_nunchaku_w4x4y16_linear_weight( weight, scale=scale, bias=bias, smooth=smooth, lora=lora, float_point=float_point, subscale=subscale ) state_dict: dict[str, 
torch.Tensor] = {} state_dict["qweight"] = weight state_dict[scale_key] = scale if subscale is not None: state_dict[subscale_key] = subscale state_dict["bias"] = bias state_dict["smooth_orig"] = smooth state_dict["smooth"] = torch.ones_like(smooth) if smooth_fused else smooth.clone() if lora is not None: state_dict["lora_down"] = lora[0] state_dict["lora_up"] = lora[1] return state_dict def convert_to_nunchaku_w4x16_adanorm_single_state_dict( weight: torch.Tensor, scale: torch.Tensor, bias: torch.Tensor, ) -> dict[str, torch.Tensor]: weight, scale, zero, bias = convert_to_nunchaku_w4x16_linear_weight( weight, scale=scale, bias=bias, adanorm_splits=3 ) state_dict: dict[str, torch.Tensor] = {} state_dict = {} state_dict["qweight"] = weight state_dict["wscales"] = scale state_dict["wzeros"] = zero state_dict["bias"] = bias return state_dict def convert_to_nunchaku_w4x16_adanorm_zero_state_dict( weight: torch.Tensor, scale: torch.Tensor, bias: torch.Tensor, ) -> dict[str, torch.Tensor]: weight, scale, zero, bias = convert_to_nunchaku_w4x16_linear_weight( weight, scale=scale, bias=bias, adanorm_splits=6 ) state_dict: dict[str, torch.Tensor] = {} state_dict = {} state_dict["qweight"] = weight state_dict["wscales"] = scale state_dict["wzeros"] = zero state_dict["bias"] = bias return state_dict def update_state_dict( lhs: dict[str, torch.Tensor], rhs: dict[str, torch.Tensor], prefix: str = "" ) -> dict[str, torch.Tensor]: for rkey, value in rhs.items(): lkey = f"{prefix}.{rkey}" if prefix else rkey assert lkey not in lhs, f"Key {lkey} already exists in the state dict." 
lhs[lkey] = value return lhs def convert_to_nunchaku_transformer_block_state_dict( state_dict: dict[str, torch.Tensor], scale_dict: dict[str, torch.Tensor], smooth_dict: dict[str, torch.Tensor], branch_dict: dict[str, torch.Tensor], block_name: str, local_name_map: dict[str, str | list[str]], smooth_name_map: dict[str, str], branch_name_map: dict[str, str], convert_map: dict[str, str], float_point: bool = False, ) -> dict[str, torch.Tensor]: print(f"Converting block {block_name}...") converted: dict[str, torch.Tensor] = {} candidates: dict[str, torch.Tensor] = { param_name: param for param_name, param in state_dict.items() if param_name.startswith(block_name) } for converted_local_name, candidate_local_names in tqdm.tqdm( local_name_map.items(), desc=f"Converting {block_name}", dynamic_ncols=True ): if isinstance(candidate_local_names, str): candidate_local_names = [candidate_local_names] candidate_names = [f"{block_name}.{candidate_local_name}" for candidate_local_name in candidate_local_names] weight = [candidates[f"{candidate_name}.weight"] for candidate_name in candidate_names] bias = [candidates.get(f"{candidate_name}.bias", None) for candidate_name in candidate_names] scale = [scale_dict.get(f"{candidate_name}.weight.scale.0", None) for candidate_name in candidate_names] subscale = [scale_dict.get(f"{candidate_name}.weight.scale.1", None) for candidate_name in candidate_names] if len(weight) > 1: bias = None if all(b is None for b in bias) else torch.concat(bias, dim=0) if all(s is None for s in scale): scale = None else: if scale[0].numel() == 1: # switch from per-tensor to per-channel scale assert all(s.numel() == 1 for s in scale) scale = torch.concat( [ s.view(-1).expand(weight[i].shape[0]).reshape(weight[i].shape[0], 1, 1, 1) for i, s in enumerate(scale) ], dim=0, ) else: scale = torch.concat(scale, dim=0) subscale = None if all(s is None for s in subscale) else torch.concat(subscale, dim=0) weight = torch.concat(weight, dim=0) else: weight, bias, scale, 
subscale = weight[0], bias[0], scale[0], subscale[0] smooth = smooth_dict.get(f"{block_name}.{smooth_name_map.get(converted_local_name, '')}", None) branch = branch_dict.get(f"{block_name}.{branch_name_map.get(converted_local_name, '')}", None) if branch is not None: branch = (branch["a.weight"], branch["b.weight"]) if scale is None: assert smooth is None and branch is None and subscale is None print(f" - Copying {block_name} weights of {candidate_local_names} as {converted_local_name}.weight") converted[f"{converted_local_name}.weight"] = weight.clone().cpu() if bias is not None: print(f" - Copying {block_name} biases of {candidate_local_names} as {converted_local_name}.bias") converted[f"{converted_local_name}.bias"] = bias.clone().cpu() continue if convert_map[converted_local_name] == "adanorm_single": print(f" - Converting {block_name} weights of {candidate_local_names} to {converted_local_name}.") update_state_dict( converted, convert_to_nunchaku_w4x16_adanorm_single_state_dict(weight=weight, scale=scale, bias=bias), prefix=converted_local_name, ) elif convert_map[converted_local_name] == "adanorm_zero": print(f" - Converting {block_name} weights of {candidate_local_names} to {converted_local_name}.") update_state_dict( converted, convert_to_nunchaku_w4x16_adanorm_zero_state_dict(weight=weight, scale=scale, bias=bias), prefix=converted_local_name, ) elif convert_map[converted_local_name] == "linear": smooth_fused = "out_proj" in converted_local_name and smooth_dict.get("proj.fuse_when_possible", True) shift = [candidates.get(f"{candidate_name[:-7]}.shift", None) for candidate_name in candidate_names] assert all(s == shift[0] for s in shift) shift = shift[0] print( f" - Converting {block_name} weights of {candidate_local_names} to {converted_local_name}." 
f" (smooth_fused={smooth_fused}, shifted={shift is not None}, float_point={float_point})" ) update_state_dict( converted, convert_to_nunchaku_w4x4y16_linear_state_dict( weight=weight, scale=scale, bias=bias, smooth=smooth, lora=branch, shift=shift, smooth_fused=smooth_fused, float_point=float_point, subscale=subscale, ), prefix=converted_local_name, ) else: raise NotImplementedError(f"Conversion of {convert_map[converted_local_name]} is not implemented.") return converted def convert_to_nunchaku_flux_single_transformer_block_state_dict( state_dict: dict[str, torch.Tensor], scale_dict: dict[str, torch.Tensor], smooth_dict: dict[str, torch.Tensor], branch_dict: dict[str, torch.Tensor], block_name: str, float_point: bool = False, ) -> dict[str, torch.Tensor]: down_proj_local_name = "proj_out.linears.1.linear" if f"{block_name}.{down_proj_local_name}.weight" not in state_dict: down_proj_local_name = "proj_out.linears.1" assert f"{block_name}.{down_proj_local_name}.weight" in state_dict return convert_to_nunchaku_transformer_block_state_dict( state_dict=state_dict, scale_dict=scale_dict, smooth_dict=smooth_dict, branch_dict=branch_dict, block_name=block_name, local_name_map={ "norm.linear": "norm.linear", "qkv_proj": ["attn.to_q", "attn.to_k", "attn.to_v"], "norm_q": "attn.norm_q", "norm_k": "attn.norm_k", "out_proj": "proj_out.linears.0", "mlp_fc1": "proj_mlp", "mlp_fc2": down_proj_local_name, }, smooth_name_map={ "qkv_proj": "attn.to_q", "out_proj": "proj_out.linears.0", "mlp_fc1": "attn.to_q", "mlp_fc2": down_proj_local_name, }, branch_name_map={ "qkv_proj": "attn.to_q", "out_proj": "proj_out.linears.0", "mlp_fc1": "proj_mlp", "mlp_fc2": down_proj_local_name, }, convert_map={ "norm.linear": "adanorm_single", "qkv_proj": "linear", "out_proj": "linear", "mlp_fc1": "linear", "mlp_fc2": "linear", }, float_point=float_point, ) def convert_to_nunchaku_flux_transformer_block_state_dict( state_dict: dict[str, torch.Tensor], scale_dict: dict[str, torch.Tensor], smooth_dict: 
dict[str, torch.Tensor], branch_dict: dict[str, torch.Tensor], block_name: str, float_point: bool = False, ) -> dict[str, torch.Tensor]: down_proj_local_name = "ff.net.2.linear" if f"{block_name}.{down_proj_local_name}.weight" not in state_dict: down_proj_local_name = "ff.net.2" assert f"{block_name}.{down_proj_local_name}.weight" in state_dict context_down_proj_local_name = "ff_context.net.2.linear" if f"{block_name}.{context_down_proj_local_name}.weight" not in state_dict: context_down_proj_local_name = "ff_context.net.2" assert f"{block_name}.{context_down_proj_local_name}.weight" in state_dict return convert_to_nunchaku_transformer_block_state_dict( state_dict=state_dict, scale_dict=scale_dict, smooth_dict=smooth_dict, branch_dict=branch_dict, block_name=block_name, local_name_map={ "norm1.linear": "norm1.linear", "norm1_context.linear": "norm1_context.linear", "qkv_proj": ["attn.to_q", "attn.to_k", "attn.to_v"], "qkv_proj_context": ["attn.add_q_proj", "attn.add_k_proj", "attn.add_v_proj"], "norm_q": "attn.norm_q", "norm_k": "attn.norm_k", "norm_added_q": "attn.norm_added_q", "norm_added_k": "attn.norm_added_k", "out_proj": "attn.to_out.0", "out_proj_context": "attn.to_add_out", "mlp_fc1": "ff.net.0.proj", "mlp_fc2": down_proj_local_name, "mlp_context_fc1": "ff_context.net.0.proj", "mlp_context_fc2": context_down_proj_local_name, }, smooth_name_map={ "qkv_proj": "attn.to_q", "qkv_proj_context": "attn.add_k_proj", "out_proj": "attn.to_out.0", "out_proj_context": "attn.to_out.0", "mlp_fc1": "ff.net.0.proj", "mlp_fc2": down_proj_local_name, "mlp_context_fc1": "ff_context.net.0.proj", "mlp_context_fc2": context_down_proj_local_name, }, branch_name_map={ "qkv_proj": "attn.to_q", "qkv_proj_context": "attn.add_k_proj", "out_proj": "attn.to_out.0", "out_proj_context": "attn.to_add_out", "mlp_fc1": "ff.net.0.proj", "mlp_fc2": down_proj_local_name, "mlp_context_fc1": "ff_context.net.0.proj", "mlp_context_fc2": context_down_proj_local_name, }, convert_map={ 
"norm1.linear": "adanorm_zero", "norm1_context.linear": "adanorm_zero", "qkv_proj": "linear", "qkv_proj_context": "linear", "out_proj": "linear", "out_proj_context": "linear", "mlp_fc1": "linear", "mlp_fc2": "linear", "mlp_context_fc1": "linear", "mlp_context_fc2": "linear", }, float_point=float_point, ) def convert_to_nunchaku_flux_state_dicts( state_dict: dict[str, torch.Tensor], scale_dict: dict[str, torch.Tensor], smooth_dict: dict[str, torch.Tensor], branch_dict: dict[str, torch.Tensor], float_point: bool = False, ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: block_names: set[str] = set() other: dict[str, torch.Tensor] = {} for param_name in state_dict.keys(): if param_name.startswith(("transformer_blocks.", "single_transformer_blocks.")): block_names.add(".".join(param_name.split(".")[:2])) else: other[param_name] = state_dict[param_name] block_names = sorted(block_names, key=lambda x: (x.split(".")[0], int(x.split(".")[-1]))) print(f"Converting {len(block_names)} transformer blocks...") converted: dict[str, torch.Tensor] = {} for block_name in block_names: convert_fn = convert_to_nunchaku_flux_single_transformer_block_state_dict if block_name.startswith("transformer_blocks"): convert_fn = convert_to_nunchaku_flux_transformer_block_state_dict update_state_dict( converted, convert_fn( state_dict=state_dict, scale_dict=scale_dict, smooth_dict=smooth_dict, branch_dict=branch_dict, block_name=block_name, float_point=float_point, ), prefix=block_name, ) return converted, other if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--quant-path", type=str, required=True, help="path to the quantization checkpoint directory.") parser.add_argument("--output-root", type=str, default="", help="root to the output checkpoint directory.") parser.add_argument("--model-name", type=str, default=None, help="name of the model.") parser.add_argument("--float-point", action="store_true", help="use float-point 4-bit quantization.") args 
= parser.parse_args() if not args.output_root: args.output_root = args.quant_path if args.model_name is None: assert args.model_path is not None, "model name or path is required." model_name = args.model_path.rstrip(os.sep).split(os.sep)[-1] print(f"Model name not provided, using {model_name} as the model name.") else: model_name = args.model_name assert model_name, "Model name must be provided." assert "flux" in model_name.lower(), "Only Flux models are supported." state_dict_path = os.path.join(args.quant_path, "model.pt") scale_dict_path = os.path.join(args.quant_path, "scale.pt") smooth_dict_path = os.path.join(args.quant_path, "smooth.pt") branch_dict_path = os.path.join(args.quant_path, "branch.pt") map_location = "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" state_dict = torch.load(state_dict_path, map_location=map_location) scale_dict = torch.load(scale_dict_path, map_location="cpu") smooth_dict = torch.load(smooth_dict_path, map_location=map_location) if os.path.exists(smooth_dict_path) else {} branch_dict = torch.load(branch_dict_path, map_location=map_location) if os.path.exists(branch_dict_path) else {} converted_state_dict, other_state_dict = convert_to_nunchaku_flux_state_dicts( state_dict=state_dict, scale_dict=scale_dict, smooth_dict=smooth_dict, branch_dict=branch_dict, float_point=args.float_point, ) output_dirpath = os.path.join(args.output_root, model_name) os.makedirs(output_dirpath, exist_ok=True) safetensors.torch.save_file(converted_state_dict, os.path.join(output_dirpath, "transformer_blocks.safetensors")) safetensors.torch.save_file(other_state_dict, os.path.join(output_dirpath, "unquantized_layers.safetensors")) print(f"Quantized model saved to {output_dirpath}.") ================================================ FILE: deepcompressor/backend/nunchaku/convert_lora.py ================================================ """Convert LoRA weights to Nunchaku format.""" import argparse import os import safetensors 
import safetensors.torch import torch import tqdm from ..utils import load_state_dict_in_safetensors, pad from .convert import update_state_dict from .utils import NunchakuWeightPacker def reorder_adanorm_lora_up(lora_up: torch.Tensor, splits: int) -> torch.Tensor: c, r = lora_up.shape assert c % splits == 0 return lora_up.view(splits, c // splits, r).transpose(0, 1).reshape(c, r).contiguous() def convert_to_nunchaku_transformer_block_lowrank_dict( # noqa: C901 orig_state_dict: dict[str, torch.Tensor], extra_lora_dict: dict[str, torch.Tensor], converted_block_name: str, candidate_block_name: str, local_name_map: dict[str, str | list[str]], convert_map: dict[str, str], default_dtype: torch.dtype = torch.bfloat16, ) -> dict[str, torch.Tensor]: print(f"Converting LoRA branch for block {candidate_block_name}...") converted: dict[str, torch.Tensor] = {} packer = NunchakuWeightPacker(bits=4) for converted_local_name, candidate_local_names in tqdm.tqdm( local_name_map.items(), desc=f"Converting {candidate_block_name}", dynamic_ncols=True ): if isinstance(candidate_local_names, str): candidate_local_names = [candidate_local_names] # region original LoRA orig_lora = ( orig_state_dict.get(f"{converted_block_name}.{converted_local_name}.lora_down", None), orig_state_dict.get(f"{converted_block_name}.{converted_local_name}.lora_up", None), ) if orig_lora[0] is None or orig_lora[1] is None: assert orig_lora[0] is None and orig_lora[1] is None orig_lora = None else: assert orig_lora[0] is not None and orig_lora[1] is not None orig_lora = ( packer.unpack_lowrank_weight(orig_lora[0], down=True), packer.unpack_lowrank_weight(orig_lora[1], down=False), ) print(f" - Found {converted_block_name} LoRA of {converted_local_name} (rank: {orig_lora[0].shape[0]})") # endregion # region extra LoRA extra_lora = [ ( extra_lora_dict.get(f"{candidate_block_name}.{candidate_local_name}.lora_A.weight", None), extra_lora_dict.get(f"{candidate_block_name}.{candidate_local_name}.lora_B.weight", 
None), ) for candidate_local_name in candidate_local_names ] # if any of the extra LoRA is None, all of them should be None if any(lora[0] is not None or lora[1] is not None for lora in extra_lora): # merge extra LoRAs into one LoRA if len(extra_lora) > 1: first_lora = None for lora in extra_lora: if lora[0] is not None: assert lora[1] is not None first_lora = lora break assert first_lora is not None for lora_index in range(len(extra_lora)): if extra_lora[lora_index][0] is None: assert extra_lora[lora_index][1] is None extra_lora[lora_index] = (first_lora[0].clone(), torch.zeros_like(first_lora[1])) if all(lora[0].equal(extra_lora[0][0]) for lora in extra_lora): # if all extra LoRAs have the same lora_down, use it extra_lora_down = extra_lora[0][0] extra_lora_up = torch.cat([lora[1] for lora in extra_lora], dim=0) else: extra_lora_down = torch.cat([lora[0] for lora in extra_lora], dim=0) extra_lora_up_c = sum(lora[1].shape[0] for lora in extra_lora) extra_lora_up_r = sum(lora[1].shape[1] for lora in extra_lora) assert extra_lora_up_r == extra_lora_down.shape[0] extra_lora_up = torch.zeros((extra_lora_up_c, extra_lora_up_r), dtype=extra_lora_down.dtype) c, r = 0, 0 for lora in extra_lora: c_next, r_next = c + lora[1].shape[0], r + lora[1].shape[1] extra_lora_up[c:c_next, r:r_next] = lora[1] c, r = c_next, r_next else: extra_lora_down, extra_lora_up = extra_lora[0] extra_lora: tuple[torch.Tensor, torch.Tensor] = (extra_lora_down, extra_lora_up) print(f" - Found {candidate_block_name} LoRA of {candidate_local_names} (rank: {extra_lora[0].shape[0]})") # endregion # region merge LoRA if orig_lora is None: if extra_lora is None: lora = None else: print(" - Using extra LoRA") lora = (extra_lora[0].to(default_dtype), extra_lora[1].to(default_dtype)) elif extra_lora is None: print(" - Using original LoRA") lora = orig_lora else: lora = ( torch.cat([orig_lora[0], extra_lora[0].to(orig_lora[0].dtype)], dim=0), torch.cat([orig_lora[1], extra_lora[1].to(orig_lora[1].dtype)], 
dim=1), ) print(f" - Merging original and extra LoRA (rank: {lora[0].shape[0]})") # endregion if lora is not None: if convert_map[converted_local_name] == "adanorm_single": update_state_dict( converted, { "lora_down": pad(lora[0], divisor=16, dim=0), "lora_up": pad(reorder_adanorm_lora_up(lora[1], splits=3), divisor=16, dim=1), }, prefix=converted_local_name, ) elif convert_map[converted_local_name] == "adanorm_zero": update_state_dict( converted, { "lora_down": pad(lora[0], divisor=16, dim=0), "lora_up": pad(reorder_adanorm_lora_up(lora[1], splits=6), divisor=16, dim=1), }, prefix=converted_local_name, ) elif convert_map[converted_local_name] == "linear": update_state_dict( converted, { "lora_down": packer.pack_lowrank_weight(lora[0], down=True), "lora_up": packer.pack_lowrank_weight(lora[1], down=False), }, prefix=converted_local_name, ) return converted def convert_to_nunchaku_flux_single_transformer_block_lowrank_dict( orig_state_dict: dict[str, torch.Tensor], extra_lora_dict: dict[str, torch.Tensor], converted_block_name: str, candidate_block_name: str, default_dtype: torch.dtype = torch.bfloat16, ) -> dict[str, torch.Tensor]: if f"{candidate_block_name}.proj_out.lora_A.weight" in extra_lora_dict: assert f"{converted_block_name}.out_proj.qweight" in orig_state_dict assert f"{converted_block_name}.mlp_fc2.qweight" in orig_state_dict n1 = orig_state_dict[f"{converted_block_name}.out_proj.qweight"].shape[1] * 2 n2 = orig_state_dict[f"{converted_block_name}.mlp_fc2.qweight"].shape[1] * 2 lora_down = extra_lora_dict[f"{candidate_block_name}.proj_out.lora_A.weight"] lora_up = extra_lora_dict[f"{candidate_block_name}.proj_out.lora_B.weight"] assert lora_down.shape[1] == n1 + n2 extra_lora_dict[f"{candidate_block_name}.proj_out.linears.0.lora_A.weight"] = lora_down[:, :n1].clone() extra_lora_dict[f"{candidate_block_name}.proj_out.linears.0.lora_B.weight"] = lora_up.clone() extra_lora_dict[f"{candidate_block_name}.proj_out.linears.1.lora_A.weight"] = lora_down[:, 
n1:].clone() extra_lora_dict[f"{candidate_block_name}.proj_out.linears.1.lora_B.weight"] = lora_up.clone() extra_lora_dict.pop(f"{candidate_block_name}.proj_out.lora_A.weight") extra_lora_dict.pop(f"{candidate_block_name}.proj_out.lora_B.weight") return convert_to_nunchaku_transformer_block_lowrank_dict( orig_state_dict=orig_state_dict, extra_lora_dict=extra_lora_dict, converted_block_name=converted_block_name, candidate_block_name=candidate_block_name, local_name_map={ "norm.linear": "norm.linear", "qkv_proj": ["attn.to_q", "attn.to_k", "attn.to_v"], "norm_q": "attn.norm_q", "norm_k": "attn.norm_k", "out_proj": "proj_out.linears.0", "mlp_fc1": "proj_mlp", "mlp_fc2": "proj_out.linears.1", }, convert_map={ "norm.linear": "adanorm_single", "qkv_proj": "linear", "out_proj": "linear", "mlp_fc1": "linear", "mlp_fc2": "linear", }, default_dtype=default_dtype, ) def convert_to_nunchaku_flux_transformer_block_lowrank_dict( orig_state_dict: dict[str, torch.Tensor], extra_lora_dict: dict[str, torch.Tensor], converted_block_name: str, candidate_block_name: str, default_dtype: torch.dtype = torch.bfloat16, ) -> dict[str, torch.Tensor]: return convert_to_nunchaku_transformer_block_lowrank_dict( orig_state_dict=orig_state_dict, extra_lora_dict=extra_lora_dict, converted_block_name=converted_block_name, candidate_block_name=candidate_block_name, local_name_map={ "norm1.linear": "norm1.linear", "norm1_context.linear": "norm1_context.linear", "qkv_proj": ["attn.to_q", "attn.to_k", "attn.to_v"], "qkv_proj_context": ["attn.add_q_proj", "attn.add_k_proj", "attn.add_v_proj"], "norm_q": "attn.norm_q", "norm_k": "attn.norm_k", "norm_added_q": "attn.norm_added_q", "norm_added_k": "attn.norm_added_k", "out_proj": "attn.to_out.0", "out_proj_context": "attn.to_add_out", "mlp_fc1": "ff.net.0.proj", "mlp_fc2": "ff.net.2", "mlp_context_fc1": "ff_context.net.0.proj", "mlp_context_fc2": "ff_context.net.2", }, convert_map={ "norm1.linear": "adanorm_zero", "norm1_context.linear": "adanorm_zero", 
"qkv_proj": "linear", "qkv_proj_context": "linear", "out_proj": "linear", "out_proj_context": "linear", "mlp_fc1": "linear", "mlp_fc2": "linear", "mlp_context_fc1": "linear", "mlp_context_fc2": "linear", }, default_dtype=default_dtype, ) def convert_to_nunchaku_flux_lowrank_dict( orig_state_dict: dict[str, torch.Tensor], extra_lora_dict: dict[str, torch.Tensor], default_dtype: torch.dtype = torch.bfloat16, ) -> dict[str, torch.Tensor]: block_names: set[str] = set() for param_name in orig_state_dict.keys(): if param_name.startswith(("transformer_blocks.", "single_transformer_blocks.")): block_names.add(".".join(param_name.split(".")[:2])) block_names = sorted(block_names, key=lambda x: (x.split(".")[0], int(x.split(".")[-1]))) print(f"Converting {len(block_names)} transformer blocks...") converted: dict[str, torch.Tensor] = {} for block_name in block_names: if block_name.startswith("transformer_blocks"): convert_fn = convert_to_nunchaku_flux_transformer_block_lowrank_dict else: convert_fn = convert_to_nunchaku_flux_single_transformer_block_lowrank_dict update_state_dict( converted, convert_fn( orig_state_dict=orig_state_dict, extra_lora_dict=extra_lora_dict, converted_block_name=block_name, candidate_block_name=block_name, default_dtype=default_dtype, ), prefix=block_name, ) return converted if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--quant-path", type=str, required=True, help="path to the quantized model safetensor file") parser.add_argument("--lora-path", type=str, required=True, help="path to LoRA weights safetensor file") parser.add_argument("--output-root", type=str, default="", help="root to the output safetensor file") parser.add_argument("--lora-name", type=str, default=None, help="name of the LoRA weights") parser.add_argument( "--dtype", type=str, default="bfloat16", choices=["bfloat16", "float16"], help="default data type of the converted LoRA weights", ) args = parser.parse_args() if not args.output_root: # output 
to the parent directory of the quantized model safetensor file args.output_root = os.path.dirname(args.quant_path) if args.lora_name is None: assert args.lora_path is not None, "LoRA name or path must be provided" lora_name = args.lora_path.rstrip(os.sep).split(os.sep)[-1].replace(".safetensors", "") print(f"Lora name not provided, using {lora_name} as the LoRA name") else: lora_name = args.lora_name assert lora_name, "LoRA name must be provided." assert args.quant_path.endswith(".safetensors"), "Quantized model must be a safetensor file" assert args.lora_path.endswith(".safetensors"), "LoRA weights must be a safetensor file" orig_state_dict = load_state_dict_in_safetensors(args.quant_path) extra_lora_dict = load_state_dict_in_safetensors(args.lora_path, filter_prefix="transformer.") converted = convert_to_nunchaku_flux_lowrank_dict( orig_state_dict=orig_state_dict, extra_lora_dict=extra_lora_dict, default_dtype=torch.bfloat16 if args.dtype == "bfloat16" else torch.float16, ) os.makedirs(args.output_root, exist_ok=True) safetensors.torch.save_file(converted, os.path.join(args.output_root, f"{lora_name}.safetensors")) print(f"Saved LoRA weights to {args.output_root}.") ================================================ FILE: deepcompressor/backend/nunchaku/utils.py ================================================ # -*- coding: utf-8 -*- """Nunchaku backend utilities.""" import torch from ..tinychat.utils import convert_to_tinychat_w4x16y16_linear_weight from ..utils import MmaWeightPackerBase, ceil_divide, fp_quantize, pad __all__ = [ "convert_to_nunchaku_w4x4y16_linear_weight", "convert_to_nunchaku_w8x8y16_linear_weight", "convert_to_nunchaku_w4x16_linear_weight", ] class NunchakuWeightPacker(MmaWeightPackerBase): def __init__(self, bits: int, warp_n: int = 128): super().__init__(bits=bits, warp_n=warp_n) self.num_k_unrolls = 2 def pack_weight(self, weight: torch.Tensor) -> torch.Tensor: assert weight.dtype == torch.int32, f"quantized weight should be torch.int32, 
but got {weight.dtype}." n, k = weight.shape assert n % self.mem_n == 0, f"output channel size ({n}) should be divisible by mem_n ({self.mem_n})." # currently, Nunchaku did not check the boundry of unrolled `k` dimension assert k % (self.mem_k * self.num_k_unrolls) == 0, ( f"input channel size ({k}) should be divisible by " f"mem_k ({self.mem_k}) * num_k_unrolls ({self.num_k_unrolls})." ) n_tiles, k_tiles = n // self.mem_n, k // self.mem_k weight = weight.reshape( n_tiles, self.num_n_packs, # 8 when warp_n = 128 self.n_pack_size, # always 2 in nunchaku self.num_n_lanes, # constant 8 self.reg_n, # constant 1 k_tiles, self.num_k_packs, # 1 self.k_pack_size, # always 2 in nunchaku self.num_k_lanes, # constant 4 self.reg_k, # always 8 = 32 bits / 4 bits ) # (n_tiles, num_n_packs, n_pack_size, num_n_lanes, reg_n, k_tiles, num_k_packs, k_pack_size, num_k_lanes, reg_k) # => # (n_tiles, k_tiles, num_k_packs, num_n_packs, num_n_lanes, num_k_lanes, n_pack_size, k_pack_size, reg_n, reg_k) weight = weight.permute(0, 5, 6, 1, 3, 8, 2, 7, 4, 9).contiguous() assert weight.shape[4:-2] == (8, 4, 2, 2) if self.bits == 4: weight = weight.bitwise_and_(0xF) shift = torch.arange(0, 32, 4, dtype=torch.int32, device=weight.device) weight = weight.bitwise_left_shift_(shift) weight = weight.sum(dim=-1, dtype=torch.int32) elif self.bits == 8: weight = weight.bitwise_and_(0xFF) shift = torch.arange(0, 32, 8, dtype=torch.int32, device=weight.device) weight = weight.bitwise_left_shift_(shift) weight = weight.sum(dim=-1, dtype=torch.int32) else: raise NotImplementedError(f"weight bits {self.bits} is not supported.") return weight.view(dtype=torch.int8).view(n, -1) # assume little-endian def pack_scale(self, scale: torch.Tensor, group_size: int) -> torch.Tensor: if self.check_if_micro_scale(group_size=group_size): return self.pack_micro_scale(scale, group_size=group_size) # note: refer to https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#mma-16864-c assert scale.dtype in 
(torch.float16, torch.bfloat16), "currently nunchaku only supports fp16 and bf16." n = scale.shape[0] # nunchaku load scales all in one access # for `[warp_n, warp_k]` weights, we load `[warp_n, warp_k / group_size]` scales # scale loading is parallelized in `n` dimension, that is, # `num_s_lanes` in a warp load `num_s_packs` of `s_pack_size` elements, in total `warp_s` elements # each element in `n` dimension is 16 bit as it contains 1 fp16 # min `s_pack_size` set to 2 element, since each lane at least holds 2 accumulator results in `n` dimension # max `s_pack_size` set to 128b/16b = 8 elements # for `warp_n = 8`, we have # `s_pack_size = 2`, `num_s_lanes = 4`, `num_s_packs = 1` # for `warp_n = 128`, we have # `s_pack_size = 4`, `num_s_lanes = 32`, `num_s_packs = 1` # for `warp_n = 512`, we have # `s_pack_size = 8`, `num_s_lanes = 32`, `num_s_packs = 2` s_pack_size = min(max(self.warp_n // self.num_lanes, 2), 8) num_s_lanes = min(self.num_lanes, self.warp_n // s_pack_size) num_s_packs = self.warp_n // (s_pack_size * num_s_lanes) warp_s = num_s_packs * num_s_lanes * s_pack_size assert warp_s == self.warp_n, "warp_n for scales should be equal to warp_n for weights." 
def pack_micro_scale(self, scale: torch.Tensor, group_size: int) -> torch.Tensor:
    """Pack FP8 micro scales into the per-lane load order used by the kernel.

    The scale is cast to ``torch.float8_e4m3fn`` and permuted so that each lane
    of a warp reads its ``s_pack_size`` entries contiguously.

    Args:
        scale (`torch.Tensor`):
            fp16/bf16 scale tensor whose first dimension is the output-channel
            dimension ``n``. Values must lie in ``[-448, 448]``.
            NOTE(review): presumably already padded by ``pad_scale`` to shape
            ``(n, 1, k // group_size, 1)`` -- confirm against the caller.
        group_size (`int`):
            quantization group size; only ``16`` is accepted.

    Returns:
        `torch.Tensor`:
            packed ``torch.float8_e4m3fn`` tensor viewed as ``(-1, n)``.
    """
    assert scale.dtype in (torch.float16, torch.bfloat16), "currently nunchaku only supports fp16 and bf16."
    # 448 is the largest finite magnitude representable in float8_e4m3fn.
    assert scale.max() <= 448, "scale should be less than 448."
    assert scale.min() >= -448, "scale should be greater than -448."
    assert group_size == 16, "currently only support group size 16."
    assert self.insn_k == 64, "insn_k should be 64."
    scale = scale.to(dtype=torch.float8_e4m3fn)
    n = scale.shape[0]
    assert self.warp_n >= 32, "currently only support warp_n >= 32."
    # for `[warp_n, warp_k]` weights, we load `[warp_n, warp_k / group_size]` scales
    # scale loading is parallelized in `n` dimension, that is,
    #     `num_s_lanes` in a warp load `num_s_packs` of `s_pack_size` elements, in total `warp_s` elements
    # each element in `n` dimension is 32 bit as it contains 4 fp8 in `k` dimension
    # min `s_pack_size` set to 1 element
    # max `s_pack_size` set to 128b/32b = 4 elements
    # the examples below assume `self.num_lanes = 32` (TODO confirm against the base class)
    # for `warp_n = 128`, we have
    #     `s_pack_size = 4`, `num_s_lanes = 32`, `num_s_packs = 1`
    # for `warp_n = 512`, we have
    #     `s_pack_size = 4` (clamped by the max above), `num_s_lanes = 32`, `num_s_packs = 4`
    s_pack_size = min(max(self.warp_n // self.num_lanes, 1), 4)
    num_s_lanes = 4 * 8  # 32 lanes is divided into 4 pieces, each piece has 8 lanes at a stride of 4
    num_s_packs = ceil_divide(self.warp_n, s_pack_size * num_s_lanes)
    warp_s = num_s_packs * num_s_lanes * s_pack_size
    assert warp_s == self.warp_n, "warp_n for scales should be equal to warp_n for weights."
    # note: refer to https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#mma-scaling-thread-id-b-selection
    # we start from first 8 lanes at a stride of 4, assign 1 element to each lane, until all 8 elements are assigned
    # we then move to next 8 lanes at a stride of 4, and repeat the process until all 32 lanes are assigned
    # here is an example for `warp_n = 128, s_pack_size = 4, num_s_lanes = 32, num_s_packs = 1`
    # wscales store order:
    #  0   32  64  96   <-- load by lane 0
    #  8   40  72  104  <-- load by lane 1
    #  16  48  80  112  <-- load by lane 2
    #  24  56  88  120  <-- load by lane 3
    #  1   33  65  97   <-- load by lane 4
    #  ...
    #  25  57  89  121  <-- load by lane 7
    #  ...
    #  7   39  71  103  <-- load by lane 28
    #  ...
    #  31  63  95  127  <-- load by lane 31
    # view dims: (n_warps, num_s_packs, s_pack_size, 4, 8, k_blocks, 4 fp8 per 32-bit element)
    scale = scale.view(n // warp_s, num_s_packs, s_pack_size, 4, 8, -1, self.insn_k // group_size)
    # after the permute, lane index = (index in the 8-dim) * 4 + (index in the 4-dim),
    # and each lane's `s_pack_size` elements (32 channels apart) become contiguous
    scale = scale.permute(0, 5, 1, 4, 3, 2, 6).contiguous()
    return scale.view(-1, n)  # the shape is just used for validation
def pad_scale(self, scale: torch.Tensor, group_size: int) -> torch.Tensor:
    """Pad a scale tensor with ones up to the packer's tile multiples.

    Args:
        scale (`torch.Tensor`):
            scale tensor whose first dimension is the output-channel dimension.
        group_size (`int`):
            quantization group size; a non-positive value means the scale is
            not grouped.

    Returns:
        `torch.Tensor`:
            the padded scale. Grouped scales are first viewed as
            ``(n, 1, -1, 1)`` and padded along dims 0 and 2; all other scales
            are padded along dim 0 only.
    """
    has_groups = group_size > 0 and scale.numel() > scale.shape[0]
    if not has_groups:
        # One value per output channel (or fewer): only the channel dim needs padding.
        return pad(scale, divisor=self.warp_n, dim=0, fill_value=1)

    grouped = scale.view(scale.shape[0], 1, -1, 1)
    if self.check_if_micro_scale(group_size=group_size):
        # Micro scales: the group dim must be a multiple of the scales per instruction.
        group_divisor = self.insn_k // group_size
    else:
        group_divisor = self.num_k_unrolls
    return pad(grouped, divisor=(self.warp_n, group_divisor), dim=(0, 2), fill_value=1)
def convert_to_nunchaku_w4x4y16_linear_weight(
    weight: torch.Tensor,
    scale: torch.Tensor,
    bias: torch.Tensor | None = None,
    smooth: torch.Tensor | None = None,
    lora: tuple[torch.Tensor, torch.Tensor] | None = None,
    float_point: bool = False,
    subscale: torch.Tensor | None = None,
) -> tuple[
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    tuple[torch.Tensor, torch.Tensor] | None,
    torch.Tensor | None,
]:
    """Quantize a linear weight to 4 bits and pack it for the Nunchaku W4-X4-Y16 kernel.

    Args:
        weight (`torch.Tensor`):
            2D fp16/bf16 weight of shape ``(oc, ic)``.
        scale (`torch.Tensor`):
            either a single-element (per-tensor) scale or a 4D scale of shape
            ``(oc, 1, ng, 1)`` where ``ng`` divides ``ic``.
        bias (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            bias; zeros of shape ``(oc, 1)`` are used when omitted.
        smooth (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            smoothing factors; ones of shape ``(ic, 1)`` are used when omitted.
        lora (`tuple[torch.Tensor, torch.Tensor]` or `None`, *optional*, defaults to `None`):
            ``(down, up)`` low-rank branch weights to pad and pack.
        float_point (`bool`, *optional*, defaults to `False`):
            if set, quantize with ``fp_quantize`` (codes asserted to be in
            ``[0, 15]``) instead of rounding to integers in ``[-8, 7]``.
        subscale (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            optional second-level 4D scale of shape ``(oc, 1, nsg, 1)``; its
            group size must be smaller than and divide the first-level group size.

    Returns:
        `tuple`:
            ``(weight, scale, bias, smooth, lora, subscale)`` after padding and
            packing. A per-tensor scale is returned as a single-element tensor.
    """
    assert weight.ndim == 2, "weight tensor should be 2D."
    device, dtype = weight.device, weight.dtype
    assert dtype in (torch.float16, torch.bfloat16), "currently nunchaku only supports fp16 and bf16."
    assert scale is not None, "scale tensor is required for quantization."
    oc, ic = weight.shape
    if scale.numel() == 1:
        # broadcast a per-tensor scale to the per-channel 4D layout used below
        scale = scale.view(-1).expand(oc).reshape(oc, 1, 1, 1)
        per_tensor_scale = True
    else:
        per_tensor_scale = False
    assert scale.ndim == 4, "scale tensor should be 4D."
    assert scale.shape[1] == scale.shape[3] == 1
    assert scale.shape[0] == oc
    # ng: number of groups along the input channels, gs: group size
    ng, gs = scale.shape[2], ic // scale.shape[2]
    assert ic == gs * ng, "input channel size should be equal to group size times number of groups."
    if subscale is not None:
        assert subscale.ndim == 4, "subscale tensor should be 4D."
        assert subscale.shape[1] == subscale.shape[3] == 1
        assert subscale.shape[0] == oc
        # nsg: number of subgroups, sgs: subgroup size
        nsg, sgs = subscale.shape[2], ic // subscale.shape[2]
        assert ic == sgs * nsg, "input channel size should be equal to subgroup size times number of subgroups."
        assert gs > sgs and gs % sgs == 0, "group size should be divisible by subgroup size."
    else:
        nsg, sgs = ng, gs
    # region quantize and pack weight tensor
    # `.to(float32)` yields a new tensor (the input is fp16/bf16), so the in-place
    # divisions below do not modify the caller's weight
    weight = weight.to(dtype=torch.float32).view(oc, 1, ng, gs).div_(scale.to(dtype=torch.float32, device=device))
    if subscale is not None:
        weight = weight.view(oc, 1, nsg, sgs).div_(subscale.to(dtype=torch.float32, device=device))
    weight = weight.view(oc, ic)
    if float_point:
        weight = fp_quantize(weight)
        assert weight.min() >= 0 and weight.max() <= 15, "quantized weight should be in [0, 15]."
    else:
        weight = weight.round_()
        assert weight.min() >= -8 and weight.max() <= 7, "quantized weight should be in [-8, 7]."
    # endregion
    bias = torch.zeros([oc, 1], dtype=dtype, device=device) if bias is None else bias.view(-1, 1)
    smooth = torch.ones([ic, 1], dtype=dtype, device=device) if smooth is None else smooth.view(-1, 1)

    packer = NunchakuWeightPacker(bits=4)
    # pad everything to the packer's tile sizes first ...
    weight = packer.pad_weight(weight.to(dtype=torch.int32))
    scale = packer.pad_scale(scale.to(dtype=dtype), group_size=gs)
    if subscale is not None:
        subscale = packer.pad_scale(subscale.to(dtype=dtype), group_size=sgs)
    bias = packer.pad_scale(bias.to(dtype=dtype), group_size=-1)
    smooth = packer.pad_scale(smooth.to(dtype=dtype), group_size=-1)
    # ... then permute into the kernel's load order
    weight = packer.pack_weight(weight)
    # a group covering all input channels is packed like an ungrouped scale (group_size=-1)
    # NOTE(review): `pad_scale` pads for the micro-scale layout when `insn_k == 4 * group_size`,
    # but packing here always goes through `pack_scale` -- confirm whether `pack_micro_scale`
    # is expected in that case.
    scale = packer.pack_scale(scale, group_size=gs if gs < ic else -1)
    if subscale is not None:
        subscale = packer.pack_scale(subscale, group_size=sgs if sgs < ic else -1)
    bias = packer.pack_scale(bias, group_size=-1)
    smooth = packer.pack_scale(smooth, group_size=-1)
    if lora is not None:
        lora_down = packer.pack_lowrank_weight(packer.pad_lowrank_weight(lora[0], down=True), down=True)
        lora_up = packer.pack_lowrank_weight(packer.pad_lowrank_weight(lora[1], down=False), down=False)
        lora = (lora_down, lora_up)
    if per_tensor_scale:
        # collapse the broadcast scale back to a single-element tensor
        scale = scale.view(-1)[0].view([1])
    return weight, scale, bias, smooth, lora, subscale
def convert_to_nunchaku_w4x16_linear_weight(
    weight: torch.Tensor,
    scale: torch.Tensor,
    zero: torch.Tensor | None = None,
    bias: torch.Tensor | None = None,
    adanorm_splits: int = 1,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Convert a weight tensor to the Nunchaku W4-X16 linear weight format.

    Args:
        weight (`torch.Tensor`):
            2D weight tensor of shape ``(oc, ic)``.
        scale (`torch.Tensor`):
            4D scale tensor of shape ``(oc, 1, ng, 1)``.
        zero (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            zero point tensor; a tensor filled with ``7`` (same shape as the
            scale) is used when omitted.
        bias (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            bias of ``oc`` elements; zeros are used when omitted.
            The caller's tensor is never modified.
        adanorm_splits (`int`, *optional*, defaults to `1`):
            number of equally sized chunks the output channels consist of.
            When greater than one, the chunks are interleaved channel by channel.

    Returns:
        `tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]`:
            packed weight (viewed as ``torch.int32``), scale, zero point, and bias.
    """
    oc, ic = weight.shape
    assert scale.ndim == 4, "scale tensor should be 4D."
    assert scale.shape[0] == oc
    assert scale.shape[1] == scale.shape[3] == 1
    ng = scale.shape[2]
    if bias is None:
        bias = torch.zeros([oc], dtype=weight.dtype, device=weight.device)
    assert oc % adanorm_splits == 0, "output channel size should be divisible by splits."
    if adanorm_splits > 1:
        chunk = oc // adanorm_splits
        # (splits, chunk, ...) -> (chunk, splits, ...): interleave the chunks channel by channel
        weight = weight.view(adanorm_splits, chunk, ic).transpose(0, 1).reshape(oc, ic)
        scale = scale.view(adanorm_splits, chunk, ng).transpose(0, 1).reshape(oc, 1, ng, 1)
        # NOTE(review): a user-supplied `zero` is not interleaved here -- confirm that callers
        # only pass `zero` together with `adanorm_splits == 1` (or a broadcastable zero point).
        bias = bias.reshape(adanorm_splits, chunk).transpose(0, 1)
        # add 1 to the bias of the second and the second-to-last chunk
        delta = [0] * adanorm_splits
        delta[1] = delta[-2] = 1
        # Out-of-place add: `bias` may still be a view of the caller's tensor here, so an
        # in-place `add_` would leak the offset into the caller's bias (and would raise for
        # a leaf tensor that requires grad).
        bias = bias + torch.tensor(delta, dtype=bias.dtype, device=bias.device)
        bias = bias.reshape(oc)
    weight, scale, zero = convert_to_tinychat_w4x16y16_linear_weight(
        weight=weight, scale=scale, zero=torch.full_like(scale, 7) if zero is None else zero, zero_pre_scaled=True
    )
    weight = weight.view(torch.int32)
    return weight, scale, zero, bias
def convert_to_qserve_state_dict(
    state_dict: dict[str, torch.Tensor], scale_dict: dict[str, torch.Tensor], weight_bits: int
) -> dict[str, torch.Tensor]:
    """Convert a quantized checkpoint to a QServe state dictionary.

    Entries of ``scale_dict`` are classified by name:

    - names ending in ``.scaled_zero`` are post-scaled zero points,
    - other names ending in ``zero`` are pre-scaled zero points,
    - every other name must contain ``.weight.scale`` followed by a dot and
      the dot-separated integer scale level (e.g. ``...weight.scale.0``).

    Args:
        state_dict (`dict[str, torch.Tensor]`):
            model state dictionary; entries without scales are copied as is.
        scale_dict (`dict[str, torch.Tensor]`):
            scale and zero point tensors keyed as described above.
        weight_bits (`int`):
            quantized weight bits, ``4`` or ``8``.

    Returns:
        `dict[str, torch.Tensor]`:
            converted state dictionary.
    """
    assert weight_bits in [4, 8], "weight bits should be 4 or 8."
    scales: dict[str, dict[tuple[int, ...], torch.Tensor]] = {}
    zeros: dict[str, tuple[torch.Tensor | None, bool]] = {}
    print("Loading scale tensors...")
    for name, tensor in tqdm.tqdm(scale_dict.items(), desc="Loading scale tensors", leave=False, dynamic_ncols=True):
        print(f" - Loading tensor {name} (dtype: {tensor.dtype}, shape: {tensor.shape}, device: {tensor.device})")
        if name.endswith("zero"):
            # this is a zero point tensor; an all-zero tensor is treated as "no zero point".
            # The check is a single vectorized reduction rather than one `.item()` call per element.
            zero = None if tensor is None or bool((tensor == 0).all()) else tensor
            if name.endswith(".scaled_zero"):
                zeros[name[:-12]] = (zero, False)  # zero point tensor is post-scaled
            else:
                zeros[name[:-5]] = (zero, True)  # zero point tensor is pre-scaled
        else:
            assert ".weight.scale" in name
            # this is a scale tensor
            idx = name.index(".weight.scale")
            param_name = name[: idx + 7]  # keep the ".weight" suffix
            scale_level = tuple(map(int, name[idx + 14 :].split(".")))
            scales.setdefault(param_name, {})[scale_level] = tensor
    for param_name in zeros:
        assert param_name in state_dict, f"zero point tensor {param_name} not found in state dict."
        assert param_name in scales, f"scale tensor {param_name} not found in scale dict."
    converted: dict[str, torch.Tensor] = {}
    print("Converting state dict...")
    for param_name, param in tqdm.tqdm(state_dict.items(), desc="Converting state dict", dynamic_ncols=True):
        if param_name in scales:
            print(f" - Converting {param_name} (dtype: {param.dtype}, shape: {param.shape}, device: {param.device})")
            # work on clones so the converters' in-place ops never touch the inputs
            weight = param.data.clone()
            if param_name in zeros:
                zero, zero_pre_scaled = zeros[param_name]
                zero = zero.clone() if zero is not None else None
            else:
                zero, zero_pre_scaled = None, False
            # lowest level first: level 0 is the scale, level 1 (if any) the subscale
            level_scales = sorted(scales[param_name].items(), key=lambda x: x[0])
            assert len(level_scales) <= 2, "more than two scale levels are not supported."
            scale = level_scales[0][1].clone()
            subscale = level_scales[1][1].clone() if len(level_scales) > 1 else None
            if weight_bits == 4:
                converted.update(
                    convert_to_qserve_w4x8y16_linear_state_dict(
                        param_name,
                        weight,
                        scale=scale,
                        zero=zero,
                        subscale=subscale,
                        zero_pre_scaled=zero_pre_scaled,
                    )
                )
            else:
                assert zero is None, "zero point tensor is not supported for W8 quantization."
                assert subscale is None, "subscale tensor is not supported for W8 quantization."
                converted.update(convert_to_qserve_w8x8y16_linear_state_dict(param_name, weight, scale=scale))
        else:
            if isinstance(param, torch.Tensor):
                print(f" - Copying {param_name} (dtype: {param.dtype}, shape: {param.shape}, device: {param.device})")
                converted[param_name] = param.clone().cpu()
            else:
                print(f" - Copying {param_name} (type: {type(param)}, value: {param})")
                converted[param_name] = param
    return converted
class QServePacker(MmaWeightPackerBase):
    """Weight and scale packer producing the QServe W4 memory layout."""

    def __init__(self):
        super().__init__(bits=8, warp_n=32)
        # Two `n` packs are fused into one byte below, so their count must be even.
        assert self.num_n_packs >= 2 and self.num_n_packs % 2 == 0, (
            f"num_n_packs should be even, but got {self.num_n_packs}."
        )

    def pack_weight(self, weight: torch.Tensor) -> torch.Tensor:
        """Pack unsigned 4-bit codes (stored as ``torch.uint8``) two per byte.

        Args:
            weight (`torch.Tensor`):
                ``(n, k)`` ``torch.uint8`` tensor with values in ``[0, 15]``.

        Returns:
            `torch.Tensor`:
                ``torch.int8`` tensor of shape ``(n, k // 2)``.
        """
        assert weight.min() >= 0, "quantized weight should be non-negative."
        assert weight.max() <= 15, "quantized weight should be less than 16."
        assert weight.dtype == torch.uint8, f"quantized weight should be torch.uint8, but got {weight.dtype}."
        n, k = weight.shape
        assert n % self.mem_n == 0, f"output channel size ({n}) should be divisible by mem_n ({self.mem_n})."
        assert k % self.mem_k == 0, f"input channel size ({k}) should be divisible by mem_k ({self.mem_k})."
        tiled_shape = (
            n // self.mem_n,  # n_tiles
            self.num_n_packs,
            self.n_pack_size,
            self.num_n_lanes,
            self.reg_n,
            k // self.mem_k,  # k_tiles
            self.num_k_packs,
            self.k_pack_size,
            self.num_k_lanes,
            self.reg_k,
        )
        tiles = weight.reshape(*tiled_shape)
        # (n_tiles, num_n_packs, n_pack_size, num_n_lanes, reg_n, k_tiles, num_k_packs, k_pack_size, num_k_lanes, reg_k)
        # =>
        # (num_n_packs, n_tiles, k_tiles, num_k_packs, num_n_lanes, num_k_lanes, k_pack_size, n_pack_size, reg_n, reg_k)
        tiles = tiles.permute(1, 0, 5, 6, 3, 8, 7, 2, 4, 9).contiguous()
        # (num_n_lanes, num_k_lanes, k_pack_size, n_pack_size) must be (8, 4, 2, 2)
        assert tiles.shape[4:-2] == (8, 4, 2, 2)
        # n-pack 1 goes to the high nibble, n-pack 0 to the low nibble
        high_nibble = tiles[1] << 4
        low_nibble = tiles[0]
        packed = high_nibble + low_nibble
        return packed.view(torch.int8).view(n, k // 2)

    def pack_scale(
        self, scale: torch.Tensor, zero: torch.Tensor | None = None, subscale: torch.Tensor | None = None
    ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
        """Flatten the scale and reorder the zero point / subscale.

        Without a subscale, the scale and the zero point are simply flattened.
        With a ``torch.int8`` subscale, the subscale and the zero point are
        permuted to ``(-1, n)`` with ``n_pack_size`` and ``num_n_lanes`` swapped.

        Returns:
            `tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]`:
                scale, zero point, and subscale.
        """
        scale = scale.view(-1)
        n = scale.shape[0]
        if subscale is None:
            return scale, zero.view(-1), None

        assert subscale.dtype == torch.int8, f"subscale should be torch.int8, but got {subscale.dtype}."
        view_shape = (n // self.mem_n, self.num_n_packs, self.n_pack_size, self.num_n_lanes, self.reg_n, -1)

        def _to_group_major(tensor: torch.Tensor) -> torch.Tensor:
            # (n_tiles, num_n_packs, n_pack_size, num_n_lanes, reg_n, -1)
            # =>
            # (-1, n_tiles, num_n_packs, num_n_lanes, n_pack_size, reg_n)
            return tensor.view(view_shape).permute(5, 0, 1, 3, 2, 4).contiguous().view(-1, n)

        subscale = _to_group_major(subscale)
        zero = _to_group_major(zero)
        return scale, zero, subscale
weight = weight.to(dtype=torch.float32) scale = scale.to(dtype=torch.float32, device=weight.device) zero = zero.to(dtype=torch.float32, device=weight.device) oc, ic = weight.shape if subscale is not None: # per-group quantization subscale = subscale.to(dtype=weight.dtype, device=weight.device) # region reshape scale and zero point if scale.numel() == 1: scale = scale.view(-1).expand(oc) scale = scale.reshape(oc).contiguous().view(oc, 1) assert subscale.numel() > 1, "subscale tensor is required for per-group quantization." subscale = subscale.view(oc, -1, 1).round_() ng = subscale.shape[1] gs = ic // ng assert ic == ng * gs, "input channel size should be divisible by group size." if zero.numel() == 1: zero = zero.view(1, 1).expand(oc, ng) zero = zero.reshape(oc, ng).contiguous().view(oc, ng, 1).round_() # endregion # region quantize weight tensor weight = weight.div_(scale).round_() assert weight.min() >= -128, "first-level quantized weight should be greater than or equal to -128." assert weight.max() <= 127, "first-level quantized weight should be less than or equal to 127." weight = weight.view(oc, ng, gs) if not zero_pre_scaled: # zero point is int8 weight = weight.add_(zero) weight = weight.div_(subscale) if zero_pre_scaled: # zero point is int4 if zero.min() < 0: # sint4 zero point zero = zero.add_(8) # convert to uint4 zero point assert zero.min() >= 0, "quantized zero point should be non-negative." assert zero.max() <= 15, "quantized zero point should be less than 16." weight = weight.add_(zero) zero = zero.mul_(subscale) else: if weight.min() < 0: # sint4 weight weight = weight.add_(8) # convert to uint4 weight zero = zero.add_(8 * subscale) _weight = weight.mul(subscale) assert _weight.min() >= 0, "first-level dequantize weight should be non-negative." assert _weight.max() <= 255, "first-level dequantize weight should be less than 256." del _weight assert subscale.min() >= 0, "subscale should be non-negative." 
assert subscale.max() <= 127, "subscale should be less than or equal to 127." assert zero.min() >= 0, "quantized zero point should be non-negative." assert zero.max() <= 255, "quantized zero point should be less than 256." assert weight.min() >= 0, "quantized weight should be non-negative." assert weight.max() <= 15, "quantized weight should be less than 16." # endregion zero = -zero # ! for group quant, qserve uses q*s+z=r instead of q*s-z=r subscale = subscale.to(torch.int8) zero = zero.to(torch.int8) else: # per-channel quantization assert subscale is None, "subscale tensor is not required for per-channel quantization." # region reshape scale and zero point if scale.numel() == 1: scale = scale.view(-1).expand(oc) scale = scale.reshape(oc).contiguous().view(oc, 1) if zero.numel() == 1: zero = zero.view(-1).expand(oc) zero = zero.reshape(oc).contiguous().view(oc, 1) # endregion # region quantize weight tensor if not zero_pre_scaled: # zero point is fp16 weight = weight.add_(zero) weight = weight.div_(scale).round_() if zero_pre_scaled: # zero point is int4 zero = zero.round_() if zero.min() < 0: # sint4 zero point zero = zero.add_(8) # convert to uint4 zero point assert zero.min() >= 0, "quantized zero point should be non-negative." assert zero.max() <= 15, "quantized zero point should be less than 16." weight = weight.add_(zero) zero = zero.mul_(scale) else: if weight.min() < 0: # sint4 weight weight = weight.add_(8) # convert to uint4 weight zero = zero.add_(8 * scale) assert weight.min() >= 0, "quantized weight should be non-negative." assert weight.max() <= 15, "quantized weight should be less than 16." 
def convert_to_qserve_w8x8y16_linear_weight(
    weight: torch.Tensor, scale: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    """Quantize an fp16 weight to int8 for the QServe W8-X8-Y16 linear layer.

    Args:
        weight (`torch.Tensor`):
            fp16 weight tensor whose first dimension is the output channel.
        scale (`torch.Tensor`):
            per-tensor (single element) or per-output-channel scale.

    Returns:
        `tuple[torch.Tensor, torch.Tensor]`:
            ``torch.int8`` quantized weight and the fp16 per-channel scale of
            shape ``(oc,)``.
    """
    out_dtype = weight.dtype
    assert out_dtype == torch.float16, "currently qserve only supports fp16."
    assert scale is not None, "scale tensor is required for quantization."

    num_channels = weight.shape[0]
    # Quantize in float32; the cast copies, so the caller's weight stays untouched.
    quantized = weight.to(dtype=torch.float32)
    channel_scale = scale.to(dtype=torch.float32, device=quantized.device)
    if channel_scale.numel() == 1:
        # broadcast a per-tensor scale to every output channel
        channel_scale = channel_scale.view(-1).expand(num_channels)
    channel_scale = channel_scale.reshape(num_channels).contiguous().view(num_channels, 1)

    quantized = quantized.div_(channel_scale).round_()
    assert quantized.min() >= -128, "quantized weight should be greater than or equal to -128."
    assert quantized.max() <= 127, "quantized weight should be less than or equal to 127."

    packed_weight = quantized.contiguous().to(torch.int8)
    packed_scale = channel_scale.view(num_channels).to(dtype=out_dtype)
    return packed_weight, packed_scale
""" module_name = param_name[:-7] weight, scale, zero = convert_to_tinychat_w4x16y16_linear_weight( weight, scale=scale, zero=zero, zero_pre_scaled=zero_pre_scaled ) state_dict: dict[str, torch.Tensor] = {} state_dict[f"{module_name}.qweight"] = weight.cpu() state_dict[f"{module_name}.scales"] = scale.cpu() state_dict[f"{module_name}.scaled_zeros"] = zero.cpu() return state_dict def convert_to_tinychat_state_dict( state_dict: dict[str, torch.Tensor], scale_dict: dict[str, torch.Tensor] ) -> dict[str, torch.Tensor]: scales: dict[str, dict[tuple[int, ...], torch.Tensor]] = {} zeros: dict[str, tuple[torch.Tensor | None, bool]] = {} print("Loading scale tensors...") for name, tensor in tqdm.tqdm(scale_dict.items(), desc="Loading scale tensors", leave=False, dynamic_ncols=True): print(f" - Loading tensor {name} (dtype: {tensor.dtype}, shape: {tensor.shape}, device: {tensor.device})") if name.endswith("zero"): # this is a zero point tensor zero = None if tensor is None or all(t.item() == 0 for t in tensor.flatten()) else tensor if name.endswith(".scaled_zero"): zeros[name[:-12]] = (zero, False) # zero point tensor is post-scaled else: zeros[name[:-5]] = (zero, True) # zero point tensor is pre-scaled else: assert ".weight.scale" in name # this is a scale tensor idx = name.index(".weight.scale") param_name = name[: idx + 7] scale_level = tuple(map(int, name[idx + 14 :].split("."))) scales.setdefault(param_name, {})[scale_level] = tensor for param_name in zeros.keys(): assert param_name in state_dict, f"zero point tensor {param_name} not found in state dict." assert param_name in scales, f"scale tensor {param_name} not found in scale dict." 
if __name__ == "__main__":
    # Command-line entry point: convert a quantization checkpoint directory
    # (`model.pt` + `scale.pt`) into a TinyChat W4A16 checkpoint.
    import shlex

    parser = argparse.ArgumentParser()
    parser.add_argument("--quant-path", type=str, required=True, help="path to the quantization checkpoint directory.")
    parser.add_argument("--output-root", type=str, default="", help="root to the output checkpoint directory.")
    parser.add_argument("--model-name", type=str, default=None, help="model name.")
    parser.add_argument("--model-path", type=str, default=None, help="path to the huggingface model directory.")
    parser.add_argument("--copy-on-save", action="store_true", help="copy files on save.")
    args = parser.parse_args()
    if not args.output_root:
        args.output_root = args.quant_path
    if args.model_name is None:
        assert args.model_path is not None, "model name or path is required."
        # fall back to the last component of the model path
        model_name = args.model_path.rstrip(os.sep).split(os.sep)[-1]
        print(f"Model name not provided. Using model name {model_name}.")
    else:
        model_name = args.model_name
    assert model_name, "model name is required."
    state_dict = torch.load(
        os.path.join(args.quant_path, "model.pt"),
        map_location="cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu",
    )
    scale_dict = torch.load(os.path.join(args.quant_path, "scale.pt"), map_location="cpu")
    converted = convert_to_tinychat_state_dict(state_dict, scale_dict)
    # Use the resolved `model_name`: `args.model_name` is None when the name was
    # derived from `--model-path`, which used to produce a "None-w4a16" directory.
    model_name = f"{model_name}-w4a16"
    output_dirpath = os.path.join(args.output_root, model_name)
    os.makedirs(output_dirpath, exist_ok=True)
    if args.model_path and os.path.exists(args.model_path):
        output_path = os.path.join(output_dirpath, "model.safetensors")
        safetensors.torch.save_file(converted, output_path)
        print(f"Quantized model checkpoint saved to {output_path}.")
        # copy or link the tokenizer and the json configuration files next to the checkpoint
        for filename in os.listdir(args.model_path):
            if filename == "tokenizer.model" or (
                filename.endswith(".json") and filename != "pytorch_model.bin.index.json"
            ):
                filepath = os.path.abspath(os.path.join(args.model_path, filename))
                # quote the paths so that spaces or shell metacharacters cannot break the command
                if args.copy_on_save:
                    os.system(f"cp {shlex.quote(filepath)} {shlex.quote(output_dirpath)}/")
                else:
                    os.system(f"ln -s {shlex.quote(filepath)} {shlex.quote(output_dirpath + '/' + filename)}")
    else:
        output_path = os.path.join(output_dirpath, "tinychat-v2.pt")
        torch.save(converted, output_path)
        print(f"Quantized model checkpoint saved to {output_path}.")
    print(f"Quantized model saved to {output_dirpath}.")
"-U__CUDA_NO_HALF2_OPERATORS__", "-U__CUDA_NO_HALF2_CONVERSIONS__", "-U__CUDA_NO_BFLOAT16_OPERATORS__", "-U__CUDA_NO_BFLOAT16_CONVERSIONS__", "-U__CUDA_NO_BFLOAT162_OPERATORS__", "-U__CUDA_NO_BFLOAT162_CONVERSIONS__", "--expt-relaxed-constexpr", "--expt-extended-lambda", "--use_fast_math", "--ptxas-options=--allow-expensive-optimizations=true", "--threads=8", ], ) ================================================ FILE: deepcompressor/backend/tinychat/csrc/pybind.cpp ================================================ #include #include #include "quantization/gemm/gemm_cuda.h" #include "quantization/gemv/gemv_cuda.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("awq_gemm_forward_cuda", &awq_gemm_forward_cuda, "AWQ quantized GEMM kernel."); m.def("awq_gemv_forward_cuda", &awq_gemv_forward_cuda, "AWQ quantized GEMV kernel."); } ================================================ FILE: deepcompressor/backend/tinychat/csrc/quantization/dequantize.cuh ================================================ /* Modified from NVIDIA FasterTransformer: https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h @article{lin2023awq, title={AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration}, author={Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and Dang, Xingyu and Han, Song}, journal={arXiv}, year={2023} } */ #pragma once #include #include template __device__ __forceinline__ void dequantize_s4_to_f16x2(T const &source, uint4 *result); template <> __device__ __forceinline__ void dequantize_s4_to_f16x2(half2 const &source, uint4 *result) { uint32_t *h = reinterpret_cast(result); uint32_t const i4s = reinterpret_cast(source); // First, we extract the i4s and construct an intermediate fp16 number. 
// immLut is the lop3 truth table for (a & b) | c, i.e. "mask the nibbles, then OR in the magic exponent".
    static constexpr uint32_t immLut = (0xf0 & 0xcc) | 0xaa;
    // Selects bits 0-3 of each 16-bit half.
    static constexpr uint32_t BOTTOM_MASK = 0x000f000f;
    // Selects bits 4-7 of each 16-bit half.
    static constexpr uint32_t TOP_MASK = 0x00f000f0;
    // fp16 {1024, 1024}: OR-ing a nibble into the low mantissa bits yields 1024 + n (bottom nibble)
    // or 1024 + 16 * n (nibble left in bits 4-7).
    static constexpr uint32_t I4s_TO_F16s_MAGIC_NUM = 0x64006400;

    // Note that the entire sequence only requires 1 shift instruction. This is thanks to the register packing
    // format and the fact that we force our integers to be unsigned, and account for this in the fp16 subtractions.
    // In addition, I exploit the fact that sub and fma have the same throughput in order to convert elt_23 and
    // elt_67 to fp16 without having to shift them to the bottom bits before hand.

    // Shift right by 8 to now consider elt_45 and elt_67. Issue first to hide RAW dependency if we issue
    // immediately before required.
    const uint32_t top_i4s = i4s >> 8;
    // Extract elt_01 - (i4s & 0x000f000f) | 0x64006400
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[0])
                 : "r"(i4s), "n"(BOTTOM_MASK), "n"(I4s_TO_F16s_MAGIC_NUM), "n"(immLut));
    // Extract elt_23 (i4s & 0x00f000f0) | 0x64006400
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[1])
                 : "r"(i4s), "n"(TOP_MASK), "n"(I4s_TO_F16s_MAGIC_NUM), "n"(immLut));
    // Extract elt_45 (top_i4s & 0x000f000f) | 0x64006400
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[2])
                 : "r"(top_i4s), "n"(BOTTOM_MASK), "n"(I4s_TO_F16s_MAGIC_NUM), "n"(immLut));
    // Extract elt_67 (top_i4s & 0x00f000f0) | 0x64006400
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[3])
                 : "r"(top_i4s), "n"(TOP_MASK), "n"(I4s_TO_F16s_MAGIC_NUM), "n"(immLut));

    // I use inline PTX below because I am not sure if the compiler will emit float2half instructions if I use the
    // half2 ctor. In this case, I chose performance reliability over code readability.

    // This is the half2 {1032, 1032} represented as an integer.
    // static constexpr uint32_t FP16_TOP_MAGIC_NUM = 0x64086408;
    // Haotian: subtract {1024, 1024} instead, we do not need to map to [-8, 7]
    static constexpr uint32_t FP16_TOP_MAGIC_NUM = 0x64006400;
    // This is the half2 {1 / 16, 1 / 16} represented as an integer.
    static constexpr uint32_t ONE_SIXTEENTH = 0x2c002c00;
    // This is the half2 {-72, -72} represented as an integer.
    // static constexpr uint32_t NEG_72 = 0xd480d480;
    // Haotian: Let's use {-64, -64}.
    static constexpr uint32_t NEG_64 = 0xd400d400;

    // Finally, we construct the output numbers.
    // Bottom-nibble pairs hold 1024 + n, so subtracting 1024 gives n.
    // Top-nibble pairs hold 1024 + 16 * n, so (x * 1/16) - 64 gives n.
    // Convert elt_01
    asm volatile("sub.f16x2 %0, %1, %2;\n" : "=r"(h[0]) : "r"(h[0]), "r"(FP16_TOP_MAGIC_NUM));
    // Convert elt_23
    asm volatile("fma.rn.f16x2 %0, %1, %2, %3;\n" : "=r"(h[1]) : "r"(h[1]), "r"(ONE_SIXTEENTH), "r"(NEG_64));
    // Convert elt_45
    asm volatile("sub.f16x2 %0, %1, %2;\n" : "=r"(h[2]) : "r"(h[2]), "r"(FP16_TOP_MAGIC_NUM));
    // Convert elt_67
    asm volatile("fma.rn.f16x2 %0, %1, %2, %3;\n" : "=r"(h[3]) : "r"(h[3]), "r"(ONE_SIXTEENTH), "r"(NEG_64));
}

// bf16 specialization. Same contract as the fp16 version: eight 4-bit fields of the 32-bit word
// behind `source` become eight bf16 values in `*result` (four packed pairs h[0..3]).
// Unlike the fp16 path, every nibble pair is shifted down to bits 0-3 / 16-19 before masking,
// so one mask and one magic number serve all four extractions.
template <>
__device__ __forceinline__ void dequantize_s4_to_f16x2<__nv_bfloat162>(__nv_bfloat162 const &source, uint4 *result)
{
    uint32_t *h = reinterpret_cast(result);
    uint32_t const source_i4s = reinterpret_cast(source);

    // First, we extract the i4s and construct an intermediate bf16 number.
    // lop3 truth table for (a & b) | c.
    static constexpr uint32_t immLut = (0xf0 & 0xcc) | 0xaa;
    static constexpr uint32_t MASK = 0x000f000f;
    // bf16 {128, 128}: OR-ing a nibble into the low mantissa bits yields 128 + n.
    static constexpr uint32_t I4s_TO_BF16s_MAGIC_NUM = 0x43004300;

    uint32_t i4s = source_i4s;
    // Extract elt_01 - (i4s & 0x000f000f) | 0x43004300
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[0])
                 : "r"(i4s), "n"(MASK), "n"(I4s_TO_BF16s_MAGIC_NUM), "n"(immLut));
    // Extract elt_23: shift the next nibble pair down, then (i4s & 0x000f000f) | 0x43004300
    i4s >>= 4;
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[1])
                 : "r"(i4s), "n"(MASK), "n"(I4s_TO_BF16s_MAGIC_NUM), "n"(immLut));
    // Extract elt_45: shift again, then (i4s & 0x000f000f) | 0x43004300
    i4s >>= 4;
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[2])
                 : "r"(i4s), "n"(MASK), "n"(I4s_TO_BF16s_MAGIC_NUM), "n"(immLut));
    // Extract elt_67: shift again, then (i4s & 0x000f000f) | 0x43004300
    i4s >>= 4;
    asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
                 : "=r"(h[3])
                 : "r"(i4s), "n"(MASK), "n"(I4s_TO_BF16s_MAGIC_NUM), "n"(immLut));

    // This is the BF16 {-136, -136} represented as an integer.
    // static constexpr uint32_t BF16_BIAS = 0xC308C308;
    // This is the BF16 {-128, -128} represented as an integer, we do not need to map to [-8, 7]
    static constexpr uint32_t NEG_128 = 0xC300C300;
    // bf16 {1, 1}; the fma below is therefore x * 1 - 128.
    static constexpr uint32_t ONE = 0x3F803F80;

    // Finally, we construct the output numbers.
// Convert elt_01 asm volatile("fma.rn.bf16x2 %0, %1, %2, %3;\n" : "=r"(h[0]) : "r"(h[0]), "r"(ONE), "r"(NEG_128)); // Convert elt_23 asm volatile("fma.rn.bf16x2 %0, %1, %2, %3;\n" : "=r"(h[1]) : "r"(h[1]), "r"(ONE), "r"(NEG_128)); // Convert elt_45 asm volatile("fma.rn.bf16x2 %0, %1, %2, %3;\n" : "=r"(h[2]) : "r"(h[2]), "r"(ONE), "r"(NEG_128)); // Convert elt_67 asm volatile("fma.rn.bf16x2 %0, %1, %2, %3;\n" : "=r"(h[3]) : "r"(h[3]), "r"(ONE), "r"(NEG_128)); } ================================================ FILE: deepcompressor/backend/tinychat/csrc/quantization/gemm/gemm_cuda.cu ================================================ #include #include #include "semaphore.h" #include "gemm_cuda.h" #include "../dequantize.cuh" #include "../../utils.cuh" #include #include #define kInterleave 4 #define OP_M 16 #define OP_N 8 #define OP_K 16 #define INTRIN_M 16 #define INTRIN_N 16 #define INTRIN_K 16 #define WARP_SIZE 32 #define SMEM_PAD_A 0 #define SMEM_PAD_B 0 #define PACK_SIZE 8 #if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 4) #define L2_CACHEHINT(size) ".L2::" #size "B" #else #define L2_CACHEHINT(size) #endif #define KERNEL_LAUNCH_CODE \ int num_mn_tiles = (num_in_feats + CTA_M - 1) / CTA_M * (num_out_channels + CTA_N - 1) / CTA_N; \ torch::Tensor _semaphores = torch::empty({num_mn_tiles}, options_int); \ auto semaphores = reinterpret_cast(_semaphores.data_ptr()); \ constexpr int NUM_WARPS = (CTA_M / WARP_M) * (CTA_N / WARP_N) * (CTA_K / WARP_K); \ constexpr int SCALES_SMEM_SIZE = (G >= CTA_K) ? 
(CTA_N / (G / CTA_K) * STAGES * 2) : (CTA_N * (CTA_K / G) * STAGES * 2); \
    constexpr int kSmemByteSize = (CTA_M * (CTA_K + SMEM_PAD_A) + CTA_N * (CTA_K + SMEM_PAD_B) / kInterleave + SCALES_SMEM_SIZE) * STAGES * sizeof(f16_t); \
    if (kSmemByteSize >= 99 * 1024) \
    { \
        printf("This kernel requires %d Bytes of shared memory, which exceeds device limit.\n", kSmemByteSize); \
        return _out_feats; \
    } \
    int j_factors1 = num_out_channels / CTA_N / 1; \
    dim3 num_blocks((num_out_feats + CTA_M - 1) / CTA_M * j_factors1 * SPLITK); \
    dim3 threads_per_block(WARP_SIZE, NUM_WARPS); \
    auto kernel_func = gemm_w4a16_T1; \
    cudaFuncSetAttribute(kernel_func, cudaFuncAttributeMaxDynamicSharedMemorySize, kSmemByteSize); \
    kernel_func<<>>( \
        in_feats, kernel, scales, zeros, out_feats, semaphores, num_in_feats, num_out_channels, num_in_channels);

// Returns the log2 of the tile width used by get_block_idx_mapping: the largest of 3 / 2 / 1
// permitted by both the compile-time cap N (>= 8 / 4 / 2) and the runtime count n
// (>= 6 / 3 / 2), or 0 when neither bound allows any grouping.
template
__inline__ __host__ __device__ int get_log_tile(int n)
{
    if (N >= 8 && n >= 6)
        return 3;
    else if (N >= 4 && n >= 3)
        return 2;
    else if (N >= 2 && n >= 2)
        return 1;
    else
        return 0;
}

// Remaps a block index pair: x is divided by 2^log_tile, and the low log_tile bits of the
// original x are folded into y as (y << log_tile) + (x mod 2^log_tile).
__inline__ __device__ uint2 get_block_idx_mapping(int blockIdx_x, int blockIdx_y, int log_tile)
{
    return make_uint2((blockIdx_x >> log_tile), (blockIdx_y << log_tile) + ((blockIdx_x) & ((1 << (log_tile)) - 1)));
}

// Barrier for one K-slice. With a single slice this is a plain __syncthreads(); otherwise it
// issues bar.sync on barrier id (slice_id / SLICE_GROUP + 1) for NUM_WARPS_MN * WARP_SIZE
// threads, so barrier 0 is never used by this path.
template
__device__ void sync_slice(int slice_id)
{
    if constexpr (SLICES == 1)
    {
        __syncthreads();
    }
    else
    {
        constexpr int SLICE_GROUP = (SLICES + 7) / 8;
        constexpr uint32_t num_threads = NUM_WARPS_MN * WARP_SIZE;
        const uint32_t barrier_id = slice_id / SLICE_GROUP + 1;
        asm volatile("bar.sync %0, %1;" : : "r"(barrier_id), "n"(num_threads));
    }
}

// Converts a generic pointer into a 32-bit shared-state-space address (cvta.to.shared followed
// by a 64 -> 32 bit narrowing), the form taken by the ldmatrix / cp.async wrappers below.
__inline__ __device__ uint32_t cast_smem_ptr_to_uint(void const *const ptr)
{
    uint32_t smem_int_ptr;

    asm("{.reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 %0, smem_ptr; }\n"
        : "=r"(smem_int_ptr)
        : "l"(ptr));

    return smem_int_ptr;
}

// ldmatrix (m8n8, x4, b16) from shared address `addr`. The four 32-bit results are stored at
// shared_warp + ax0_0 * 8, viewed as unsigned[0..3] (i.e. eight 16-bit elements per call).
template
__inline__ __device__ void ldmatrix_m8n8_x4_b16(f16_t *shared_warp, int ax0_0, uint32_t addr)
{
    static_assert(std::is_same::value || std::is_same::value,
                  "ldmatrix_m8n8_x4_b16 supports only half or __nv_bfloat16 types.");
    __asm__ __volatile__(
        "ldmatrix.sync.aligned.m8n8.x4.shared.b16"
        "{%0, %1, %2, %3}, [%4];"
        : "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[0]), "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[1]), "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[2]), "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[3])
        : "r"(addr));
}

// Same as ldmatrix_m8n8_x4_b16 but with the .trans qualifier on the load.
template
__inline__ __device__ void ldmatrix_m8n8_x4_trans_b16(f16_t *shared_warp, int ax0_0, uint32_t addr)
{
    static_assert(std::is_same::value || std::is_same::value,
                  "ldmatrix_m8n8_x4_trans_b16 supports only half or __nv_bfloat16 types.");
    __asm__ __volatile__(
        "ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16"
        "{%0, %1, %2, %3}, [%4];"
        : "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[0]), "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[1]), "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[2]), "=r"(((unsigned *)(shared_warp + (ax0_0 * 8)))[3])
        : "r"(addr));
}

// Predicated 16-byte cp.async.cg from global `src` to shared address `smem_int_ptr`.
// When `mask` is false the instruction is skipped entirely, so the destination is left as is
// (it is not zero-filled).
__inline__ __device__ void cp_async_cg_A(uint32_t smem_int_ptr, const uint4 *__restrict__ src, bool mask)
{
    const int cp_size = 16;
    asm volatile("{"
                 " .reg .pred p;"
                 " setp.ne.b32 p, %0, 0;"
                 " @p cp.async.cg.shared.global" L2_CACHEHINT(128) " [%1], [%2], %3;"
                 "}" ::"r"((int)mask),
                 "r"(smem_int_ptr),
                 "l"(src),
                 "n"(cp_size));
}

// Warp-level m16n8k16 MMA with fp32 accumulation, performed in place: C_warp[0..3] is both the
// accumulator input and the output. A is read as four 32-bit registers, B as two.
template
__device__ __inline__ void mma_m16n8k16(float *C_warp, f16_t *A_shared_warp, f16_t *B_shared_warp);

// fp16 operands.
template <>
__device__ __inline__ void mma_m16n8k16(float *C_warp, half *A_shared_warp, half *B_shared_warp)
{
    __asm__ __volatile__(
        "mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32"
        "{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9}, {%10, %11, %12, %13};"
        : "=f"(((float *)C_warp)[0]), "=f"(((float *)C_warp)[1]), "=f"(((float *)C_warp)[2]), "=f"(((float *)C_warp)[3])
        : "r"(((unsigned *)A_shared_warp)[0]), "r"(((unsigned *)A_shared_warp)[1]), "r"(((unsigned *)A_shared_warp)[2]), "r"(((unsigned *)A_shared_warp)[3]), "r"(((unsigned *)B_shared_warp)[0]), "r"(((unsigned *)B_shared_warp)[1]), "f"(((float *)C_warp)[0]), "f"(((float *)C_warp)[1]), "f"(((float *)C_warp)[2]), "f"(((float *)C_warp)[3]));
}

// bf16 operands.
template <>
__device__ __inline__ void mma_m16n8k16<__nv_bfloat16>(float *C_warp, __nv_bfloat16 *A_shared_warp, __nv_bfloat16 *B_shared_warp)
{
    __asm__ __volatile__(
        "mma.sync.aligned.m16n8k16.row.col.f32.bf16.bf16.f32"
        "{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9}, {%10, %11, %12, %13};"
        : "=f"(((float *)C_warp)[0]), "=f"(((float *)C_warp)[1]), "=f"(((float *)C_warp)[2]), "=f"(((float *)C_warp)[3])
        : "r"(((unsigned *)A_shared_warp)[0]), "r"(((unsigned *)A_shared_warp)[1]), "r"(((unsigned *)A_shared_warp)[2]), "r"(((unsigned *)A_shared_warp)[3]), "r"(((unsigned *)B_shared_warp)[0]), "r"(((unsigned *)B_shared_warp)[1]), "f"(((float *)C_warp)[0]), "f"(((float *)C_warp)[1]), "f"(((float *)C_warp)[2]), "f"(((float *)C_warp)[3]));
}

// Copies this thread's share of one CTA_M x CTA_K tile of A (src, row pitch global_ncols) into
// the staging buffer `dst`, one PACK_SIZE-element vector at a time. Rows at or beyond
// global_nrows are skipped, as is everything when `mask` is false. `shared_iter_k` selects
// which 1/SHARED_K_ITERS portion of the tile this call handles.
template
__device__ __inline__ void global_to_share_one_stage_A(f16_t *src, f16_t *dst, int global_nrows, int global_ncols, int cta_offset_m, int cta_offset_n, int cta_offset_k, int global_iter_k, int shared_iter_k, bool mask)
{
    constexpr int threads_needed = (CTA_M * CTA_K) / PACK_SIZE / SHARED_K_ITERS;
    constexpr int threads_used = threads_needed < CTA_SIZE ?
threads_needed : CTA_SIZE;
    // Vector copies each participating thread performs for the whole tile, and per call.
    constexpr int total_global_iters = (CTA_M * CTA_K) / PACK_SIZE / threads_used;
    constexpr int partial_global_iters = (total_global_iters + SHARED_K_ITERS - 1) / SHARED_K_ITERS;
    // Rows covered per iteration by all participating threads, and by one warp.
    constexpr int cta_step_m_or_n = (threads_used * PACK_SIZE) / CTA_K;
    constexpr int warp_step_m_or_n = (WARP_SIZE * PACK_SIZE) / CTA_K;
    constexpr int threads_per_row = CTA_K / PACK_SIZE;
    constexpr int kSmemCol = CTA_K + SMEM_PAD_A;
    // Threads beyond threads_used take no part in the copy.
    bool local_mask = mask & (threadIdx.y * WARP_SIZE + threadIdx.x < threads_used);
    int ld_col = (threadIdx.x % threads_per_row);
#pragma unroll
    for (int _global_iter = 0; _global_iter < partial_global_iters; ++_global_iter)
    {
        int global_iter = shared_iter_k * partial_global_iters + _global_iter;
        int ld_row = global_iter * cta_step_m_or_n + threadIdx.y * warp_step_m_or_n + (threadIdx.x / threads_per_row);
        // `&` binds tighter than `^`: this is ld_col ^ (ld_row & 7), a row-dependent column
        // swizzle that share_to_reg_one_stage_A repeats when reading the tile back.
        int ld_col_swizzled = (ld_col ^ (ld_row) & 7) * PACK_SIZE;
        void *dst_ptr = (void *)(dst + ld_row * kSmemCol + ld_col_swizzled);
        uint4 *src_ptr = (uint4 *)(src + (ld_row + cta_offset_m) * global_ncols + ld_col * PACK_SIZE + global_iter_k * CTA_K + cta_offset_k); // cta_offset_m * global_ncols + global_iter * cta_step_m_or_n * global_ncols + threadIdx.y * warp_step_m_or_n * global_ncols + (threadIdx.x / threads_per_row) * global_ncols + global_iter_k * CTA_K + (threadIdx.x % threads_per_row) * PACK_SIZE);
        if constexpr (STAGES > 1)
        {
            // Multi-stage pipeline: asynchronous copy, predicated on the row bound.
            uint32_t addr = cast_smem_ptr_to_uint(dst_ptr);
            cp_async_cg_A(addr, src_ptr, local_mask & (ld_row + cta_offset_m < global_nrows));
        }
        else
        {
            // Single stage: plain synchronous 16-byte copy.
            if (local_mask & (ld_row + cta_offset_m < global_nrows))
                *(uint4 *)dst_ptr = *src_ptr;
        }
    }
}

// Copies this thread's share of one (CTA_N / kInterleave) x CTA_K tile of the packed weight
// matrix B into the staging buffer `dst`. Unlike the A loader there is no row-bound check;
// only `mask` and the participating-thread test gate the copy.
template
__device__ __inline__ void global_to_share_one_stage_B(f16_t *src, f16_t *dst, int global_ncols, int cta_offset_m, int cta_offset_n, int cta_offset_k, int global_iter_k, int shared_iter_k, bool mask)
{
    constexpr int threads_needed = (CTA_N / kInterleave * CTA_K) / PACK_SIZE / SHARED_K_ITERS;
    constexpr int threads_used = threads_needed < CTA_SIZE ? threads_needed : CTA_SIZE;
    constexpr int total_global_iters = (CTA_N / kInterleave * CTA_K) / PACK_SIZE / threads_used;
    constexpr int partial_global_iters = (total_global_iters + SHARED_K_ITERS - 1) / SHARED_K_ITERS;
    constexpr int cta_step_m_or_n = (threads_used * PACK_SIZE) / CTA_K;
    constexpr int warp_step_m_or_n = (WARP_SIZE * PACK_SIZE) / CTA_K;
    constexpr int threads_per_row = CTA_K / PACK_SIZE;
    constexpr int kSmemCol = CTA_K + SMEM_PAD_B;
    bool local_mask = mask & (threadIdx.y * WARP_SIZE + threadIdx.x < threads_used);
#pragma unroll
    for (int _global_iter = 0; _global_iter < partial_global_iters; ++_global_iter)
    {
        int global_iter = shared_iter_k * partial_global_iters + _global_iter;
        int ld_row = global_iter * cta_step_m_or_n + threadIdx.y * warp_step_m_or_n + (threadIdx.x / threads_per_row);
        int ld_col = (threadIdx.x % threads_per_row);
        // Parses as ld_col ^ ((ld_row % 2) & 7), i.e. ld_col ^ (ld_row % 2): only the row
        // parity takes part in the swizzle for B.
        int ld_col_swizzled = ld_col ^ (ld_row % 2) & 7;
        void *dst_ptr = (void *)(dst + (ld_row * kSmemCol + ld_col_swizzled * PACK_SIZE));
        uint4 *src_ptr = (uint4 *)(src + global_iter_k * CTA_K + cta_offset_n / kInterleave * global_ncols + ld_row * global_ncols + ld_col * PACK_SIZE + cta_offset_k);
        if constexpr (STAGES > 1)
        {
            uint32_t addr = cast_smem_ptr_to_uint(dst_ptr);
            cp_async_cg_A(addr, src_ptr, local_mask);
        }
        else
        {
            if (local_mask)
                *(uint4 *)dst_ptr = *src_ptr;
        }
    }
}

// Copies the scales (src -> dst) and zero points (src_z -> dst_z) for the quantization
// group(s) covered by the current K tile; both use identical addressing.
template
__device__ __inline__ void global_to_share_one_stage_scales(f16_t *src, f16_t *dst, f16_t *src_z, f16_t *dst_z, int global_ncols, int cta_offset_m, int cta_offset_n, int cta_offset_k, int global_iter_k, int shared_iter_k, bool mask)
{
    // Elements to load: one row of CTA_N when a group spans the whole K tile, otherwise one
    // row per group inside the tile.
    constexpr int LD_AMOUNT = (G >= CTA_K) ? CTA_N : CTA_N * CTA_K / G;
    constexpr int threads_needed = LD_AMOUNT / PACK_SIZE / 1;
    constexpr int threads_used = threads_needed < CTA_SIZE ?
threads_needed : CTA_SIZE;
    constexpr int total_global_iters = LD_AMOUNT / PACK_SIZE / threads_used;
    constexpr int threads_per_row = CTA_N / PACK_SIZE;
    constexpr int kSmemCol = CTA_N;
    bool local_mask = mask & (threadIdx.y * WARP_SIZE + threadIdx.x < threads_used);
    // Index of the first quantization group touched by this K tile.
    int g_idx = (cta_offset_k + global_iter_k * CTA_K) / G;

    // Addressing uses threadIdx.x only: (threadIdx.x / threads_per_row) picks the group row,
    // (threadIdx.x % threads_per_row) the PACK_SIZE-wide column chunk.
    void *dst_ptr = (void *)(dst + (threadIdx.x / threads_per_row) * kSmemCol + (threadIdx.x % threads_per_row) * PACK_SIZE);
    uint4 *src_ptr = (uint4 *)(src + g_idx * global_ncols + cta_offset_n + (threadIdx.x / threads_per_row) * global_ncols + (threadIdx.x % threads_per_row) * PACK_SIZE);
    void *dst_ptr_z = (void *)(dst_z + (threadIdx.x / threads_per_row) * kSmemCol + (threadIdx.x % threads_per_row) * PACK_SIZE);
    uint4 *src_ptr_z = (uint4 *)(src_z + g_idx * global_ncols + cta_offset_n + (threadIdx.x / threads_per_row) * global_ncols + (threadIdx.x % threads_per_row) * PACK_SIZE);
    // NOTE(review): the sibling loaders use `if constexpr (STAGES > 1)`; this one is a plain `if`.
    if (STAGES > 1)
    {
        uint32_t addr = cast_smem_ptr_to_uint(dst_ptr);
        cp_async_cg_A(addr, src_ptr, local_mask);
        uint32_t addr_z = cast_smem_ptr_to_uint(dst_ptr_z);
        cp_async_cg_A(addr_z, src_ptr_z, local_mask);
    }
    else
    {
        if (local_mask)
        {
            *(uint4 *)dst_ptr = *src_ptr;
            *(uint4 *)dst_ptr_z = *src_ptr_z;
        }
    }
}

// Loads this warp's A fragments for K step `k_0_1` from the staged tile `src` into `dst`,
// one ldmatrix per OP_M-row block (`shared_iters` blocks in total).
template
__device__ __inline__ void share_to_reg_one_stage_A(f16_t *src, f16_t *dst, int warp_offset_m, int warp_offset_n, int warp_offset_k, int k_0_1)
{
    constexpr int kSmemCol = CTA_K + SMEM_PAD_A;

    for (int shared_iter = 0; shared_iter < shared_iters; ++shared_iter)
    {
        int ld_row = warp_offset_m + shared_iter * OP_M + (threadIdx.x % 16);
        int ld_col = k_0_1 * 16 + (threadIdx.x / 16) * 8 + warp_offset_k;
        // Same swizzle as the A store path: (ld_col / PACK_SIZE) ^ (ld_row & 7).
        int ld_col_swizzled = ((ld_col / PACK_SIZE) ^ (ld_row) & 7) * PACK_SIZE;
        void *addr_ptr = (void *)(src + ld_row * kSmemCol + ld_col_swizzled);

        uint32_t addr = cast_smem_ptr_to_uint(addr_ptr);
        ldmatrix_m8n8_x4_b16(dst, shared_iter, addr);
    }
}

// Produces this warp's dequantized B fragments for K step `k_0_1`.
//   1. When `ldmatrix` is set, the packed 4-bit weights are loaded from `src` into `dst`.
//   2. For every block, the packed word selected by k_0_1 is expanded with
//      dequantize_s4_to_f16x2 and each pair is transformed as w * scale + zero, using the
//      per-channel scale / zero read from src_scales / src_zeros; results go to `dst_fp16`.
// When `ldmatrix` is false step 1 is skipped, so `dst` must still hold an earlier load.
template
__device__ __inline__ void share_to_reg_one_stage_B(f16_t *src, f16_t *src_scales, f16_t *src_zeros, f16_t *dst, f16_t *dst_fp16, int warp_offset_m, int warp_offset_n, int warp_offset_k, int k_0_1)
{
    using f162_t = typename packed_as::type;
    constexpr int kSmemCol = CTA_K + SMEM_PAD_B;
    // Per-lane row / column inside the packed tile, derived from the lane index only.
    int r0 = ((threadIdx.x / 8 / 2) * 8 + threadIdx.x % 8);
    int c0 = ((threadIdx.x / 8) % 2) * 8;
    int r = r0 / 4;
    int c = (r0 % 4) * 16 + c0;
    // Same swizzle as the B store path: (c / PACK_SIZE) ^ (r % 2).
    int c_swizzled = ((c / PACK_SIZE) ^ (r % 2) & 7) * PACK_SIZE;

    if constexpr (ldmatrix)
    {
#pragma unroll
        for (int shared_iter = 0; shared_iter < shared_iters; ++shared_iter)
        {
            void *addr_ptr = (void *)(src + warp_offset_n / kInterleave * kSmemCol + shared_iter * 16 / kInterleave * kSmemCol + k_0_1 * 16 + r * kSmemCol + c_swizzled + warp_offset_k);
            uint32_t addr = cast_smem_ptr_to_uint(addr_ptr);
            ldmatrix_m8n8_x4_b16(dst, shared_iter, addr);
        }
    }

#pragma unroll
    for (int shared_iter = 0; shared_iter < shared_iters; ++shared_iter)
    {
        // One scale / zero per output channel; the channel is picked by lane / 4 and k_0_1 parity.
        f16_t scale = src_scales[(warp_offset_k / G) * CTA_N + warp_offset_n + 16 * shared_iter + 8 * (k_0_1 % 2) + threadIdx.x / 4];
        f16_t zero = src_zeros[(warp_offset_k / G) * CTA_N + warp_offset_n + 16 * shared_iter + 8 * (k_0_1 % 2) + threadIdx.x / 4];
        // Broadcast each scalar into both lanes of a packed pair.
        f162_t scale2 = f162f162(scale);
        f162_t zero2 = f162f162(zero);
        f162_t loaded[4];

        dequantize_s4_to_f16x2(*reinterpret_cast(dst + (k_0_1 % 2) * 4 + (k_0_1 / 2 * 2) + shared_iter * 8), reinterpret_cast(loaded));
#pragma unroll
        for (int i = 0; i < 4; i++)
        {
            // The zero point is added, not subtracted (presumably pre-negated and pre-scaled
            // by the converter - TODO confirm against the checkpoint format).
            loaded[i] = __hfma2(loaded[i], scale2, zero2);
        }
        *reinterpret_cast(dst_fp16 + shared_iter * 16 + 8 * (k_0_1 % 2)) = *reinterpret_cast(loaded);
    }
}

// W4A16 GEMM kernel (variant T1). A holds 16-bit activations, B the packed 4-bit weights,
// scales / zeros the per-group quantization parameters, C the 16-bit output; `semaphores` is
// the per-tile lock buffer allocated by KERNEL_LAUNCH_CODE. M, N, K are the problem sizes.
template
__global__ void gemm_w4a16_T1(f16_t *__restrict__ A, f16_t *__restrict__ B, f16_t *__restrict__ scales, f16_t *__restrict__ zeros, f16_t *__restrict__ C, int *__restrict__ semaphores, int M, int N, int K)
{
    using f162_t = typename packed_as::type;
    // Warps tiling the M x N plane, and in total once the K dimension is also split into slices.
    constexpr int NUM_WARPS_MN = CTA_M / WARP_M * CTA_N / WARP_N;
    constexpr int NUM_WARPS = NUM_WARPS_MN * CTA_K / WARP_K;
    constexpr int CTA_SIZE = NUM_WARPS * WARP_SIZE;
    constexpr int CTA_SIZE_MN = NUM_WARPS_MN * WARP_SIZE;
    constexpr int SLICES = CTA_K / WARP_K;
    int
num_blocks_n = (N + CTA_N - 1) / CTA_N; int num_blocks_m = (M + CTA_M - 1) / CTA_M; int blockIdx_x = 0; int blockIdx_y = blockIdx.x % (num_blocks_m * num_blocks_n); int blockIdx_z = blockIdx.x / (num_blocks_m * num_blocks_n); const int log_tile = get_log_tile<1>((N + CTA_N - 1) / CTA_N); int blockIdx_m = blockIdx_y / (num_blocks_n >> log_tile); int blockIdx_n = blockIdx_y % (num_blocks_n >> log_tile); const uint2 block_idx_mapping = get_block_idx_mapping(blockIdx_m, blockIdx_n, log_tile); blockIdx_m = block_idx_mapping.x; blockIdx_n = block_idx_mapping.y; float C_warp[CTA_M * CTA_N / CTA_SIZE_MN]; constexpr int kSmemPadKA = CTA_K + SMEM_PAD_A; constexpr int kSmemPadKB = CTA_K + SMEM_PAD_B; constexpr int kSmemSizeAPerStage = CTA_M * kSmemPadKA; constexpr int kSmemSizeBPerStage = CTA_N / kInterleave * kSmemPadKB; constexpr int kSmemSizeA = kSmemSizeAPerStage * STAGES; constexpr int kSmemSizeB = kSmemSizeBPerStage * STAGES; constexpr int scales_load_interval = G >= CTA_K ? G / CTA_K : 1; constexpr int scales_per_load = G < CTA_K ? 
CTA_K / G : 1; constexpr int kSmemSizeScales = CTA_N * STAGES / scales_load_interval * scales_per_load; constexpr int kSmemSizeZeros = CTA_N * STAGES / scales_load_interval * scales_per_load; extern __shared__ half mem_shared[]; f16_t *A_shared = reinterpret_cast(mem_shared); f16_t *B_shared = reinterpret_cast(mem_shared + kSmemSizeA); f16_t *scales_shared = reinterpret_cast(mem_shared + kSmemSizeA + kSmemSizeB); f16_t *zeros_shared = reinterpret_cast(mem_shared + kSmemSizeA + kSmemSizeB + kSmemSizeScales); float *C_shared = reinterpret_cast(mem_shared); f16_t A_shared_warp_[2][WARP_M * INTRIN_K / WARP_SIZE]; f16_t B_shared_warp_[2][WARP_N * 32 / WARP_SIZE]; f16_t B_shared_warp_tmp_[2][WARP_N * 16 / WARP_SIZE]; int cta_offset_m = blockIdx_m * CTA_M; int cta_offset_n = blockIdx_n * CTA_N; int cta_offset_k = blockIdx_z * (K / SPLITK); int warp_mn = threadIdx.y % NUM_WARPS_MN; int slice_id = threadIdx.y / NUM_WARPS_MN; int warp_offset_n = (warp_mn % (CTA_N / WARP_N)) * WARP_N; int warp_offset_m = (warp_mn / (CTA_N / WARP_N)) * WARP_M; int warp_offset_k = slice_id * WARP_K; for (int i = 0; i < CTA_M * CTA_N / CTA_SIZE_MN; i++) C_warp[i] = 0.0; int gemm_iters = (K + CTA_K - 1) / CTA_K / SPLITK; int k_0_0_ld = 0; int k_0_0 = 0; constexpr int prologue_stages = STAGES == 1 ? 
1 : STAGES - 1; #pragma unroll for (k_0_0_ld = 0; k_0_0_ld < prologue_stages; ++k_0_0_ld) { global_to_share_one_stage_A(A, A_shared + k_0_0_ld * kSmemSizeAPerStage, M, K, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, 0, true); global_to_share_one_stage_B(B, B_shared + k_0_0_ld * kSmemSizeBPerStage, K, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, 0, true); global_to_share_one_stage_scales( scales, scales_shared + (k_0_0_ld / scales_load_interval * scales_per_load) * CTA_N, zeros, zeros_shared + (k_0_0_ld / scales_load_interval * scales_per_load) * CTA_N, N, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, 0, k_0_0_ld < gemm_iters && k_0_0_ld % scales_load_interval == 0); if constexpr (STAGES > 1) __pipeline_commit(); } if constexpr (STAGES > 1) __pipeline_wait_prior(STAGES - 2); __syncthreads(); share_to_reg_one_stage_A(A_shared, A_shared_warp_[0], warp_offset_m, warp_offset_n, warp_offset_k, 0); share_to_reg_one_stage_B(B_shared, scales_shared, zeros_shared, B_shared_warp_tmp_[0], B_shared_warp_[0], warp_offset_m, warp_offset_n, warp_offset_k, 0); constexpr int SHARED_K_ITERS = WARP_K / INTRIN_K; for (; k_0_0 < gemm_iters; ++k_0_0, ++k_0_0_ld) { int ld_stage = k_0_0_ld % STAGES; int compute_stage = k_0_0 % STAGES; f16_t *A_shared_this_compute_stage; f16_t *B_shared_this_compute_stage; f16_t *scales_shared_this_compute_stage; f16_t *zeros_shared_this_compute_stage; #pragma unroll for (int iter_k = 0; iter_k < SHARED_K_ITERS; ++iter_k) { A_shared_this_compute_stage = A_shared + compute_stage * kSmemSizeAPerStage; B_shared_this_compute_stage = B_shared + compute_stage * kSmemSizeBPerStage; scales_shared_this_compute_stage = scales_shared + (compute_stage / scales_load_interval * scales_per_load) * CTA_N; zeros_shared_this_compute_stage = zeros_shared + (compute_stage / scales_load_interval * scales_per_load) * CTA_N; share_to_reg_one_stage_A(A_shared_this_compute_stage, A_shared_warp_[(iter_k + 1) % 2], warp_offset_m, warp_offset_n, warp_offset_k, 
(iter_k + 1) % SHARED_K_ITERS); if ((iter_k + 1) % kInterleave == 0) { if (compute_stage % 2 == 1) { share_to_reg_one_stage_B( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[1], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, warp_offset_k, (iter_k + 1) % SHARED_K_ITERS); } else { share_to_reg_one_stage_B( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[0], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, warp_offset_k, (iter_k + 1) % SHARED_K_ITERS); } } else { if (compute_stage % 2 == 1) { share_to_reg_one_stage_B( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[1], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, warp_offset_k, (iter_k + 1) % SHARED_K_ITERS); } else { share_to_reg_one_stage_B( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[0], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, warp_offset_k, (iter_k + 1) % SHARED_K_ITERS); } } f16_t *A_shared_warp = A_shared_warp_[iter_k % 2]; f16_t *B_shared_warp = B_shared_warp_[(iter_k / 2) % 2]; for (int i_0_3 = 0; i_0_3 < WARP_M / INTRIN_M; ++i_0_3) { for (int j_0_4 = 0; j_0_4 < WARP_N / INTRIN_N; ++j_0_4) { mma_m16n8k16(C_warp + i_0_3 * WARP_N / INTRIN_N * 8 + j_0_4 * 8, A_shared_warp + i_0_3 * 8, B_shared_warp + j_0_4 * 16 + (iter_k % 2) * 4); mma_m16n8k16(C_warp + i_0_3 * WARP_N / INTRIN_N * 8 + j_0_4 * 8 + 4, A_shared_warp + i_0_3 * 8, B_shared_warp + j_0_4 * 16 + (iter_k % 2) * 4 + 8); } } if (iter_k < WARP_K / INTRIN_K - 1) { if constexpr (STAGES == 1) __syncthreads(); global_to_share_one_stage_A(A, A_shared + ld_stage * kSmemSizeAPerStage, M, K, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, iter_k, k_0_0_ld < gemm_iters); 
global_to_share_one_stage_B(B, B_shared + ld_stage * kSmemSizeBPerStage, K, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, iter_k, k_0_0_ld < gemm_iters); } if (iter_k == WARP_K / INTRIN_K - 2) { if constexpr (STAGES == 1 && WARP_K / INTRIN_K > 2) { __syncthreads(); } global_to_share_one_stage_A(A, A_shared + ld_stage * kSmemSizeAPerStage, M, K, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, iter_k + 1, k_0_0_ld < gemm_iters); global_to_share_one_stage_B(B, B_shared + ld_stage * kSmemSizeBPerStage, K, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, iter_k + 1, k_0_0_ld < gemm_iters); global_to_share_one_stage_scales( scales, scales_shared + (ld_stage / scales_load_interval * scales_per_load) * CTA_N, zeros, zeros_shared + (ld_stage / scales_load_interval * scales_per_load) * CTA_N, N, cta_offset_m, cta_offset_n, cta_offset_k, k_0_0_ld, iter_k, k_0_0_ld < gemm_iters && k_0_0_ld % scales_load_interval == 0); if constexpr (STAGES > 1) { __pipeline_commit(); __pipeline_wait_prior(STAGES - 2); } compute_stage = (k_0_0 + 1) % STAGES; __syncthreads(); } } } __pipeline_commit(); __pipeline_wait_prior(0); __syncthreads(); if constexpr (SLICES > 1) { #pragma unroll for (int z = 0; z < SLICES; ++z) { if (slice_id == z) { #pragma unroll for (int ax0_0_1 = 0; ax0_0_1 < WARP_M / INTRIN_M; ++ax0_0_1) { #pragma unroll for (int ax1_0_1 = 0; ax1_0_1 < WARP_N / INTRIN_N; ++ax1_0_1) { #pragma unroll for (int local_id = 0; local_id < OP_M * 16 / WARP_SIZE; ++local_id) { if (z > 0) { C_warp[ax0_0_1 * WARP_N / INTRIN_N * 8 + ax1_0_1 * 8 + local_id] += C_shared[warp_offset_m * CTA_N + ax0_0_1 * OP_M * CTA_N + warp_offset_n + ax1_0_1 * 16 + ((local_id % 4) / 2 * 8 + (threadIdx.x / 4)) * CTA_N + (local_id / 4) * 8 + (local_id % 2) + (threadIdx.x % 4) * 2]; } C_shared[warp_offset_m * CTA_N + ax0_0_1 * OP_M * CTA_N + warp_offset_n + ax1_0_1 * 16 + ((local_id % 4) / 2 * 8 + (threadIdx.x / 4)) * CTA_N + (local_id / 4) * 8 + (local_id % 2) + (threadIdx.x % 4) * 2] = C_warp[ax0_0_1 
* WARP_N / INTRIN_N * 8 + ax1_0_1 * 8 + local_id]; }; } } } __syncthreads(); } if (slice_id == 0) { #pragma unroll for (int ax0_0_1 = 0; ax0_0_1 < WARP_M / INTRIN_M; ++ax0_0_1) { #pragma unroll for (int ax1_0_1 = 0; ax1_0_1 < WARP_N / INTRIN_N; ++ax1_0_1) { #pragma unroll for (int local_id = 0; local_id < OP_M * 16 / WARP_SIZE; ++local_id) { C_warp[ax0_0_1 * WARP_N / INTRIN_N * 8 + ax1_0_1 * 8 + local_id] = C_shared[warp_offset_m * CTA_N + ax0_0_1 * OP_M * CTA_N + warp_offset_n + ax1_0_1 * 16 + ((local_id % 4) / 2 * 8 + (threadIdx.x / 4)) * CTA_N + (local_id / 4) * 8 + (local_id % 2) + (threadIdx.x % 4) * 2]; }; } } } } if (slice_id == 0) { Semaphore semaphore(semaphores + blockIdx_y, threadIdx.x); if constexpr (SPLITK > 1) { semaphore.fetch(); } if (blockIdx_z != 0) { semaphore.wait(blockIdx_z); for (int ax0_0_1 = 0; ax0_0_1 < WARP_M / INTRIN_M; ++ax0_0_1) { for (int ax1_0_1 = 0; ax1_0_1 < WARP_N / INTRIN_N; ++ax1_0_1) { for (int local_id = 0; local_id < OP_M * 16 / WARP_SIZE; local_id += 2) { int write_row = cta_offset_m + warp_offset_m + ax0_0_1 * OP_M + ((local_id % 4) / 2 * 8 + (threadIdx.x / 4)); if (write_row < M) { f162_t *existing_psum_ptr = reinterpret_cast( C + write_row * N + cta_offset_n + warp_offset_n + ax1_0_1 * 16 + (local_id / 4) * 8 + (local_id % 2) + (threadIdx.x % 4) * 2); *existing_psum_ptr = __hadd2( *existing_psum_ptr, cuda_cast(*reinterpret_cast( C_warp + ax0_0_1 * WARP_N / INTRIN_N * 8 + ax1_0_1 * 8 + local_id))); } }; } } } else { for (int ax0_0_1 = 0; ax0_0_1 < WARP_M / INTRIN_M; ++ax0_0_1) { for (int ax1_0_1 = 0; ax1_0_1 < WARP_N / INTRIN_N; ++ax1_0_1) { for (int local_id = 0; local_id < OP_M * 16 / WARP_SIZE; local_id += 2) { int write_row = cta_offset_m + warp_offset_m + ax0_0_1 * OP_M + ((local_id % 4) / 2 * 8 + (threadIdx.x / 4)); if (write_row < M) { *reinterpret_cast( C + write_row * N + cta_offset_n + warp_offset_n + ax1_0_1 * 16 + (local_id / 4) * 8 + (local_id % 2) + (threadIdx.x % 4) * 2) = cuda_cast(*reinterpret_cast(C_warp + 
ax0_0_1 * WARP_N / INTRIN_N * 8 + ax1_0_1 * 8 + local_id)); } }; } } } if constexpr (SPLITK > 1) { int lock = 0; if (SPLITK == blockIdx_z + 1) { lock = 0; } else { lock = blockIdx_z + 1; } semaphore.release(lock); } } } template __device__ __inline__ void global_to_share_one_stage_A_T2(f16_t *src, f16_t *dst, int global_nrows, int global_ncols, int cta_offset_m, int cta_offset_n, int global_iter_k, int shared_iter_k, bool mask) { constexpr int threads_needed = (CTA_M * CTA_K) / PACK_SIZE / SHARED_K_ITERS; constexpr int threads_used = threads_needed < CTA_SIZE ? threads_needed : CTA_SIZE; constexpr int total_global_iters = (CTA_M * CTA_K) / PACK_SIZE / threads_used; constexpr int partial_global_iters = (total_global_iters + SHARED_K_ITERS - 1) / SHARED_K_ITERS; constexpr int cta_step_m_or_n = (threads_used * PACK_SIZE) / CTA_K; constexpr int warp_step_m_or_n = (WARP_SIZE * PACK_SIZE) / CTA_K; constexpr int threads_per_row = CTA_K / PACK_SIZE; constexpr int kSmemCol = CTA_K + SMEM_PAD_A; bool local_mask = mask & (threadIdx.y * WARP_SIZE + threadIdx.x < threads_used); int ld_col = (threadIdx.x % threads_per_row); #pragma unroll for (int _global_iter = 0; _global_iter < partial_global_iters; ++_global_iter) { int global_iter = shared_iter_k * partial_global_iters + _global_iter; int ld_row = global_iter * cta_step_m_or_n + threadIdx.y * warp_step_m_or_n + (threadIdx.x / threads_per_row); int ld_col_swizzled = (ld_col ^ (ld_row) & 7) * PACK_SIZE; void *dst_ptr = (void *)(dst + ld_row * kSmemCol + ld_col_swizzled); uint4 *src_ptr = (uint4 *)(src + (ld_row + cta_offset_m) * global_ncols + ld_col * PACK_SIZE + global_iter_k * CTA_K); // cta_offset_m * global_ncols + global_iter * cta_step_m_or_n * global_ncols + threadIdx.y * warp_step_m_or_n * global_ncols + (threadIdx.x / threads_per_row) * global_ncols + global_iter_k * CTA_K + (threadIdx.x % threads_per_row) * PACK_SIZE); if constexpr (STAGES > 1) { uint32_t addr = cast_smem_ptr_to_uint(dst_ptr); cp_async_cg_A(addr, 
src_ptr, local_mask & (ld_row + cta_offset_m < global_nrows)); } else { if (local_mask & (ld_row + cta_offset_m < global_nrows)) *(uint4 *)dst_ptr = *src_ptr; } } } template __device__ __inline__ void global_to_share_one_stage_B_T2(f16_t *src, f16_t *dst, int global_ncols, int cta_offset_m, int cta_offset_n, int global_iter_k, int shared_iter_k, bool mask) { constexpr int threads_needed = (CTA_N / kInterleave * CTA_K) / PACK_SIZE / SHARED_K_ITERS; constexpr int threads_used = threads_needed < CTA_SIZE ? threads_needed : CTA_SIZE; constexpr int total_global_iters = (CTA_N / kInterleave * CTA_K) / PACK_SIZE / threads_used; constexpr int partial_global_iters = (total_global_iters + SHARED_K_ITERS - 1) / SHARED_K_ITERS; constexpr int cta_step_m_or_n = (threads_used * PACK_SIZE) / CTA_K; constexpr int warp_step_m_or_n = (WARP_SIZE * PACK_SIZE) / CTA_K; constexpr int threads_per_row = CTA_K / PACK_SIZE; constexpr int kSmemCol = CTA_K + SMEM_PAD_B; bool local_mask = mask & (threadIdx.y * WARP_SIZE + threadIdx.x < threads_used); #pragma unroll for (int _global_iter = 0; _global_iter < partial_global_iters; ++_global_iter) { int global_iter = shared_iter_k * partial_global_iters + _global_iter; int ld_row = global_iter * cta_step_m_or_n + threadIdx.y * warp_step_m_or_n + (threadIdx.x / threads_per_row); int ld_col = (threadIdx.x % threads_per_row); int ld_col_swizzled = ld_col ^ (ld_row % 2) & 7; void *dst_ptr = (void *)(dst + (ld_row * kSmemCol + ld_col_swizzled * PACK_SIZE)); uint4 *src_ptr = (uint4 *)(src + global_iter_k * CTA_K + cta_offset_n / kInterleave * global_ncols + ld_row * global_ncols + ld_col * PACK_SIZE); if constexpr (STAGES > 1) { uint32_t addr = cast_smem_ptr_to_uint(dst_ptr); cp_async_cg_A(addr, src_ptr, local_mask); } else { if (local_mask) *(uint4 *)dst_ptr = *src_ptr; } } } template __device__ __inline__ void global_to_share_one_stage_scales_T2(f16_t *src, f16_t *dst, f16_t *src_z, f16_t *dst_z, int global_ncols, int cta_offset_m, int cta_offset_n, 
int global_iter_k, int shared_iter_k, bool mask) { constexpr int threads_needed = CTA_N / PACK_SIZE / 1; constexpr int threads_used = threads_needed < CTA_SIZE ? threads_needed : CTA_SIZE; constexpr int total_global_iters = CTA_N / PACK_SIZE / threads_used; constexpr int threads_per_row = CTA_N / PACK_SIZE; constexpr int kSmemCol = CTA_N; bool local_mask = mask & (threadIdx.y * WARP_SIZE + threadIdx.x < threads_used); int g_idx = global_iter_k * CTA_K / G; void *dst_ptr = (void *)(dst + (threadIdx.x % threads_per_row) * PACK_SIZE); uint4 *src_ptr = (uint4 *)(src + g_idx * global_ncols + cta_offset_n + (threadIdx.x % threads_per_row) * PACK_SIZE); void *dst_ptr_z = (void *)(dst_z + (threadIdx.x % threads_per_row) * PACK_SIZE); uint4 *src_ptr_z = (uint4 *)(src_z + g_idx * global_ncols + cta_offset_n + (threadIdx.x % threads_per_row) * PACK_SIZE); if (STAGES > 1) { uint32_t addr = cast_smem_ptr_to_uint(dst_ptr); cp_async_cg_A(addr, src_ptr, local_mask); uint32_t addr_z = cast_smem_ptr_to_uint(dst_ptr_z); cp_async_cg_A(addr_z, src_ptr_z, local_mask); } else { if (local_mask) { *(uint4 *)dst_ptr = *src_ptr; *(uint4 *)dst_ptr_z = *src_ptr_z; } } } template __device__ __inline__ void share_to_reg_one_stage_A_T2(f16_t *src, f16_t *dst, int warp_offset_m, int warp_offset_n, int k_0_1) { constexpr int kSmemCol = CTA_K + SMEM_PAD_A; for (int shared_iter = 0; shared_iter < shared_iters; ++shared_iter) { int ld_row = warp_offset_m + shared_iter * OP_M + (threadIdx.x % 16); int ld_col = k_0_1 * 16 + (threadIdx.x / 16) * 8; int ld_col_swizzled = ((ld_col / PACK_SIZE) ^ (ld_row) & 7) * PACK_SIZE; void *addr_ptr = (void *)(src + ld_row * kSmemCol + ld_col_swizzled); uint32_t addr = cast_smem_ptr_to_uint(addr_ptr); ldmatrix_m8n8_x4_b16(dst, shared_iter, addr); } } template __device__ __inline__ void share_to_reg_one_stage_B_T2(f16_t *src, f16_t *src_scales, f16_t *src_zeros, f16_t *dst, f16_t *dst_fp16, int warp_offset_m, int warp_offset_n, int k_0_1) { using f162_t = typename 
packed_as::type; constexpr int kSmemCol = CTA_K + SMEM_PAD_B; int r0 = ((threadIdx.x / 8 / 2) * 8 + threadIdx.x % 8); int c0 = ((threadIdx.x / 8) % 2) * 8; int r = r0 / 4; int c = (r0 % 4) * 16 + c0; int c_swizzled = ((c / PACK_SIZE) ^ (r % 2) & 7) * PACK_SIZE; if constexpr (ldmatrix) { #pragma unroll for (int shared_iter = 0; shared_iter < shared_iters; ++shared_iter) { void *addr_ptr = (void *)(src + warp_offset_n / kInterleave * kSmemCol + shared_iter * 16 / kInterleave * kSmemCol + k_0_1 * 16 + r * kSmemCol + c_swizzled); uint32_t addr = cast_smem_ptr_to_uint(addr_ptr); ldmatrix_m8n8_x4_b16(dst, shared_iter, addr); } } #pragma unroll for (int shared_iter = 0; shared_iter < shared_iters; ++shared_iter) { f16_t scale = src_scales[warp_offset_n + 16 * shared_iter + 8 * (k_0_1 % 2) + threadIdx.x / 4]; f16_t zero = src_zeros[warp_offset_n + 16 * shared_iter + 8 * (k_0_1 % 2) + threadIdx.x / 4]; f162_t scale2 = f162f162(scale); f162_t zero2 = f162f162(zero); f162_t loaded[4]; dequantize_s4_to_f16x2(*reinterpret_cast(dst + (k_0_1 % 2) * 4 + (k_0_1 / 2 * 2) + shared_iter * 8), reinterpret_cast(loaded)); #pragma unroll for (int i = 0; i < 4; i++) { loaded[i] = __hfma2(loaded[i], scale2, zero2); } *reinterpret_cast(dst_fp16 + shared_iter * 16 + 8 * (k_0_1 % 2)) = *reinterpret_cast(loaded); } } template __global__ void gemm_w4a16_T2(f16_t *__restrict__ A, f16_t *__restrict__ B, f16_t *__restrict__ scales, f16_t *__restrict__ zeros, f16_t *__restrict__ C, int M, int N, int K) { using f162_t = typename packed_as::type; constexpr int NUM_WARPS = CTA_M / WARP_M * CTA_N / WARP_N; constexpr int CTA_SIZE = NUM_WARPS * WARP_SIZE; int num_blocks_n = (N + CTA_N - 1) / CTA_N; int num_blocks_m = (M + CTA_M - 1) / CTA_M; int blockIdx_x = 0; int blockIdx_y = blockIdx.x % (num_blocks_m * num_blocks_n); int blockIdx_z = blockIdx.x / (num_blocks_m * num_blocks_n); const int log_tile = get_log_tile<1>((N + CTA_N - 1) / CTA_N); int blockIdx_m = blockIdx_y / (num_blocks_n >> log_tile); int 
blockIdx_n = blockIdx_y % (num_blocks_n >> log_tile); const uint2 block_idx_mapping = get_block_idx_mapping(blockIdx_m, blockIdx_n, log_tile); blockIdx_m = block_idx_mapping.x; blockIdx_n = block_idx_mapping.y; float C_warp[CTA_M * CTA_N / CTA_SIZE]; constexpr int kSmemPadKA = CTA_K + SMEM_PAD_A; constexpr int kSmemPadKB = CTA_K + SMEM_PAD_B; constexpr int kSmemSizeAPerStage = CTA_M * kSmemPadKA; constexpr int kSmemSizeBPerStage = CTA_N / kInterleave * kSmemPadKB; constexpr int kSmemSizeA = kSmemSizeAPerStage * STAGES; constexpr int kSmemSizeB = kSmemSizeBPerStage * STAGES; constexpr int kSmemSizeScales = CTA_N * STAGES / 2; constexpr int kSmemSizeZeros = CTA_N * STAGES / 2; constexpr int scales_load_interval = G / CTA_K; extern __shared__ half mem_shared[]; f16_t *A_shared = reinterpret_cast(mem_shared); f16_t *B_shared = reinterpret_cast(mem_shared + kSmemSizeA); f16_t *scales_shared = reinterpret_cast(mem_shared + kSmemSizeA + kSmemSizeB); f16_t *zeros_shared = reinterpret_cast(mem_shared + kSmemSizeA + kSmemSizeB + kSmemSizeScales); f16_t A_shared_warp_[2][WARP_M * INTRIN_K / WARP_SIZE]; f16_t B_shared_warp_[2][WARP_N * 32 / WARP_SIZE]; f16_t B_shared_warp_tmp_[2][WARP_N * 16 / WARP_SIZE]; int cta_offset_m = blockIdx_m * CTA_M; int cta_offset_n = blockIdx_n * CTA_N; int warp_offset_m = (threadIdx.y % (CTA_M / WARP_M)) * WARP_M; int warp_offset_n = (threadIdx.y / (CTA_M / WARP_M)) * WARP_N; for (int i = 0; i < CTA_M * CTA_N / CTA_SIZE; i++) C_warp[i] = 0.0; int gemm_iters = (K + CTA_K - 1) / CTA_K; int k_0_0_ld = 0; int k_0_0 = 0; constexpr int prologue_stages = STAGES == 1 ? 
1 : STAGES - 1; #pragma unroll for (k_0_0_ld = 0; k_0_0_ld < prologue_stages; ++k_0_0_ld) { global_to_share_one_stage_A_T2(A, A_shared + k_0_0_ld * kSmemSizeAPerStage, M, K, cta_offset_m, cta_offset_n, k_0_0_ld, 0, true); global_to_share_one_stage_B_T2(B, B_shared + k_0_0_ld * kSmemSizeBPerStage, K, cta_offset_m, cta_offset_n, k_0_0_ld, 0, true); global_to_share_one_stage_scales_T2( scales, scales_shared + (k_0_0_ld / scales_load_interval) * CTA_N, zeros, zeros_shared + (k_0_0_ld / scales_load_interval) * CTA_N, N, cta_offset_m, cta_offset_n, k_0_0_ld, 0, k_0_0_ld < gemm_iters && k_0_0_ld % scales_load_interval == 0); if constexpr (STAGES > 1) __pipeline_commit(); } if constexpr (STAGES > 1) __pipeline_wait_prior(STAGES - 2); __syncthreads(); share_to_reg_one_stage_A_T2(A_shared, A_shared_warp_[0], warp_offset_m, warp_offset_n, 0); share_to_reg_one_stage_B_T2(B_shared, scales_shared, zeros_shared, B_shared_warp_tmp_[0], B_shared_warp_[0], warp_offset_m, warp_offset_n, 0); constexpr int SHARED_K_ITERS = WARP_K / INTRIN_K; for (; k_0_0 < gemm_iters; ++k_0_0, ++k_0_0_ld) { int ld_stage = k_0_0_ld % STAGES; int compute_stage = k_0_0 % STAGES; f16_t *A_shared_this_compute_stage; f16_t *B_shared_this_compute_stage; f16_t *scales_shared_this_compute_stage; f16_t *zeros_shared_this_compute_stage; for (int iter_k = 0; iter_k < SHARED_K_ITERS; ++iter_k) { A_shared_this_compute_stage = A_shared + compute_stage * kSmemSizeAPerStage; B_shared_this_compute_stage = B_shared + compute_stage * kSmemSizeBPerStage; scales_shared_this_compute_stage = scales_shared + (compute_stage / scales_load_interval) * CTA_N; zeros_shared_this_compute_stage = zeros_shared + (compute_stage / scales_load_interval) * CTA_N; share_to_reg_one_stage_A_T2(A_shared_this_compute_stage, A_shared_warp_[(iter_k + 1) % 2], warp_offset_m, warp_offset_n, (iter_k + 1) % SHARED_K_ITERS); if ((iter_k + 1) % kInterleave == 0) { if (compute_stage % 2 == 1) { share_to_reg_one_stage_B_T2( B_shared_this_compute_stage, 
scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[1], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, (iter_k + 1) % SHARED_K_ITERS); } else { share_to_reg_one_stage_B_T2( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[0], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, (iter_k + 1) % SHARED_K_ITERS); } } else { if (compute_stage % 2 == 1) { share_to_reg_one_stage_B_T2( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[1], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, (iter_k + 1) % SHARED_K_ITERS); } else { share_to_reg_one_stage_B_T2( B_shared_this_compute_stage, scales_shared_this_compute_stage, zeros_shared_this_compute_stage, B_shared_warp_tmp_[0], B_shared_warp_[((iter_k + 1) / 2) % 2], warp_offset_m, warp_offset_n, (iter_k + 1) % SHARED_K_ITERS); } } __syncthreads(); f16_t *A_shared_warp = A_shared_warp_[iter_k % 2]; f16_t *B_shared_warp = B_shared_warp_[(iter_k / 2) % 2]; for (int i_0_3 = 0; i_0_3 < WARP_M / INTRIN_M; ++i_0_3) { for (int j_0_4 = 0; j_0_4 < WARP_N / INTRIN_N; ++j_0_4) { mma_m16n8k16(C_warp + i_0_3 * WARP_N / INTRIN_N * 8 + j_0_4 * 8, A_shared_warp + i_0_3 * 8, B_shared_warp + j_0_4 * 16 + (iter_k % 2) * 4); mma_m16n8k16(C_warp + i_0_3 * WARP_N / INTRIN_N * 8 + j_0_4 * 8 + 4, A_shared_warp + i_0_3 * 8, B_shared_warp + j_0_4 * 16 + (iter_k % 2) * 4 + 8); } } if (iter_k < WARP_K / INTRIN_K - 1) { if constexpr (STAGES == 1) __syncthreads(); global_to_share_one_stage_A_T2(A, A_shared + ld_stage * kSmemSizeAPerStage, M, K, cta_offset_m, cta_offset_n, k_0_0_ld, iter_k, k_0_0_ld < gemm_iters); global_to_share_one_stage_B_T2(B, B_shared + ld_stage * kSmemSizeBPerStage, K, cta_offset_m, cta_offset_n, k_0_0_ld, iter_k, k_0_0_ld < gemm_iters); } if (iter_k == WARP_K / INTRIN_K - 2) { if constexpr (STAGES == 1 && WARP_K / 
INTRIN_K > 2) { __syncthreads(); } global_to_share_one_stage_A_T2(A, A_shared + ld_stage * kSmemSizeAPerStage, M, K, cta_offset_m, cta_offset_n, k_0_0_ld, iter_k + 1, k_0_0_ld < gemm_iters); global_to_share_one_stage_B_T2(B, B_shared + ld_stage * kSmemSizeBPerStage, K, cta_offset_m, cta_offset_n, k_0_0_ld, iter_k + 1, k_0_0_ld < gemm_iters); global_to_share_one_stage_scales_T2( scales, scales_shared + (ld_stage / scales_load_interval) * CTA_N, zeros, zeros_shared + (ld_stage / scales_load_interval) * CTA_N, N, cta_offset_m, cta_offset_n, k_0_0_ld, iter_k, k_0_0_ld < gemm_iters && k_0_0_ld % scales_load_interval == 0); if constexpr (STAGES > 1) { __pipeline_commit(); __pipeline_wait_prior(STAGES - 2); } compute_stage = (k_0_0 + 1) % STAGES; __syncthreads(); } } } for (int ax0_0_1 = 0; ax0_0_1 < WARP_M / INTRIN_M; ++ax0_0_1) { for (int ax1_0_1 = 0; ax1_0_1 < WARP_N / INTRIN_N; ++ax1_0_1) { for (int local_id = 0; local_id < OP_M * 16 / WARP_SIZE; local_id += 2) { int write_row = cta_offset_m + warp_offset_m + ax0_0_1 * OP_M + ((local_id % 4) / 2 * 8 + (threadIdx.x / 4)); if (write_row < M) { *reinterpret_cast( C + write_row * N + cta_offset_n + warp_offset_n + ax1_0_1 * 16 + (local_id / 4) * 8 + (local_id % 2) + (threadIdx.x % 4) * 2) = cuda_cast(*reinterpret_cast(C_warp + ax0_0_1 * WARP_N / INTRIN_N * 8 + ax1_0_1 * 8 + local_id)); } }; } } } torch::Tensor awq_gemm_forward_cuda( torch::Tensor _in_feats, torch::Tensor _kernel, torch::Tensor _scales, torch::Tensor _zeros) { std::vector output_shape = _in_feats.sizes().vec(); output_shape.back() = _kernel.size(0) * kInterleave; int num_in_feats = _in_feats.numel() / _in_feats.size(-1); int num_in_channels = _in_feats.size(-1); auto options = torch::TensorOptions().dtype(_in_feats.dtype()).device(_in_feats.device()); auto options_int = torch::TensorOptions().dtype(torch::kInt32).device(_in_feats.device()); at::Tensor _out_feats = torch::empty(output_shape, options); int num_out_feats = _out_feats.numel() / 
_out_feats.size(-1); int num_out_channels = _out_feats.size(-1); if (_in_feats.scalar_type() == at::ScalarType::Half) { using f16_t = half; auto in_feats = reinterpret_cast(_in_feats.data_ptr()); auto kernel = reinterpret_cast(_kernel.data_ptr()); auto scales = reinterpret_cast(_scales.data_ptr()); auto zeros = reinterpret_cast(_zeros.data_ptr()); auto out_feats = reinterpret_cast(_out_feats.data_ptr()); if (num_out_feats <= 32) { constexpr int G = 128; constexpr int CTA_M = 16; constexpr int CTA_N = 128; constexpr int CTA_K = 128; constexpr int WARP_M = 16; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 2; constexpr int STAGES = 4; KERNEL_LAUNCH_CODE } else if (num_out_feats <= 64) { constexpr int G = 128; constexpr int CTA_M = 16; constexpr int CTA_N = 128; constexpr int CTA_K = 128; constexpr int WARP_M = 16; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 1; constexpr int STAGES = 3; KERNEL_LAUNCH_CODE } else if (num_out_feats <= 128) { constexpr int G = 128; constexpr int CTA_M = 32; constexpr int CTA_N = 128; constexpr int CTA_K = 128; constexpr int WARP_M = 32; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 1; constexpr int STAGES = 4; KERNEL_LAUNCH_CODE } else if (num_out_feats <= 192) { constexpr int G = 128; constexpr int CTA_M = 64; constexpr int CTA_N = 128; constexpr int CTA_K = 64; constexpr int WARP_M = 64; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 1; constexpr int STAGES = 4; KERNEL_LAUNCH_CODE } else { constexpr int G = 128; constexpr int CTA_M = 64; constexpr int CTA_N = 128; constexpr int CTA_K = 64; constexpr int WARP_M = 64; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int STAGES = 4; constexpr int NUM_WARPS = (CTA_M / WARP_M) * (CTA_N / WARP_N); constexpr int kSmemByteSize = (CTA_M * (CTA_K + SMEM_PAD_A) + CTA_N * (CTA_K + SMEM_PAD_B) / kInterleave + CTA_N) * STAGES * sizeof(f16_t); if (kSmemByteSize >= 
99 * 1024) { printf("This kernel requires %d Bytes of shared memory, which exceeds device limit.\n", kSmemByteSize); return _out_feats; } int j_factors1 = num_out_channels / CTA_N / 1; dim3 num_blocks((num_out_feats + CTA_M - 1) / CTA_M * j_factors1); dim3 threads_per_block(WARP_SIZE, NUM_WARPS); auto kernel_func = gemm_w4a16_T2; cudaFuncSetAttribute(kernel_func, cudaFuncAttributeMaxDynamicSharedMemorySize, kSmemByteSize); kernel_func<<>>( in_feats, kernel, scales, zeros, out_feats, num_in_feats, num_out_channels, num_in_channels); } } else if (_in_feats.scalar_type() == at::ScalarType::BFloat16) { using f16_t = __nv_bfloat16; auto in_feats = reinterpret_cast(_in_feats.data_ptr()); auto kernel = reinterpret_cast(_kernel.data_ptr()); auto scales = reinterpret_cast(_scales.data_ptr()); auto zeros = reinterpret_cast(_zeros.data_ptr()); auto out_feats = reinterpret_cast(_out_feats.data_ptr()); if (num_out_feats <= 32) { constexpr int G = 128; constexpr int CTA_M = 16; constexpr int CTA_N = 128; constexpr int CTA_K = 128; constexpr int WARP_M = 16; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 2; constexpr int STAGES = 4; KERNEL_LAUNCH_CODE } else if (num_out_feats <= 64) { constexpr int G = 128; constexpr int CTA_M = 16; constexpr int CTA_N = 128; constexpr int CTA_K = 128; constexpr int WARP_M = 16; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 1; constexpr int STAGES = 3; KERNEL_LAUNCH_CODE } else if (num_out_feats <= 128) { constexpr int G = 128; constexpr int CTA_M = 32; constexpr int CTA_N = 128; constexpr int CTA_K = 128; constexpr int WARP_M = 32; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int SPLITK = 1; constexpr int STAGES = 4; KERNEL_LAUNCH_CODE } else if (num_out_feats <= 192) { constexpr int G = 128; constexpr int CTA_M = 64; constexpr int CTA_N = 128; constexpr int CTA_K = 64; constexpr int WARP_M = 64; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int 
SPLITK = 1; constexpr int STAGES = 4; KERNEL_LAUNCH_CODE } else { constexpr int G = 128; constexpr int CTA_M = 64; constexpr int CTA_N = 128; constexpr int CTA_K = 64; constexpr int WARP_M = 64; constexpr int WARP_N = 32; constexpr int WARP_K = 64; constexpr int STAGES = 4; constexpr int NUM_WARPS = (CTA_M / WARP_M) * (CTA_N / WARP_N); constexpr int kSmemByteSize = (CTA_M * (CTA_K + SMEM_PAD_A) + CTA_N * (CTA_K + SMEM_PAD_B) / kInterleave + CTA_N) * STAGES * sizeof(f16_t); if (kSmemByteSize >= 99 * 1024) { printf("This kernel requires %d Bytes of shared memory, which exceeds device limit.\n", kSmemByteSize); return _out_feats; } int j_factors1 = num_out_channels / CTA_N / 1; dim3 num_blocks((num_out_feats + CTA_M - 1) / CTA_M * j_factors1); dim3 threads_per_block(WARP_SIZE, NUM_WARPS); auto kernel_func = gemm_w4a16_T2; cudaFuncSetAttribute(kernel_func, cudaFuncAttributeMaxDynamicSharedMemorySize, kSmemByteSize); kernel_func<<>>( in_feats, kernel, scales, zeros, out_feats, num_in_feats, num_out_channels, num_in_channels); } } else { AT_ERROR("Unsupported input type"); } return _out_feats; } ================================================ FILE: deepcompressor/backend/tinychat/csrc/quantization/gemm/gemm_cuda.h ================================================ #include torch::Tensor awq_gemm_forward_cuda( torch::Tensor _in_feats, torch::Tensor _kernel, torch::Tensor _scales, torch::Tensor _zeros); ================================================ FILE: deepcompressor/backend/tinychat/csrc/quantization/gemm/semaphore.h ================================================ /*************************************************************************************************** * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ /*! \file \brief Implementation of a CTA-wide semaphore for inter-CTA synchronization. */ #pragma once ///////////////////////////////////////////////////////////////////////////////////////////////// // namespace cutlass { ///////////////////////////////////////////////////////////////////////////////////////////////// /// CTA-wide semaphore for inter-CTA synchronization. 
// Flag-based synchronisation between thread blocks. One int in global memory
// (`lock`) is polled by wait() and written by release(). Only the thread for
// which `wait_thread` is true ever touches `lock`; the other threads of the
// block take part through the __syncthreads* barriers, so wait() and release()
// must be reached by every thread of the block.
class Semaphore {
public:
  int *lock;         // address of the flag word read by fetch() and written by release()
  bool wait_thread;  // true when thread_id <= 0: this thread performs the global load/store
  int state;         // last value loaded by fetch(); initialised to -1 by the constructor

public:
  /// Implements a semaphore to wait for a flag to reach a given value
  // `thread_id < 0 || thread_id == 0` selects the accessing thread; any other
  // thread keeps `state` at -1 as far as this class is concerned.
  __host__ __device__ Semaphore(int *lock_, int thread_id)
      : lock(lock_), wait_thread(thread_id < 0 || thread_id == 0), state(-1) { }

  /// Permit fetching the synchronization mechanism early
  // Loads *lock into `state` on the accessing thread only. sm_70 and newer use
  // an acquire load at GPU scope; older targets use a `.cg` load instead.
  __device__ void fetch() {
    if (wait_thread) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
      asm volatile("ld.global.acquire.gpu.b32 %0, [%1];\n" : "=r"(state) : "l"(lock));
#else
      asm volatile("ld.global.cg.b32 %0, [%1];\n" : "=r"(state) : "l"(lock));
#endif
    }
  }

  /// Gets the internal state
  __device__ int get_state() const { return state; }

  /// Waits until the semaphore is equal to the given value
  // __syncthreads_and() is a block-wide barrier that yields non-zero only when
  // the predicate holds for every thread. The loop therefore keeps polling
  // while no thread has seen `status`, and exits for the whole block as soon as
  // one thread (the one updated by fetch()) has state == status.
  __device__ void wait(int status = 0) {
    while (__syncthreads_and(state != status)) {
      fetch();
    }
    __syncthreads();
  }

  /// Updates the lock with the given result
  // The leading barrier makes every thread of the block arrive here before the
  // accessing thread publishes `status` (release store on sm_70+, `.cg` store
  // otherwise).
  __device__ void release(int status = 0) {
    __syncthreads();
    if (wait_thread) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
      asm volatile("st.global.release.gpu.b32 [%0], %1;\n" : : "l"(lock), "r"(status));
#else
      asm volatile("st.global.cg.b32 [%0], %1;\n" : : "l"(lock), "r"(status));
#endif
    }
  }
};

/////////////////////////////////////////////////////////////////////////////////////////////////
// } // namespace cutlass
/////////////////////////////////////////////////////////////////////////////////////////////////

================================================
FILE: deepcompressor/backend/tinychat/csrc/quantization/gemv/gemv_cuda.cu
================================================
/*
 * Modified from NVIDIA [TRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/tree/d37b507f41a87457fe9f10f7459d08f5db235745/cpp/tensorrt_llm/kernels/weightOnlyBatchedGemv)
 * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
@article{lin2023awq,
  title={AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration},
  author={Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and Dang, Xingyu and Han, Song},
  journal={arXiv},
  year={2023}
}
*/
// NOTE(review): in this copy of the file the text between angle brackets is
// missing (header names after #include, template parameter lists, cast target
// types). The code below is reproduced as found; restore those spans from the
// upstream file before compiling.
#include
#include
#include "gemv_cuda.h"
#include "../dequantize.cuh"
#include "../../utils.cuh"

#define PACK_FACTOR 8        // int4 values per 32-bit word
#define WARP_SIZE 32
#define MEM_ACCESS_SIZE 128  // bits moved per vector load (one float4)

// Reduce sum within the warp using the tree reduction algorithm.
//
// psum:     Num per-thread partial sums, converted to float before reducing.
// out_smem: per-warp rows of Num * 4 floats; row index is the warp id.
//
// The three XOR shuffles (16, 8, 1) combine the lanes that differ only in
// bits 4, 3 and 0, so lanes 0, 2, 4 and 6 end up holding the four distinct
// totals, which they store at column i * 4 + lane / 2. The function contains
// __syncthreads(), so every thread of the block has to call it.
// NOTE(review): Num, WarpSize and fp_t are used as template parameters but the
// parameter list itself is not present in this copy.
template __device__ __forceinline__ static void warp_reduce(fp_t *psum, float (*out_smem)[Num * 4]) {
  // kInterleave = 4
  float fpsum[Num];
#pragma unroll
  for (int i = 0; i < Num; ++i) {
    fpsum[i] = static_cast(psum[i]);
  }

#pragma unroll
  for (int i = 0; i < Num; ++i) {
    // T0 + T1 + T8 + T9 + T16 + T17 + T24 + T25 (kInterleave = 4)
    fpsum[i] += __shfl_xor_sync(~0, fpsum[i], 16);
    fpsum[i] += __shfl_xor_sync(~0, fpsum[i], 8);
    fpsum[i] += __shfl_xor_sync(~0, fpsum[i], 1);
  }
  __syncthreads();
  int warp = threadIdx.x / WarpSize, lane = threadIdx.x % WarpSize;
  if (lane == 0 || lane == 2 || lane == 4 || lane == 6) {
#pragma unroll
    for (int i = 0; i < Num; ++i) {
      out_smem[warp][i * 4 + lane / 2] = fpsum[i];
    }
  }
  __syncthreads();
};

// Despite the name this is a ceiling division: it returns the number of
// `divisor`-sized pieces needed to cover `c` (for positive operands).
__device__ __forceinline__ int make_divisible(int c, int divisor) {
  return (c + divisor - 1) / divisor;
}

// Weight-only int4 GEMV kernel for a small batch of activation rows.
//
// inputs:  activations, row stride IC (row b starts at inputs + b * IC).
// weight:  packed int4 weights, PACK_FACTOR values per uint32_t.
// scales / zeros: per-group 16-bit values, indexed as
//          [row offset + group * OC] (see scale_ptr / zeros_ptr below).
// outputs: row stride OC; each block writes NPerBlock * kInterleave columns
//          starting at blk_row_offset for every batch row.
//
// Per iteration a thread loads 128 bits of packed weights per output slot,
// expands them with dequantize_s4_to_f16x2, applies w * scale + zero with
// __hfma2, and accumulates products with the activations in float. Partial
// sums are combined by warp_reduce() and then summed across the warps of the
// block.
// NOTE(review): the template parameter list (f16_t, NPerBlock, Batch,
// BlockSize, GroupSize are used below) and several cast target types are
// missing in this copy.
// NOTE(review): the main loop has no tail handling, so it presumably requires
// IC * kInterleave to be a multiple of BlockSize * kElemsPerThread - confirm
// against the callers.
template __global__ void gemv_kernel(
    const f16_t *inputs, const uint32_t *weight, const f16_t *scales, const f16_t *zeros, f16_t *outputs,
    const int IC, const int OC) {
  using f162_t = typename packed_as::type;
  using accum_t = float;
  using accum2_t = typename packed_as::type;

  const int kStride = 64;
  const int kElemsPerThread = MEM_ACCESS_SIZE / 4;            // 128 / 4 = 32 values per thread per step
  const int kThreadsNumPerTile = kStride / kElemsPerThread;   // 64 / 32 = 2 threads cover one kStride tile

  static constexpr int kShuffleBasicTile = 2;
  static constexpr int kShuffleContinous = 4;
  static constexpr int kShuffleStrided = 4;

  constexpr int Num = NPerBlock * Batch;
  constexpr int kInterleave = 4;

  alignas(16) f16_t local_inputs[kElemsPerThread];
  alignas(16) uint32_t local_qweights[MEM_ACCESS_SIZE / 32];
  alignas(16) f16_t half_weight_buffer[kElemsPerThread];
  alignas(16) f16_t dequantized_weight[kElemsPerThread * NPerBlock];
  alignas(16) f16_t local_scale[NPerBlock];
  alignas(16) f16_t local_scaled_zeros[NPerBlock];

  accum_t psum[Num];
  for (int i = 0; i < Num; ++i)
    psum[i] = static_cast(0.f);

  // Dynamic shared memory viewed as one row of Num * kInterleave floats per warp.
  extern __shared__ uint8_t shmem[];
  float(*out_smem)[Num * kInterleave] = reinterpret_cast(shmem);

  const int blk_row_offset = blockIdx.x * NPerBlock * kInterleave;
  const int thd_row_offset = (threadIdx.x / kThreadsNumPerTile) % kInterleave;
  const int act_k_offset = threadIdx.x / (kThreadsNumPerTile * kInterleave) * kStride + (threadIdx.x % kThreadsNumPerTile) * kElemsPerThread;
  const int group_offset = act_k_offset / GroupSize;
  // TODO: use make_divisible
  const uint32_t *blk_weight_ptr = weight + blk_row_offset * IC / PACK_FACTOR;
  const f16_t *scale_ptr = scales + blk_row_offset + thd_row_offset + group_offset * OC;
  const f16_t *zeros_ptr = zeros + blk_row_offset + thd_row_offset + group_offset * OC;
  const f16_t *inputs_ptr = inputs + act_k_offset;

  // Distance each pointer advances per main-loop iteration.
  const int act_forward_step = BlockSize * kElemsPerThread / kInterleave;
  const int scale_forward_step = act_forward_step / GroupSize * OC;

  // Main loop iteration, each block completes the outputs for several OCs
  for (int kk = threadIdx.x * kElemsPerThread; kk < IC * kInterleave; kk += BlockSize * kElemsPerThread) {
    // Load qweight, scales and scaled_zeros
#pragma unroll
    for (int idx = 0; idx < NPerBlock; ++idx) {
      // use float4 to load weights, each thread load 32 int4 numbers (1 x float4, 128 bit)
      *((float4 *)(local_qweights)) = *((float4 *)(blk_weight_ptr + (idx * kInterleave * IC + kk) / PACK_FACTOR));
      local_scale[idx] = *(scale_ptr + idx * kInterleave);
      local_scaled_zeros[idx] = *(zeros_ptr + idx * kInterleave);

      // Map int4 qweight to fp format
#pragma unroll
      for (int i = 0; i < MEM_ACCESS_SIZE / 32; ++i) {
        // Converts 32 bits (8 x int4) to 8 fp16
        dequantize_s4_to_f16x2(*reinterpret_cast(local_qweights + i), reinterpret_cast(half_weight_buffer + i * PACK_FACTOR));
      }

      // Dequantize (apply s/z) and shuffle elements to match the weight packing format
#pragma unroll
      for (int i = 0; i < kShuffleContinous; ++i) {
#pragma unroll
        for (int j = 0; j < kShuffleStrided; ++j) {
          f162_t w = *reinterpret_cast(
              half_weight_buffer + (i + j * kShuffleContinous) * kShuffleBasicTile);
          // w * scale + zero: the zero term is added after scaling.
          w = __hfma2(w, f162f162(local_scale[idx]), f162f162(local_scaled_zeros[idx]));
          // Stored k-major with the NPerBlock output slots adjacent, so the MAC
          // loop below can read two output slots as one packed pair.
          dequantized_weight[((i * kShuffleStrided + j) * kShuffleBasicTile + 0) * NPerBlock + idx] = w.x;
          dequantized_weight[((i * kShuffleStrided + j) * kShuffleBasicTile + 1) * NPerBlock + idx] = w.y;
        }
      }
    }
#pragma unroll
    for (int batch_idx = 0; batch_idx < Batch; ++batch_idx) {
      const f16_t *local_inputs_ptr = inputs_ptr + batch_idx * IC;
#pragma unroll
      for (int idx = 0; idx < kElemsPerThread / 8; ++idx) {
        // load activation, 8 halves (128 bits) / step.
        *((float4 *)(local_inputs + idx * 8)) = *((float4 *)(local_inputs_ptr + idx * 8));
      }
      // Perform the MACs
#pragma unroll
      for (int x = 0; x < NPerBlock / 2; ++x) {
#pragma unroll
        for (int y = 0; y < kElemsPerThread; ++y) {
          accum2_t prod = cuda_cast(__hmul2(
              *reinterpret_cast(dequantized_weight + y * NPerBlock + x * 2), f162f162(local_inputs[y])));
          *reinterpret_cast(psum + batch_idx * NPerBlock + x * 2) = prod + *reinterpret_cast(psum + batch_idx * NPerBlock + x * 2);
        }
      }
    }
    inputs_ptr += act_forward_step;
    scale_ptr += scale_forward_step;
    zeros_ptr += scale_forward_step;
  }

  warp_reduce(psum, out_smem);

  // Num * Interleave = batch * NPerBlock * Interleave -> 1 thread_block write back num
  for (int i = threadIdx.x; i < Num * kInterleave; i += BlockSize) {
    int batch_idx = i / (NPerBlock * kInterleave);
    int oc_idx = i % (NPerBlock * kInterleave);
    float acc = 0.f;
    // Sum the per-warp rows written by warp_reduce().
    for (int j = 0; j < BlockSize / WARP_SIZE; ++j) {
      acc += out_smem[j][i];
    }
    outputs[batch_idx * OC + blk_row_offset + oc_idx] = static_cast(acc);
  }
}

/* Computes GEMV (PyTorch interface).
Args:
  _in_feats: tensor of shape [B, IC];
  _kernel: int tensor of shape [OC, IC // 8];
  _zeros: int tensor of shape [OC, IC // G // 8];
  _scaling_factors: tensor of shape [OC, IC // G];
  blockDim_x: size of thread block, dimension x, where blockDim_x * workload_per_thread = IC;
  blockDim_y: size of thread block, dimension y, where blockDim_y * gridDim_y = OC;
  m: batch size used to pick the kernel instantiation; only 1..7 are handled,
     any other value throws std::runtime_error;
  n: size of the last output dimension; also determines the grid size
     (n / N_PER_BLOCK / K_INTERLEAVE blocks);
  k: forwarded to the kernel as its IC argument;
  group_size: must be 128, otherwise std::runtime_error is thrown;

Returns:
  out_feats: tensor with the shape of _in_feats except that the last dimension is n;

NOTE(review): blockDim_x / blockDim_y are not parameters of this function.
NOTE(review): gemv_kernel takes scales and zeros as 16-bit floating-point
pointers and indexes both with a stride of OC per group, so the [OC, ...] and
"int tensor" descriptions of _zeros / _scaling_factors above look stale -
confirm against the packing code.
NOTE(review): m is not checked against the shape of _in_feats here.
*/
torch::Tensor awq_gemv_forward_cuda(
    torch::Tensor _in_feats,
    torch::Tensor _kernel,
    torch::Tensor _scaling_factors,
    torch::Tensor _zeros,
    int m,
    int n,
    int k,
    int group_size) {
  // Output keeps the leading dimensions of the input; only the last one changes.
  std::vector output_shape = _in_feats.sizes().vec();
  output_shape.back() = n;

  auto options = torch::TensorOptions().dtype(_in_feats.dtype()).device(_in_feats.device());
  at::Tensor _out_feats = torch::empty(output_shape, options);

  static constexpr int N_PER_BLOCK = 2;
  static constexpr int K_INTERLEAVE = 4;
  static constexpr int BLOCK_SIZE = 256;

  // Integer division: presumably n is expected to be a multiple of
  // N_PER_BLOCK * K_INTERLEAVE (8); a remainder would leave columns unwritten.
  dim3 num_blocks(n / N_PER_BLOCK / K_INTERLEAVE);
  dim3 num_threads(BLOCK_SIZE);

  // NOTE(review): the template arguments and <<<...>>> launch configurations
  // of the gemv_kernel calls below are missing in this copy of the file.
  AT_DISPATCH_REDUCED_FLOATING_TYPES(
      _in_feats.scalar_type(),
      "awq_gemv_forward_cuda",
      [&] {
        using f16_t = typename to_cpp_t::type;
        auto in_feats = reinterpret_cast(_in_feats.data_ptr());
        auto kernel = reinterpret_cast(_kernel.data_ptr());
        auto zeros = reinterpret_cast(_zeros.data_ptr());
        auto scaling_factors = reinterpret_cast(_scaling_factors.data_ptr());
        auto out_feats = reinterpret_cast(_out_feats.data_ptr());

        if (group_size == 128) {
          // One case per supported batch size.
          switch (m) {
          case 1:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          case 2:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          case 3:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          case 4:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          case 5:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          case 6:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          case 7:
            gemv_kernel<<>>(
                in_feats, kernel, scaling_factors, zeros, out_feats, k, n);
            break;
          default:
            throw std::runtime_error("Unsupported batch size for gemv kernel.\n");
          }
        } else {
          throw std::runtime_error("Unsupported group size for gemv kernel.\n");
        }
      });
  return _out_feats;
}

================================================
FILE: deepcompressor/backend/tinychat/csrc/quantization/gemv/gemv_cuda.h
================================================
#pragma once
#include

torch::Tensor awq_gemv_forward_cuda(
    torch::Tensor _in_feats,
    torch::Tensor _kernel,
    torch::Tensor _scaling_factors,
    torch::Tensor _zeros,
    int m,
    int n,
    int k,
    int group_size);

================================================
FILE: deepcompressor/backend/tinychat/csrc/utils.cuh
================================================
// Adapted from FasterTransformer, https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.hpp
// NOTE(review): header names and template argument lists are missing in this
// copy of the file, which is why several specialisations below look identical.
#pragma once

#include
#include
#include
#include
#include
#include

#define ENABLE_BF16 1

// Maps a dispatch type to the matching CUDA 16-bit type (half or __nv_bfloat16).
template struct to_cpp_t;
template <> struct to_cpp_t { using type = half; };
template <> struct to_cpp_t { using type = __nv_bfloat16; };

// Number of scalar elements held by a (possibly packed) type.
template struct num_elems;
template <> struct num_elems { static constexpr int value = 1; };
template <> struct num_elems { static constexpr int value = 2; };
template <> struct num_elems { static constexpr int value = 4; };
template <> struct num_elems { static constexpr int value = 1; };
template <> struct num_elems { static constexpr int value = 2; };
#ifdef ENABLE_BF16
template <> struct num_elems<__nv_bfloat16> { static constexpr int value = 1; };
template <> struct num_elems<__nv_bfloat162> { static constexpr int value = 2; };
#endif

// Maps an element type and a count to the corresponding packed vector type.
template struct packed_as;
template struct packed_as { using type = T; };
template <> struct packed_as { using type = half2; };
template <> struct packed_as { using type = float2; };
template <> struct packed_as { using type = int16_t; };
template <> struct packed_as { using type = int2; };
template <> struct packed_as { using type = half; };
template <> struct packed_as { using type = float; };
#ifdef ENABLE_BF16
template <> struct packed_as<__nv_bfloat16, 2> { using type = __nv_bfloat162; };
template <> struct packed_as<__nv_bfloat162, 1> { using type = __nv_bfloat16; };
#endif
#ifdef ENABLE_FP8
template <> struct packed_as<__nv_fp8_e4m3, 2> { using type = __nv_fp8x2_e4m3; };
template <> struct packed_as<__nv_fp8x2_e4m3, 1> { using type = __nv_fp8_e4m3; };
template <> struct packed_as<__nv_fp8_e5m2, 2> { using type = __nv_fp8x2_e5m2; };
template <> struct packed_as<__nv_fp8x2_e5m2, 1> { using type = __nv_fp8_e5m2; };
#endif

// Broadcasts one 16-bit scalar into both lanes of the packed 2-element type.
template __device__ __forceinline__ packed_as::type f162f162(f16_t x);
template <> __device__ __forceinline__ packed_as::type f162f162(half x) { return __half2half2(x); }
#ifdef ENABLE_BF16
template <> __device__ __forceinline__ packed_as<__nv_bfloat16, 2>::type f162f162<__nv_bfloat16>(__nv_bfloat16 x) { return __bfloat162bfloat162(x); }
# endif

// Converts a packed pair of 16-bit values to float2.
template __device__ __forceinline__ float2 f1622float2(T val);
template <> __device__ __forceinline__ float2 f1622float2(half2 val) { return __half22float2(val); }
#ifdef ENABLE_BF16
template <> __device__ __forceinline__ float2 f1622float2<__nv_bfloat162>(__nv_bfloat162 val) { return __bfloat1622float2(val); }
# endif

// Element-wise float2 arithmetic (vector-vector, then vector-scalar).
inline __device__ float2 operator*(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
inline __device__ float2 operator+(float2 a, float2 b) { return make_float2(a.x + b.x, a.y + b.y); }
inline __device__ float2 operator-(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
inline __device__ float2 operator*(float2 a, float b) { return make_float2(a.x * b, a.y * b); }
inline __device__ float2 operator+(float2 a, float b) { return make_float2(a.x + b, a.y + b); }
inline __device__ float2 operator-(float2 a, float b) { return make_float2(a.x - b, a.y -
b); } static inline __device__ int8_t float_to_int8_rn(float x) { uint32_t dst; asm volatile("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(dst) : "f"(x)); return reinterpret_cast(dst); } template inline __device__ T ldg(const T *val) { return __ldg(val); } #if ENABLE_BF16 #define float22bf162 __float22bfloat162_rn inline __device__ int16_t bf1622int16(__nv_bfloat162 val) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 800 float2 f_val; f_val.x = max(min(__low2float(val), 127.f), -128.f); f_val.y = max(min(__high2float(val), 127.f), -128.f); union { int8_t int8[2]; int16_t int16; }; int8[0] = static_cast(static_cast(f_val.x)); int8[1] = static_cast(static_cast(f_val.y)); return int16; #else val = __hmin2(val, make_bfloat162(127., 127.)); val = __hmax2(val, make_bfloat162(-128., -128.)); union { int8_t int8[2]; int16_t int16; }; int8[0] = static_cast(static_cast(val.x)); int8[1] = static_cast(static_cast(val.y)); return int16; #endif } #endif #if ENABLE_BF16 template <> inline __device__ __nv_bfloat162 ldg(const __nv_bfloat162 *val) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 800 return val[0]; #else return __ldg(val); #endif } template <> inline __device__ __nv_bfloat16 ldg(const __nv_bfloat16 *val) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 800 return val[0]; #else return __ldg(val); #endif } #endif // ENABLE_BF16 template __device__ inline T_OUT cuda_cast(T_IN val) { return val; } template <> __device__ inline float2 cuda_cast(int2 val) { return make_float2(val.x, val.y); } template <> __device__ inline float2 cuda_cast(float val) { return make_float2(val, val); } template <> __device__ inline float2 cuda_cast(half2 val) { return __half22float2(val); } template <> __device__ inline half2 cuda_cast(float2 val) { return __float22half2_rn(val); } template <> __device__ inline half2 cuda_cast(float val) { return __float2half2_rn(val); } template <> __device__ inline half2 cuda_cast(half val) { return __half2half2(val); } template <> __device__ inline int8_t 
cuda_cast(half val) { union { int8_t int8[2]; int16_t int16; }; union { half fp16; int16_t int16_in; }; fp16 = val; asm volatile("cvt.rni.sat.s8.f16 %0, %1;" : "=h"(int16) : "h"(int16_in)); return int8[0]; } template <> __device__ inline int16_t cuda_cast(half2 val) { union { int8_t int8[2]; int16_t int16; }; int8[0] = cuda_cast(val.x); int8[1] = cuda_cast(val.y); return int16; } template <> __device__ inline int8_t cuda_cast(float val) { union { int8_t int8[2]; int16_t int16; }; asm volatile("cvt.rni.sat.s8.f32 %0, %1;" : "=h"(int16) : "f"(val)); return int8[0]; } template <> __device__ inline int16_t cuda_cast(float2 val) { union { int8_t int8[2]; int16_t int16; }; int8[0] = cuda_cast(val.x); int8[1] = cuda_cast(val.y); return int16; } template <> __device__ inline half2 cuda_cast(int16_t val) { union { int8_t int8[2]; int16_t int16; }; int16 = val; return make_half2(int8[0], int8[1]); } template <> __device__ inline float2 cuda_cast(int16_t val) { union { int8_t int8[2]; int16_t int16; }; int16 = val; return make_float2(int8[0], int8[1]); } #ifdef ENABLE_BF16 template <> __device__ inline __nv_bfloat16 cuda_cast(int32_t val) { return static_cast(val); } template <> __device__ inline __nv_bfloat16 cuda_cast(int8_t val) { return static_cast(val); } template <> __device__ inline int8_t cuda_cast(__nv_bfloat16 val) { return static_cast(val); } template <> __device__ inline float cuda_cast(__nv_bfloat16 val) { return __bfloat162float(val); } template <> __device__ inline float2 cuda_cast(__nv_bfloat162 val) { return __bfloat1622float2(val); } template <> __device__ inline half cuda_cast(__nv_bfloat16 val) { return __float2half(__bfloat162float(val)); } template <> __device__ inline int16_t cuda_cast(__nv_bfloat162 val) { return bf1622int16(val); } template <> __device__ inline __nv_bfloat16 cuda_cast<__nv_bfloat16, float>(float val) { return __float2bfloat16(val); } template <> __device__ inline __nv_bfloat16 cuda_cast<__nv_bfloat16, half>(half val) { return 
__float2bfloat16(__half2float(val)); } template <> __device__ inline __nv_bfloat162 cuda_cast<__nv_bfloat162, __nv_bfloat16>(__nv_bfloat16 val) { return __bfloat162bfloat162(val); } template <> __device__ inline __nv_bfloat162 cuda_cast<__nv_bfloat162, float>(float val) { return __float2bfloat162_rn(val); } template <> __device__ inline __nv_bfloat162 cuda_cast<__nv_bfloat162, float2>(float2 val) { return float22bf162(val); } template <> __device__ inline __nv_bfloat162 cuda_cast<__nv_bfloat162, int16_t>(int16_t val) { union { int8_t int8[2]; int16_t int16; }; int16 = val; __nv_bfloat162 res; res.x = cuda_cast<__nv_bfloat16>(int8[0]); res.y = cuda_cast<__nv_bfloat16>(int8[1]); return res; } template <> __device__ inline __nv_bfloat162 cuda_cast<__nv_bfloat162, half2>(half2 val) { return float22bf162(__half22float2(val)); } #endif // ENABLE BF16 template __device__ inline To cuda_sum(Ti val) { return cuda_cast(val); }; template __device__ inline To cuda_sum(float2 val) { return cuda_cast(val.x + val.y); }; // Unary maximum: compute the max of a vector type template __device__ inline To cuda_max(Ti val) { return cuda_cast(val); }; template <> __device__ inline float cuda_max(float2 val) { return fmaxf(val.x, val.y); } template <> __device__ inline half cuda_max(half2 val) { return __hmax(val.x, val.y); } #ifdef ENABLE_BF16 template <> __device__ inline __nv_bfloat16 cuda_max(__nv_bfloat162 val) { #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)) return __hmax(val.x, val.y); #endif } #endif // Binary maximum: compute the max of two scalar types template __device__ inline T cuda_max(T val1, T val2) { return (val1 > val2) ? 
class W4Linear(nn.Module):
    """TinyChat 4-bit weight-only quantized linear layer.

    The layer stores three buffers in the layout produced by
    `convert_to_tinychat_w4x16y16_linear_weight`:

    - ``qweight``: packed 4-bit weights stored as ``int16``,
    - ``scales``: per-group scales of shape ``(ceil_num_groups, out_features)``,
    - ``scaled_zeros``: per-group zero points of the same shape as ``scales``.

    Args:
        in_features (`int`):
            input channel size.
        out_features (`int`):
            output channel size.
        bias (`bool`, *optional*, defaults to `False`):
            whether the layer has a bias buffer.
        group_size (`int`, *optional*, defaults to `128`):
            quantization group size; ``-1`` means one group spanning all input channels.
        dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
            data type of scales, zeros and bias; must be ``float16`` or ``bfloat16``.
        device (`str` or `torch.device`, *optional*, defaults to `"cuda"`):
            device on which the buffers are allocated.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool = False,
        group_size: int = 128,
        dtype: torch.dtype = torch.float16,
        device: str | torch.device = "cuda",
    ):
        super().__init__()
        assert dtype in (torch.float16, torch.bfloat16), f"Unsupported dtype: {dtype}"
        self.in_features = in_features
        self.out_features = out_features
        self.group_size = group_size if group_size != -1 else in_features
        assert self.in_features % self.group_size == 0
        assert out_features % (32 // self.weight_bits) == 0
        self.ceil_num_groups = ceil_num_groups(
            in_features=self.in_features,
            group_size=self.group_size,
            weight_bits=self.weight_bits,
        )
        assert out_features % (self.interleave) == 0
        self.register_buffer(
            "qweight",
            torch.zeros(
                (
                    self.out_features // self.interleave,
                    self.in_features // (16 // self.weight_bits) * self.interleave,
                ),
                dtype=torch.int16,
                device=device,
            ),
        )
        self.register_buffer(
            "scales",
            torch.zeros((self.ceil_num_groups, self.out_features), dtype=dtype, device=device),
        )
        self.register_buffer(
            "scaled_zeros",
            torch.zeros((self.ceil_num_groups, self.out_features), dtype=dtype, device=device),
        )
        if bias:
            self.register_buffer("bias", torch.zeros((out_features), dtype=dtype, device=device))
        else:
            self.bias = None

    @property
    def weight_bits(self) -> int:
        """Number of bits per quantized weight (always 4)."""
        return 4

    @property
    def interleave(self) -> int:
        """Number of output channels interleaved in one packed row (always 4)."""
        return 4

    @torch.no_grad()
    def forward(self, x):
        """Apply the quantized linear layer to ``x``.

        Inputs with fewer than 8 rows (all leading dimensions flattened) are sent to
        the GEMV kernel; everything else goes to the GEMM kernel.
        """
        # Number of rows once all leading dimensions are flattened; computed once so
        # the dispatch test and the kernel argument can never disagree.
        num_tokens = x.numel() // x.shape[-1]
        if num_tokens < 8:
            out = _C.awq_gemv_forward_cuda(
                x,
                self.qweight,
                self.scales,
                self.scaled_zeros,
                num_tokens,
                self.out_features,
                self.in_features,
                self.group_size,
            )
        else:
            out = _C.awq_gemm_forward_cuda(x, self.qweight, self.scales, self.scaled_zeros)
        out = out + self.bias if self.bias is not None else out
        return out

    @staticmethod
    def from_linear(
        linear: nn.Linear,
        group_size: int,
        init_only: bool = False,
        weight: torch.Tensor | None = None,
        scale: torch.Tensor | None = None,
        zero: torch.Tensor | None = None,
        zero_pre_scaled: bool = False,
    ) -> "W4Linear":
        """Convert a linear layer to a TinyChat 4-bit weight-only quantized linear layer.

        When ``scale`` is not given, the weight is quantized here with a simple
        round-to-nearest asymmetric scheme whose range always contains zero.

        Args:
            linear (`nn.Linear`):
                linear layer to be converted.
            group_size (`int`):
                quantization group size; any value ``<= 0`` means one group per output channel.
            init_only (`bool`, *optional*, defaults to `False`):
                whether to only initialize the quantized linear layer.
            weight (`torch.Tensor`, *optional*, defaults to `None`):
                weight tensor for the quantized linear layer.
            scale (`torch.Tensor`, *optional*, defaults to `None`):
                scale tensor for the quantized linear layer.
            zero (`torch.Tensor`, *optional*, defaults to `None`):
                zero point tensor for the quantized linear layer.
            zero_pre_scaled (`bool`, *optional*, defaults to `False`):
                whether zero point tensor is pre-scaled.

        Returns:
            `W4Linear`:
                quantized linear layer.
        """
        assert isinstance(linear, nn.Linear)
        weight = linear.weight.data if weight is None else weight.data
        dtype, device = weight.dtype, weight.device
        oc, ic = linear.out_features, linear.in_features
        # Resolve "no grouping" before building the module so that every non-positive
        # value behaves like -1 instead of reaching a modulo by zero in __init__.
        group_size = ic if group_size <= 0 else group_size
        _linear = W4Linear(
            in_features=ic,
            out_features=oc,
            bias=linear.bias is not None,
            group_size=group_size,
            dtype=dtype,
            device=device,
        )
        if init_only:
            return _linear
        if linear.bias is not None:
            _linear.bias.data.copy_(linear.bias.data)
        if scale is None:
            assert zero is None, "scale and zero point tensors should be provided together."
            assert group_size <= ic, "group size should be less than or equal to input channel size."
            assert ic % group_size == 0, "input channel size should be divisible by group size."
            ng, gs = ic // group_size, group_size
            # The constructor only accepts fp16/bf16, so this cast always copies and the
            # in-place arithmetic below never touches the caller's weight.
            weight = weight.to(dtype=torch.float32).view(oc, 1, ng, gs)
            # The zero point is clamped to be non-negative below, so the quantization
            # range has to reach down to 0; otherwise an all-positive group saturates
            # at code 15 and collapses to a single value.
            vmin = weight.amin(dim=-1, keepdim=True).clamp_(max=0)
            vmax = weight.amax(dim=-1, keepdim=True)
            if zero_pre_scaled:
                # The integer zero point is additionally capped at 15, so the range
                # must reach up to 0 as well for all-negative groups.
                vmax = vmax.clamp_(min=0)
            scale = (vmax - vmin).div_(15)
            scale[scale == 0] = 1.0  # all-zero group: avoid division by zero
            if zero_pre_scaled:
                zero = vmin.neg_().div_(scale).round_().clamp_(0, 15)
                weight = weight.div_(scale).add_(zero).round_().clamp_(0, 15).sub_(zero).mul_(scale)
            else:
                zero = vmin.neg_().clamp_min(0)
                weight = weight.add_(zero).div_(scale).round_().clamp_(0, 15).mul_(scale).sub_(zero)
            weight = weight.to(dtype=dtype).view(oc, ic)
            scale = scale.to(dtype=dtype)
            zero = zero.to(dtype=dtype)
        weight, scale, zero = convert_to_tinychat_w4x16y16_linear_weight(
            weight=weight,
            scale=scale,
            zero=zero,
            zero_pre_scaled=zero_pre_scaled,
        )
        _linear.qweight.data.copy_(weight)
        _linear.scales.data.copy_(scale)
        _linear.scaled_zeros.data.copy_(zero)
        return _linear

    def extra_repr(self) -> str:
        """Return the summary shown inside ``repr(module)``."""
        return (
            f"in_features={self.in_features}, out_features={self.out_features}, "
            f"bias={self.bias is not None}, weight_bits={self.weight_bits}, "
            f"group_size={self.group_size}"
        )
def ceil_num_groups(in_features: int, group_size: int, weight_bits: int = 4) -> int:
    """Return the padded number of quantization groups.

    The raw group count is rounded up so that it fills whole 32-bit packs, and the
    pack count is further rounded up to a multiple that depends on the group size.

    Args:
        in_features (`int`):
            input channel size.
        group_size (`int`):
            quantization group size.
        weight_bits (`int`, *optional*, defaults to `4`):
            quantized weight bits.

    Returns:
        `int`:
            ceiling number of quantization groups.
    """
    assert in_features % group_size == 0, "input channel size should be divisible by group size."
    assert weight_bits in (4, 2, 1), "weight bits should be 4, 2, or 1."
    # required multiple of the pack count for each supported group size
    if group_size >= 128:
        pack_multiple = 1
    else:
        pack_multiple = {64: 2, 32: 4}.get(group_size)
        if pack_multiple is None:
            raise NotImplementedError
    elems_per_pack = 32 // weight_bits  # one INT32 holds this many weight elements
    packs = ceil_divide(in_features // group_size, elems_per_pack)
    aligned_packs = ceil_divide(packs, pack_multiple) * pack_multiple
    return aligned_packs * elems_per_pack
def convert_to_tinychat_w4x16y16_linear_weight(
    weight: torch.Tensor,
    scale: torch.Tensor,
    zero: torch.Tensor,
    zero_pre_scaled: bool = False,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Quantize and pack a weight into the TinyChat W4-X16-Y16 linear layout.

    The weight is mapped to integer codes with ``round((weight + zero) / scale)``,
    packed by `pack_w4`, and returned together with group-major scale and negated
    zero tensors whose group dimension is zero-padded to `ceil_num_groups`.

    Args:
        weight (`torch.Tensor`):
            weight tensor to be converted.
        scale (`torch.Tensor`):
            scale tensor for the weight tensor.
        zero (`torch.Tensor`):
            zero point tensor for the weight tensor.
        zero_pre_scaled (`bool`, *optional*, defaults to `False`):
            whether zero point tensor is pre-scaled.

    Returns:
        `tuple[torch.Tensor, torch.Tensor, torch.Tensor]`:
            packed quantized weight tensor, scale tensor, and zero point tensor.
    """
    out_dtype, device = weight.dtype, weight.device
    assert out_dtype in (torch.float16, torch.bfloat16), "currently tinychat only supports fp16 and bf16."
    assert scale is not None, "scale tensor is required for quantization."
    assert zero is not None, "zero point tensor is required for quantization."

    # all arithmetic is carried out in fp32 on the weight's device
    w = weight.to(dtype=torch.float32)
    s = scale.to(dtype=torch.float32, device=device)
    z = zero.to(dtype=torch.float32, device=device)
    if zero_pre_scaled:
        z = z * s

    oc, ic = w.shape
    per_tensor = s.numel() == 1
    if per_tensor:
        s = s.view(1, 1).expand(oc, 1)
    ng = 1 if per_tensor else s.numel() // oc
    gs = ic if per_tensor else ic // ng
    s = s.reshape(oc, ng).contiguous().view(oc, ng, 1)
    assert ic == gs * ng, "input channel size should be equal to group size times number of groups."
    if z.numel() == 1:
        z = z.view(1, 1).expand(oc, ng)
    z = z.reshape(oc, ng).contiguous().view(oc, ng, 1)

    # integer codes, one group per slice along dim 1
    codes = ((w.view(oc, ng, -1) + z) / s).round().view(oc, ic)
    assert codes.min() >= 0 and codes.max() <= 15, "quantized weight should be in [0, 15]."
    packed_weight = pack_w4(codes.to(torch.int32))

    padded_ng = ceil_num_groups(ic, gs, weight_bits=4)
    packed_scale = torch.zeros((padded_ng, oc), dtype=out_dtype, device=device)
    packed_zero = torch.zeros((padded_ng, oc), dtype=out_dtype, device=device)
    packed_scale[:ng] = s.view(oc, ng).t().to(dtype=out_dtype)
    # zeros are stored negated
    packed_zero[:ng] = z.view(oc, ng).t().to(dtype=out_dtype).neg()
    return packed_weight, packed_scale, packed_zero
""" return (x + divisor - 1) // divisor def pad( tensor: tp.Optional[torch.Tensor], divisor: int | tp.Sequence[int], dim: int | tp.Sequence[int], fill_value: float | int = 0, ) -> torch.Tensor: if isinstance(divisor, int): if divisor <= 1: return tensor elif all(d <= 1 for d in divisor): return tensor if tensor is None: return None shape = list(tensor.shape) if isinstance(dim, int): assert isinstance(divisor, int) shape[dim] = ceil_divide(shape[dim], divisor) * divisor else: if isinstance(divisor, int): divisor = [divisor] * len(dim) for d, div in zip(dim, divisor, strict=True): shape[d] = ceil_divide(shape[d], div) * div result = torch.full(shape, fill_value, dtype=tensor.dtype, device=tensor.device) result[[slice(0, extent) for extent in tensor.shape]] = tensor return result def load_state_dict_in_safetensors( path: str, device: str | torch.device = "cpu", filter_prefix: str = "" ) -> dict[str, torch.Tensor]: """Load state dict in SafeTensors. Args: path (`str`): file path. device (`str` | `torch.device`, optional, defaults to `"cpu"`): device. filter_prefix (`str`, optional, defaults to `""`): filter prefix. Returns: `dict`: loaded SafeTensors. """ state_dict = {} with safetensors.safe_open(path, framework="pt", device=device) as f: for k in f.keys(): if filter_prefix and not k.startswith(filter_prefix): continue state_dict[k.removeprefix(filter_prefix)] = f.get_tensor(k) return state_dict def fp_quantize(x: torch.Tensor, codebook: torch.Tensor | None = None) -> torch.Tensor: if codebook is None: codebook = torch.tensor( [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0, -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0], dtype=x.dtype, device=x.device, ) return (x.unsqueeze(-1) - codebook.unsqueeze(0)).abs().argmin(dim=-1) class MmaWeightPackerBase: def __init__(self, bits: int, warp_n: int, comp_n: int = None, comp_k: int = None): self.bits = bits assert self.bits in (1, 4, 8, 16, 32), "weight bits should be 1, 4, 8, 16, or 32." 
# region compute tile size self.comp_n = comp_n if comp_n is not None else 16 """smallest tile size in `n` dimension for MMA computation.""" self.comp_k = comp_k if comp_k is not None else 256 // self.bits """smallest tile size in `k` dimension for MMA computation.""" # the smallest MMA computation may contain several MMA instructions self.insn_n = 8 # mma instruction tile size in `n` dimension """tile size in `n` dimension for MMA instruction.""" self.insn_k = self.comp_k """tile size in `k` dimension for MMA instruction.""" assert self.insn_k * self.bits in (128, 256), ( f"insn_k ({self.insn_k}) * bits ({self.bits}) should be 128 or 256." ) assert self.comp_n % self.insn_n == 0, f"comp_n ({self.comp_n}) should be divisible by insn_n ({self.insn_n})." self.num_lanes = 32 """there are 32 lanes (or threds) in a warp.""" self.num_k_lanes = 4 self.num_n_lanes = 8 assert warp_n >= self.comp_n and warp_n % self.comp_n == 0, ( f"warp_n ({warp_n}) should be divisible by comp_n({self.comp_n})." ) self.warp_n = warp_n # endregion # region memory self.reg_k = 32 // self.bits """number of elements in a register in `k` dimension.""" self.reg_n = 1 """number of elements in a register in `n` dimension (always 1).""" self.k_pack_size = self.comp_k // (self.num_k_lanes * self.reg_k) """number of elements in a pack in `k` dimension.""" self.n_pack_size = self.comp_n // (self.num_n_lanes * self.reg_n) """number of elements in a pack in `n` dimension.""" self.pack_size = self.k_pack_size * self.n_pack_size """number of elements in a pack accessed by a lane at a time.""" assert 1 <= self.pack_size <= 4, "pack size should be less than or equal to 4." 
assert self.k_pack_size * self.num_k_lanes * self.reg_k == self.comp_k assert self.n_pack_size * self.num_n_lanes * self.reg_n == self.comp_n self.mem_k = self.comp_k """the tile size in `k` dimension for one tensor memory access.""" self.mem_n = warp_n """the tile size in `n` dimension for one tensor memory access.""" self.num_k_packs = self.mem_k // (self.k_pack_size * self.num_k_lanes * self.reg_k) """number of packs in `k` dimension for one tensor memory access.""" self.num_n_packs = self.mem_n // (self.n_pack_size * self.num_n_lanes * self.reg_n) """number of packs in `n` dimension for one tensor memory access.""" # endregion def get_view_shape(self, n: int, k: int) -> tuple[int, int, int, int, int, int, int, int, int, int]: assert n % self.mem_n == 0, "output channel size should be divisible by mem_n." assert k % self.mem_k == 0, "input channel size should be divisible by mem_k." return ( n // self.mem_n, self.num_n_packs, self.n_pack_size, self.num_n_lanes, self.reg_n, k // self.mem_k, self.num_k_packs, self.k_pack_size, self.num_k_lanes, self.reg_k, ) ================================================ FILE: deepcompressor/calib/__init__.py ================================================ # -*- coding: utf-8 -*- ================================================ FILE: deepcompressor/calib/config/__init__.py ================================================ # -*- coding: utf-8 -*- from .lowrank import QuantLowRankCalibConfig, SkipBasedQuantLowRankCalibConfig from .range import DynamicRangeCalibConfig, SkipBasedDynamicRangeCalibConfig from .reorder import ChannelOrderCalibConfig, SkipBasedChannelOrderConfig from .rotation import QuantRotationConfig from .search import ( SearchBasedCalibConfig, SearchBasedCalibGranularity, SearchBasedCalibObjective, SearchBasedCalibStrategy, ) from .smooth import SkipBasedSmoothCalibConfig, SmoothCalibConfig, SmoothSpanMode, SmoothTransfomerConfig ================================================ FILE: 
@configclass
@dataclass
class QuantLowRankCalibConfig(SearchBasedCalibConfig, QuantLowRankConfig):
    """Configuration for quantization low-rank branch calibration.

    Any strategy other than `Manual` is coerced to `GridSearch`, and a compensating
    branch calibrated in a single iteration is forced to be exclusive.

    Args:
        rank (`int`, *optional*, defaults to `32`):
            The rank of the low-rank branch.
        exclusive (`bool`, *optional*, defaults to `False`):
            Whether to use exclusive low-rank branch for each weight sharing the inputs.
        compensate (`bool`, *optional*, defaults to `False`):
            Whether the low-rank branch compensates the quantization error.
        degree (`int`, *optional*, default=`2`):
            The power degree for the quantization error.
        objective (`SearchBasedCalibObjective`, *optional*, default=`SearchBasedCalibObjective.OutputsError`):
            The objective for quantization calibration.
        sample_batch_size (`int`, *optional*, default=`-1`):
            The samples batch size for calibration.
        sample_size (`int`, *optional*, default=`-1`):
            The calibration sample size.
        outputs_device (`str`, *optional*, default=`"cpu"`):
            The device to store the precomputed outputs of the module.
        num_iters (`int`, *optional*, default=`1`):
            The number of iterations.
        early_stop (`bool`, *optional*, default=`False`):
            Whether to stop the calibration early.
    """

    # fixed (non-init) settings
    granularity: SearchBasedCalibGranularity = field(init=False, default=SearchBasedCalibGranularity.Layer)
    element_batch_size: int = field(init=False, default=-1)
    element_size: int = field(init=False, default=-1)
    pre_reshape: bool = field(init=False, default=True)
    # user-facing settings
    num_iters: int = 1
    early_stop: bool = False

    def __post_init__(self):
        manual = SearchBasedCalibStrategy.Manual
        if self.strategy != manual:
            self.strategy = SearchBasedCalibStrategy.GridSearch
        single_pass = self.num_iters <= 1
        if self.compensate and single_pass:
            self.exclusive = True
        super().__post_init__()

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        Args:
            prefix (`str`, *optional*, default=`""`):
                The prefix prepended (followed by a dot) to every directory name.

        Returns:
            `list[str]`:
                The directory names of the parent configuration followed by the
                name encoding the low-rank settings.
        """
        names = super().generate_dirnames(**kwargs)
        tags = [f"i{num2str(self.num_iters)}.r{num2str(self.rank)}"]
        if self.exclusive:
            tags.append("exclusive")
        if self.compensate:
            tags.append("compensate")
        # early stopping only matters when there is more than one iteration
        if self.early_stop and self.num_iters > 1:
            tags.append("earlystop")
        names.append(".".join(tags))
        if not prefix:
            return names
        return [f"{prefix}.{name}" for name in names]
@configclass
@dataclass
class DynamicRangeCalibConfig(SearchBasedCalibConfig):
    """Configuration for quantization dynamic range calibration.

    Args:
        degree (`int`, *optional*, default=`2`):
            The power degree for the quantization error.
        objective (`SearchBasedCalibObjective`, *optional*, default=`SearchBasedCalibObjective.OutputsError`):
            The objective for quantization calibration.
        strategy (`SearchBasedCalibStrategy`, *optional*, default=`SearchBasedCalibStrategy.Manual`):
            The strategy for quantization calibration.
        granularity (`SearchBasedCalibGranularity`, *optional*, default=`SearchBasedCalibGranularity.Layer`):
            The granularity for quantization calibration.
        element_batch_size (`int`, *optional*, default=`-1`):
            The element batch size for calibration.
        sample_batch_size (`int`, *optional*, default=`-1`):
            The samples batch size for calibration.
        element_size (`int`, *optional*, default=`-1`):
            The calibration element size.
        sample_size (`int`, *optional*, default=`-1`):
            The calibration sample size.
        pre_reshape (`bool`, *optional*, default=`True`):
            Whether to enable reshaping the tensor before calibration.
        outputs_device (`str`, *optional*, default=`"cpu"`):
            The device to store the precomputed outputs of the module.
        ratio (`float`, *optional*, default=`1.0`):
            The dynamic range ratio.
        max_shrink (`float`, *optional*, default=`0.2`):
            Maximum shrinkage ratio.
        max_expand (`float`, *optional*, default=`1.0`):
            Maximum expansion ratio.
        num_grids (`int`, *optional*, default=`80`):
            Number of grids for linear range search.
        allow_scale (`bool`, *optional*, default=`False`):
            Whether to allow range dynamic scaling.
    """

    ratio: float = 1.0
    max_shrink: float = 0.2
    max_expand: float = 1.0
    num_grids: int = 80
    allow_scale: bool = False

    def get_linear_ratios(self) -> list[float]:
        """Get the ratios for linear range search.

        Returns:
            `list[float]`:
                `num_grids` evenly spaced ratios going from just below 1 down to
                `max_shrink`, followed (only if `max_expand > 1`) by `num_grids`
                ratios going from just above 1 up to `max_expand`.
        """
        assert self.max_shrink < 1, "maximal shrinkage ratio must be less than 1"
        grids = range(1, self.num_grids + 1)
        shrink_span = 1 - self.max_shrink
        candidates = [1 - grid / self.num_grids * shrink_span for grid in grids]
        if self.max_expand > 1:
            expand_span = self.max_expand - 1
            candidates.extend(1 + grid / self.num_grids * expand_span for grid in grids)
        return candidates

    def get_ratios(self) -> list[list[float]]:
        """Get the ratio candidates for the configured search strategy.

        Returns:
            `list[list[float]]`:
                `[[ratio]]` for the manual strategy; `[[1.0], linear_ratios]` for
                grid search.

        Raises:
            ValueError: If the strategy is neither manual nor grid search.
        """
        strategy = self.strategy
        if strategy == SearchBasedCalibStrategy.Manual:
            return [[self.ratio]]
        if strategy == SearchBasedCalibStrategy.GridSearch:
            return [[1.0], self.get_linear_ratios()]
        raise ValueError(f"Invalid strategy: {self.strategy}")

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        Args:
            prefix (`str`, *optional*, default=`""`):
                The prefix of the directory.

        Returns:
            `list[str]`:
                The directory names.

        Raises:
            ValueError: If the strategy is neither manual nor grid search.
        """
        names = super().generate_dirnames(**kwargs)
        strategy = self.strategy
        if strategy == SearchBasedCalibStrategy.Manual:
            range_tag = f"r.[{num2str(self.ratio)}]"
        elif strategy == SearchBasedCalibStrategy.GridSearch:
            range_tag = f"r.[{num2str(self.max_shrink)}.{num2str(self.max_expand)}].g{self.num_grids}"
        else:
            raise ValueError(f"Invalid strategy: {self.strategy}")
        suffix = ".scale" if self.allow_scale else ""
        names.append(range_tag + suffix)
        if not prefix:
            return names
        return [f"{prefix}.{name}" for name in names]
@configclass
@dataclass
class ChannelOrderCalibConfig(SearchBasedCalibConfig):
    """Configuration for channel order calibration in group quantization.

    The inherited ``objective``, ``granularity``, ``element_batch_size``,
    ``element_size`` and ``pre_reshape`` fields are pinned to fixed values and
    excluded from the constructor.

    Args:
        degree (`int`, *optional*, default=`2`):
            The power degree for the quantization error.
        strategy (`SearchBasedCalibStrategy`, *optional*, default=`SearchBasedCalibStrategy.Manual`):
            The strategy for quantization calibration. Any strategy other than
            `Manual` is replaced by `GridSearch`.
        sample_batch_size (`int`, *optional*, default=`-1`):
            The samples batch size for calibration.
        sample_size (`int`, *optional*, default=`-1`):
            The calibration sample size.
        allow_x_quant (`bool`, *optional*, default=`True`):
            Whether to allow input quantization during calibration.
        allow_w_quant (`bool`, *optional*, default=`True`):
            Whether to allow weight quantization during calibration.
        channel_metric (`ChannelOrderCalibConfig.ChannelMetric`, *optional*, default=`ChannelMetric.InputsAbsMax`):
            The mode for computing the channel importance.
        channel_index (`ChannelOrderCalibConfig.ChannelIndex`, *optional*, default=`ChannelIndex.Sequential`):
            The mode for ranking the channel importance.
        dynamic (`bool`, *optional*, default=`False`):
            Whether to enable dynamic channel reorder.
    """

    class ChannelMetric(enum.Enum):
        """The mode for computing the channel importance."""

        InputsAbsMax = "xMax"
        InputsAbsMean = "xAvg"
        InputsRootMeanSquare = "xRms"
        WeightsAbsMax = "wMax"
        WeightsAbsMean = "wAvg"
        WeightsRootMeanSquare = "wRms"
        AbsMaxProduct = "pMax"
        AbsMeanProduct = "pAvg"
        RootMeanSquareProduct = "pRms"

    class ChannelIndex(enum.Enum):
        """The mode for ranking the channel importance."""

        Sequential = "Seq"
        Transpose = "Trp"

    # inherited search options that are fixed for channel reordering
    objective: SearchBasedCalibObjective = field(init=False, default=SearchBasedCalibObjective.OutputsError)
    granularity: SearchBasedCalibGranularity = field(init=False, default=SearchBasedCalibGranularity.Layer)
    element_batch_size: int = field(init=False, default=-1)
    element_size: int = field(init=False, default=-1)
    pre_reshape: bool = field(init=False, default=True)
    # options specific to channel reordering
    allow_x_quant: bool = True
    allow_w_quant: bool = True
    channel_metric: ChannelMetric = ChannelMetric.InputsAbsMax
    channel_index: ChannelIndex = ChannelIndex.Sequential
    dynamic: bool = False

    def __post_init__(self) -> None:
        """Normalize the strategy, then run the base-class validation."""
        manual = SearchBasedCalibStrategy.Manual
        # every non-manual strategy is treated as a grid search
        self.strategy = manual if self.strategy == manual else SearchBasedCalibStrategy.GridSearch
        super().__post_init__()

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        Args:
            prefix (`str`, *optional*, default=`""`):
                The prefix of the directory.

        Returns:
            `list[str]`:
                The directory names.
        """
        dirnames = super().generate_dirnames(**kwargs)
        if self.strategy == SearchBasedCalibStrategy.Manual:
            order_name = ".".join((self.channel_metric.name, self.channel_index.name))
        else:
            order_name = "search"
        dirnames.append(order_name + ".dynamic" if self.dynamic else order_name)
        # list the operands whose quantization is switched off, inputs first
        disabled = [tag for tag, allowed in (("x", self.allow_x_quant), ("w", self.allow_w_quant)) if not allowed]
        if disabled:
            dirnames.append(f"disallow.[{'+'.join(disabled)}]")
        if not prefix:
            return dirnames
        return [f"{prefix}.{dirname}" for dirname in dirnames]
""" pass ================================================ FILE: deepcompressor/calib/config/rotation.py ================================================ # -*- coding: utf-8 -*- """Quantization Rotation configuration.""" import os import typing as tp from dataclasses import dataclass, field import omniconfig from omniconfig import configclass __all__ = ["QuantRotationConfig"] @configclass @dataclass class QuantRotationConfig: """Configuration for rotation quantization. Args: name (`str`): The name of the rotation quantization configuration. If `path` is provided, this is required. Otherwise, it is set to "random" if `random` is `True`, and "hadamard" otherwise. path (`str`, *optional*, default=`""`): The path to the rotation matrix. If provided, `name` must be set. random (`bool`, *optional*, default=`False`): Whether to use random hadamard sample as rotation matrix. transforms (`list[str]`, *optional*, default=`[]`): The module keys using explicit hadamard transform. """ name: str = "" path: str = "" random: bool = False transforms: list[str] = field(default_factory=list) def __post_init__(self) -> None: self.transforms = sorted(set(self.transforms or [])) if self.path and os.path.exists(self.path): assert self.name, "The name of the rotation quantization configuration must be provided." self.random = False else: self.path = "" self.name = "random" if self.random else "hadamard" def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]: """Get the directory names of the rotation quantization configuration. Returns: list[str]: The directory names of the rotation quantization configuration. 
""" name = self.name if self.transforms: name += f".[{'+'.join(self.transforms)}]" return [f"{prefix}.{name}" if prefix else name] @classmethod def update_get_arguments( cls: type["QuantRotationConfig"], *, overwrites: dict[str, tp.Callable[[omniconfig.Arguments], None] | None] | None = None, defaults: dict[str, tp.Any] | None = None, ) -> tuple[dict[str, tp.Callable[[omniconfig.Arguments], None] | None], dict[str, tp.Any]]: """Get the arguments for the rotation quantization configuration.""" overwrites = overwrites or {} defaults = defaults or {} collect_fn = omniconfig.ADD_PREFIX_BOOL_FIELDS("transform", **defaults) def add_transforms_argument(parser): collect_fn(parser) parser.add_argument("--transforms", nargs="+", default=[], help="The keys of the modules to transform.") overwrites.setdefault("transforms", add_transforms_argument) return overwrites, defaults @classmethod def update_from_dict( cls: type["QuantRotationConfig"], *, parsed_args: dict[str, tp.Any], overwrites: dict[str, tp.Any] ) -> tuple[dict[str, tp.Any], dict[str, tp.Any]]: """Create a rotation quantization configuration from the parsed arguments.""" parsed_args.setdefault("transforms", []).extend(omniconfig.COLLECT_PREFIX_BOOL_FIELDS(parsed_args, "transform")) return parsed_args, overwrites ================================================ FILE: deepcompressor/calib/config/search.py ================================================ # -*- coding: utf-8 -*- """Quantization calibrator configurations.""" import enum from dataclasses import dataclass from omniconfig import configclass from ...utils.common import num2str __all__ = [ "SearchBasedCalibStrategy", "SearchBasedCalibGranularity", "SearchBasedCalibObjective", "SearchBasedCalibConfig", ] class SearchBasedCalibStrategy(enum.Enum): """The strategy for search-based quantization calibration.""" Manual = enum.auto() GridSearch = enum.auto() # RandomSearch = enum.auto() # Bayesian = enum.auto() # EvolutionaryAlgorithm = enum.auto() # 
@configclass
@dataclass
class SearchBasedCalibConfig:
    """The base configuration for search-based quantization calibration.

    Args:
        degree (`int`, *optional*, default=`2`):
            The power degree for the quantization error.
        objective (`SearchBasedCalibObjective`, *optional*, default=`SearchBasedCalibObjective.OutputsError`):
            The objective for quantization calibration.
        strategy (`SearchBasedCalibStrategy`, *optional*, default=`SearchBasedCalibStrategy.Manual`):
            The strategy for quantization calibration.
        granularity (`SearchBasedCalibGranularity`, *optional*, default=`SearchBasedCalibGranularity.Layer`):
            The granularity for quantization calibration.
        element_batch_size (`int`, *optional*, default=`-1`):
            The element batch size for calibration.
        sample_batch_size (`int`, *optional*, default=`-1`):
            The samples batch size for calibration.
        element_size (`int`, *optional*, default=`-1`):
            The calibration element size.
        sample_size (`int`, *optional*, default=`-1`):
            The calibration sample size.
        pre_reshape (`bool`, *optional*, default=`True`):
            Whether to enable reshaping the tensor before calibration.
        outputs_device (`str`, *optional*, default=`"cpu"`):
            The device to store the precomputed outputs of the module.
            Any value other than `"cpu"` is replaced by `None`.
    """

    degree: int = 2
    objective: SearchBasedCalibObjective = SearchBasedCalibObjective.OutputsError
    strategy: SearchBasedCalibStrategy = SearchBasedCalibStrategy.Manual
    granularity: SearchBasedCalibGranularity = SearchBasedCalibGranularity.Layer
    element_batch_size: int = -1
    sample_batch_size: int = -1
    element_size: int = -1
    sample_size: int = -1
    pre_reshape: bool = True
    outputs_device: str = "cpu"

    def __post_init__(self) -> None:
        """Validate the size options and normalize the layer-granularity settings."""
        if self.outputs_device != "cpu":
            self.outputs_device = None
        if self.element_size == 0 and self.sample_size == 0:
            # no calibration data at all is only accepted for the tensor-error objective
            assert self.objective == SearchBasedCalibObjective.TensorError
        else:
            assert self.element_batch_size != 0, "element_batch_size must not be zero"
            assert self.sample_batch_size != 0, "sample_batch_size must not be zero"
            assert self.element_size != 0, "element_size must not be zero"
            assert self.sample_size != 0, "sample_size must not be zero"
        if (
            self.objective != SearchBasedCalibObjective.TensorError
            and self.granularity == SearchBasedCalibGranularity.Layer
        ):
            # layer granularity always uses the outputs error without element batching
            self.objective = SearchBasedCalibObjective.OutputsError
            self.element_batch_size = -1
            self.element_size = -1

    @property
    def needs_search(self) -> bool:
        """Whether the search is enabled."""
        return self.strategy != SearchBasedCalibStrategy.Manual

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        Args:
            prefix (`str`, *optional*, default=`""`):
                The prefix of the directory.

        Returns:
            `list[str]`:
                A single-element list with the directory name.
        """
        parts = [
            self.objective.name,
            self.strategy.name,
            self.granularity.name,
            f"d{num2str(self.degree)}",
            f"e{num2str(self.element_size)}",
            f"s{num2str(self.sample_size)}",
        ]
        if prefix:
            parts.insert(0, prefix)
        return [".".join(parts)]
sample_size (`int`, *optional*, default=`-1`): The calibration sample size. pre_reshape (`bool`, *optional*, default=`True`): Whether to enable reshaping the tensor before calibration. outputs_device (`str`, *optional*, default=`"cpu"`): The device to store the precomputed outputs of the module. fuse_when_possible (`bool`, *optional*, default=`True`): Whether to fuse smooth scale whenever possible. allow_a_quant (`bool`, *optional*, default=`True`): Whether to allow the quantization for alpha tensor. allow_b_quant (`bool`, *optional*, default=`True`): Whether to allow the quantization for beta tensor. spans (`list[tuple[SmoothSpanMode, SmoothSpanMode]]`, *optional*, default=`[]`): The span combinations. The first element is for the alpha and the second element is for the beta. alpha (`float`, *optional*, default=`0.5`): The smoothing alpha. beta (`float`, *optional*, default=`-1`): The smoothing beta. num_grids (`int`, *optional*, default=`20`): The number of grids for grid search. allow_low_rank (`bool`, *optional*, default=`False`): Whether to allow quantization low-rank branch during calibration. 
""" fuse_when_possible: bool = True allow_a_quant: bool = True allow_b_quant: bool = True spans: list[tuple[SmoothSpanMode, SmoothSpanMode]] = field( default_factory=list, metadata={ omniconfig.ARGPARSE_KWARGS: { "nargs": "+", "type": lambda s: tuple(SmoothSpanMode[x.split(".")[-1]] for x in s.split(",")), } }, ) a_spans: list[SmoothSpanMode] = field(default_factory=list, init=False) b_spans: list[SmoothSpanMode] = field(default_factory=list, init=False) alpha: float = 0.5 beta: float = -1 num_grids: int = 20 allow_low_rank: bool = False def __post_init__(self) -> None: # noqa: C901 # region remove duplicates of ranges _spans, _spanset, _a_spanset, _b_spanset = [], set(), set(), set() self.a_spans, self.b_spans = [], [] for a_span, b_span in self.spans: if isinstance(a_span, str): a_span = SmoothSpanMode[a_span] if isinstance(b_span, str): b_span = SmoothSpanMode[b_span] assert isinstance(a_span, SmoothSpanMode), f"Invalid span mode used for alpha: {a_span}" assert isinstance(b_span, SmoothSpanMode), f"Invalid span mode used for beta: {b_span}" _span = (a_span, b_span) if _span in _spanset: continue _spans.append(_span) _spanset.add(_span) if a_span not in _a_spanset: _a_spanset.add(a_span) self.a_spans.append(a_span) if b_span not in _b_spanset: _b_spanset.add(b_span) self.b_spans.append(b_span) self.spans = _spans # endregion if self.strategy == SearchBasedCalibStrategy.Manual: assert len(self.spans) == 1, "Only one span combination is allowed in manual mode" assert self.alpha != 0 or self.beta != 0, "alpha and beta cannot be both zero" self.alpha, self.beta = self.get_alpha_beta_pairs()[0] if self.granularity == SearchBasedCalibGranularity.Group: self.granularity = SearchBasedCalibGranularity.ChannelGroup if self.allow_low_rank: self.granularity = SearchBasedCalibGranularity.Layer assert -3 <= self.alpha <= 1, "alpha must be less than or equal to 1" assert -3 <= self.beta <= 1, "beta must be less than or equal to 1" super().__post_init__() def 
get_alpha_beta_pairs(self) -> list[tuple[float, float]]: # noqa: C901 """Get the alpha and beta pairs for smooth quantization. Returns: `list[tuple[float, float]]`: The alpha and beta pair candidates. """ if self.strategy == SearchBasedCalibStrategy.Manual: if self.beta < 0: assert 0 <= self.alpha <= 1, "alpha must be in [0, 1]" return [(self.alpha, 1 - self.alpha)] elif self.alpha < 0: assert 0 <= self.beta <= 1, "beta must be in [0, 1]" return [(1 - self.beta, self.beta)] else: assert 0 <= self.alpha <= 1, "alpha must be in [0, 1]" assert 0 <= self.beta <= 1, "beta must be in [0, 1]" return [(self.alpha, self.beta)] choices = [i / self.num_grids for i in range(1, self.num_grids)] if self.alpha > 0: if self.beta > 0: return [(0, 0)] + [(alpha, alpha) for alpha in choices] if self.beta == 0: return [(0, 0)] + [(alpha, 0) for alpha in choices] if self.beta == -1: return [(0, 0)] + [(alpha, 1 - alpha) for alpha in choices] if self.beta == -2: return [(0, 0)] + [(alpha, 0) for alpha in choices] + [(alpha, 1 - alpha) for alpha in choices] return ( [(0, 0)] + [(alpha, 0) for alpha in choices] + [(alpha, beta) for alpha in choices for beta in choices] ) if self.alpha == 0: if self.beta > 0: return [(0, 0)] + [(0, beta) for beta in choices] if self.beta == 0: return [(0, 0)] + [(alpha, 0) for alpha in choices] + [(0, beta) for beta in choices] if self.beta == -1: return [(0, 0)] + [(0, beta) for beta in choices] + [(alpha, 1 - alpha) for alpha in choices] if self.beta == -2: return ( [(0, 0)] + [(alpha, 0) for alpha in choices] + [(0, beta) for beta in choices] + [(alpha, 1 - alpha) for alpha in choices] ) return ( [(0, 0)] + [(alpha, 0) for alpha in choices] + [(0, beta) for beta in choices] + [(alpha, beta) for alpha in choices for beta in choices] ) if self.alpha == -1: if self.beta > 0 or self.beta == -1: return [(0, 0)] + [(alpha, 1 - alpha) for alpha in choices] if self.beta == 0 or self.beta == -2: return [(0, 0)] + [(alpha, 0) for alpha in choices] + [(alpha, 1 - 
alpha) for alpha in choices] return ( [(0, 0)] + [(alpha, 0) for alpha in choices] + [(alpha, beta) for alpha in choices for beta in choices] ) if self.alpha == -2: if self.beta > 0 or self.beta == -1: return [(0, 0)] + [(0, beta) for beta in choices] + [(alpha, 1 - alpha) for alpha in choices] if self.beta == 0 or self.beta == -2: return ( [(0, 0)] + [(alpha, 0) for alpha in choices] + [(0, beta) for beta in choices] + [(alpha, 1 - alpha) for alpha in choices] ) return ( [(0, 0)] + [(alpha, 0) for alpha in choices] + [(0, beta) for beta in choices] + [(alpha, beta) for alpha in choices for beta in choices] ) if self.alpha == -3: if self.beta > 0: return ( [(0, 0)] + [(0, beta) for beta in choices] + [(alpha, beta) for alpha in choices for beta in choices] ) return ( [(0, 0)] + [(0, beta) for beta in choices] + [(alpha, 0) for alpha in choices] + [(alpha, beta) for alpha in choices for beta in choices] ) raise ValueError("Invalid alpha and beta values") def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]: """Get the directory names of the smooth quantization configuration. Args: prefix (`str`, *optional*, default=`""`): The prefix of the directory. Returns: `list[str]`: The directory names of the configuration. 
""" names = super().generate_dirnames(**kwargs) names.append("[{}]".format("+".join(f"a.{a_span.name}.b.{b_span.name}" for a_span, b_span in self.spans))) alpha, beta = num2str(self.alpha), num2str(self.beta) if self.strategy == SearchBasedCalibStrategy.Manual: names.append(f"a{alpha}.b{beta}") elif self.alpha > 0: names.append(f"g{self.num_grids}.b{beta}") elif self.beta > 0: names.append(f"g{self.num_grids}.a{alpha}") else: names.append(f"g{self.num_grids}.a{alpha}.b{beta}") if self.allow_low_rank: names[-1] += ".lr" if not self.fuse_when_possible: names[-1] += ".nf" disallows = [] if not self.allow_a_quant: disallows.append("a") if not self.allow_b_quant: disallows.append("b") if disallows: names.append(f"disallow.[{'+'.join(disallows)}]") if prefix: names = [f"{prefix}.{name}" for name in names] return names @configclass @dataclass class SkipBasedSmoothCalibConfig(SkipBasedConfig, SmoothCalibConfig): """Configuration for smooth quantization. Args: degree (`int`, *optional*, default=`2`): The power degree for the quantization error. Defaults to `2`. objective (`SearchBasedCalibObjective`, *optional*, default=`SearchBasedCalibObjective.OutputsError`): The objective for quantization calibration. strategy (`SearchBasedCalibStrategy`, *optional*, default=`SearchBasedCalibStrategy.Manual`): The strategy for quantization calibration. granularity (`SearchBasedCalibGranularity`, *optional*, default=`SearchBasedCalibGranularity.Layer`): The granularity for quantization calibration. element_batch_size (`int`, *optional*, default=`-1`): The element batch size for calibration. sample_batch_size (`int`, *optional*, default=`-1`): The samples batch size for calibration. element_size (`int`, *optional*, default=`-1`): The calibration element size. sample_size (`int`, *optional*, default=`-1`): The calibration sample size. pre_reshape (`bool`, *optional*, default=`True`): Whether to enable reshaping the tensor before calibration. 
outputs_device (`str`, *optional*, default=`"cpu"`): The device to store the precomputed outputs of the module. allow_a_quant (`bool`, *optional*, default=`True`): Whether to allow the quantization for alpha tensor. allow_b_quant (`bool`, *optional*, default=`True`): Whether to allow the quantization for beta tensor. spans (`list[tuple[SmoothSpanMode, SmoothSpanMode]]`, *optional*, default=`[]`): The span combinations. The first element is for the alpha and the second element is for the beta. alpha (`float`, *optional*, default=`0.5`): The smoothing alpha. beta (`float`, *optional*, default=`-1`): The smoothing beta. num_grids (`int`, *optional*, default=`20`): The number of grids for grid search. allow_low_rank (`bool`, *optional*, default=`False`): Whether to allow quantization SVD during calibration. skips (`list[str]`, *optional*, default=`[]`): The keys of the modules to skip. """ pass @configclass @dataclass class SmoothAttentionCalibConfig(SmoothCalibConfig): """Configuration for smooth quantization. Args: degree (`int`, *optional*, default=`2`): The power degree for the quantization error. Defaults to `2`. strategy (`SearchBasedCalibStrategy`, *optional*, default=`SearchBasedCalibStrategy.Manual`): The strategy for quantization calibration. sample_batch_size (`int`, *optional*, default=`-1`): The samples batch size for calibration. sample_size (`int`, *optional*, default=`-1`): The calibration sample size. outputs_device (`str`, *optional*, default=`"cpu"`): The device to store the precomputed outputs of the module. allow_a_quant (`bool`, *optional*, default=`True`): Whether to allow the quantization for alpha tensor. allow_b_quant (`bool`, *optional*, default=`True`): Whether to allow the quantization for beta tensor. spans (`list[tuple[SmoothSpanMode, SmoothSpanMode]]`, *optional*, default=`[]`): The span combinations. The first element is for the alpha and the second element is for the beta. 
@configclass
@dataclass
class SmoothTransfomerConfig:
    """Configuration for smooth quantization of transformer-based models.

    Args:
        proj (`SkipBasedSmoothCalibConfig` or `None`, *optional*, default=`None`):
            The smooth configuration for projections.
        attn (`SmoothAttentionCalibConfig` or `None`, *optional*, default=`None`):
            The smooth configuration for attentions.
    """

    proj: SkipBasedSmoothCalibConfig | None = None
    attn: SmoothAttentionCalibConfig | None = None

    @property
    def enabled_proj(self) -> bool:
        """Whether the smooth quantization is enabled for projections."""
        proj_config = self.proj
        return proj_config is not None

    @property
    def enabled_attn(self) -> bool:
        """Whether the smooth quantization is enabled for attentions."""
        attn_config = self.attn
        return attn_config is not None

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Get the names of the smooth quantization configuration.

        The names of the projection and attention configurations are merged level
        by level and joined with ``"-"``; a level that only one of them provides
        keeps that single name.

        Args:
            prefix (`str`, *optional*, default=`""`):
                The prefix of the directory.

        Returns:
            `list[str]`:
                The names of the smooth quantization configuration
        """
        groups = [
            [] if self.proj is None else self.proj.generate_dirnames(prefix="proj"),
            [] if self.attn is None else self.attn.generate_dirnames(prefix="attn"),
        ]
        depth = max(len(group) for group in groups)
        dirnames = []
        for level in range(depth):
            # projection names come first, then attention names
            dirnames.append("-".join(group[level] for group in groups if level < len(group)))
        if prefix:
            dirnames = [f"{prefix}.{dirname}" for dirname in dirnames]
        return dirnames
""" if isinstance(config, KeyEnableConfig): assert config.is_enabled_for(w_quantizer.key), "The calibrator should be enabled for the quantizer." else: assert config.is_enabled(), "The calibrator should be enabled." super().__init__( tensor_type=TensorType.Weights, config=config, w_quantizer=w_quantizer, x_quantizer=x_quantizer, y_quantizer=None, develop_dtype=develop_dtype, ) assert self.needs_quant, "The tensor should be quantized." self.num_iters = config.num_iters @property def population_size(self) -> int: """Return the population size of the current iteration.""" return 1 @property def allows_x_quant_for_wgts(self) -> bool: """Whether the calibrator allows input quantization when tensor_type is Weights.""" return True @property def allows_w_quant_for_wgts(self) -> bool: """Whether the calibrator needs weight quantization when tensor_type is Weights.""" return True def is_done(self) -> bool: """Check if the calibration is done.""" return self.iter >= self.num_iters or self.early_stopped def is_last_iter(self) -> bool: """Check if the current iteration is the last one.""" return self.iter == self.num_iters - 1 def _reset(self, x_wgts: list[torch.Tensor | nn.Parameter], **kwargs) -> None: # noqa: C901 """Reset the calibrator. Args: x_wgts (`list[torch.Tensor | nn.Parameter]`): The weights in x-w computation. 
""" self.best_branch: LowRankBranch = None self.best_error: torch.Tensor = None self.error_history: list[tuple[float, float]] = [] self.early_stopped = False if len(x_wgts) > 1 and not self.config.exclusive: self.w = torch.cat([wgt.data for wgt in x_wgts], dim=0) else: assert len(x_wgts) == 1 self.w = x_wgts[0].data if self.config.compensate: self.qw = torch.cat( [ self.w_quantizer.quantize(wgt.data, kernel=None, develop_dtype=self.develop_dtype).data for wgt in x_wgts ], dim=0, ) else: self.qw = 0 self.hat_ws: list[torch.Tensor] = [None] * len(x_wgts) self.ocs: list[int] = [wgt.shape[0] for wgt in x_wgts] def get_best(self) -> LowRankBranch: """Get the best candidate. Returns: `LowRankBranch`: The best candidate. """ return self.best_branch def _ask(self) -> LowRankBranch: """Ask for the next candidate. Returns: `LowRankBranch`: The next candidate. """ branch = LowRankBranch( self.w.shape[1], self.w.shape[0], rank=self.config.rank, weight=self.w - self.qw, ) self.wgt_idx = 0 if len(self.hat_ws) > 1: lw = branch.get_effective_weight().view(self.w.shape) rw = self.w - lw oc_idx = 0 for idx, oc in enumerate(self.ocs): self.hat_ws[idx] = self.w_quantizer.quantize( rw[oc_idx : oc_idx + oc], kernel=None, develop_dtype=self.develop_dtype ).data oc_idx += oc self.qw = torch.cat(self.hat_ws, dim=0) if self.objective != SearchBasedCalibObjective.OutputsError: oc_idx = 0 for idx, oc in enumerate(self.ocs): self.hat_ws[idx].add_(lw[oc_idx : oc_idx + oc]) oc_idx += oc else: lw = branch.get_effective_weight().view(self.w.shape) self.qw = self.w_quantizer.quantize(self.w - lw, kernel=None, develop_dtype=self.develop_dtype).data if self.objective != SearchBasedCalibObjective.OutputsError: self.hat_ws = [self.qw + lw] else: self.hat_ws = [self.qw] return branch def _tell(self, error: list[torch.Tensor]) -> None: # noqa: C901 """Tell the error of the last candidate and update the best candidate. Args: errors (list[torch.Tensor]): The error of the last candidate. 
""" if len(error) > 1: error = [sum(error)] error = error[0] assert isinstance(error, torch.Tensor) assert error.numel() == 1, "The error should only have one value." if self.best_error is None or error <= self.best_error: self.best_error = error self.best_branch = self.candidate elif self.config.early_stop: self.early_stopped = True if self.logger.level <= tools.logging.DEBUG: self.error_history.append( ( math.root_(error.to(torch.float64), self.config.degree).item(), math.root_(self.best_error.to(torch.float64), self.config.degree).item(), ) ) if self.iter % 10 == 9 or self.is_last_iter() or self.early_stopped: iter_end = ((self.iter + 10) // 10) * 10 iter_start = iter_end - 10 iter_end = min(iter_end, self.iter + 1) history = self.error_history[iter_start:iter_end] self.logger.debug(" - iter = [%s]", ", ".join(f"{i:10d}" for i in range(iter_start, iter_end))) self.logger.debug(" - error = [%s]", ", ".join(f"{e[0]:10.4f}" for e in history)) self.logger.debug(" - best error = [%s]", ", ".join(f"{e[1]:10.4f}" for e in history)) def _process_x_in_xw(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: if not self.needs_x_quant_for_wgts: return x return self.x_quantizer.quantize(x, channels_dim=channels_dim, develop_dtype=self.develop_dtype).data def _process_w_in_xw(self, w: torch.Tensor) -> torch.Tensor: hat_w = self.hat_ws[self.wgt_idx] self.hat_ws[self.wgt_idx] = None self.wgt_idx += 1 return hat_w if self.needs_w_quant_for_wgts else w def _process_y_in_yx(self, y: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: raise RuntimeError("_process_y_in_yx should not be called in QuantSVDCalibrator.") def _process_x_in_yx(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: raise RuntimeError("_process_x_in_yx should not be called in QuantSVDCalibrator.") def _process_xw_in_yx(self, w: torch.Tensor) -> torch.Tensor: raise RuntimeError("_process_xw_in_yx should not be called in 
QuantSVDCalibrator.") def _process_yw_in_yx(self, w: torch.Tensor) -> torch.Tensor: raise RuntimeError("_process_yw_in_yx should not be called in QuantSVDCalibrator.") def _process_wgts_centric_mod( self, wgts: list[nn.Parameter], mods: list[nn.Module], update_state_dict: bool = True, **kwargs ) -> None: assert len(self.hat_ws) == len(wgts) == len(mods) shared = self.candidate if len(self.hat_ws) > 1: oc_idx = 0 for mod, wgt, hat_w in zip(mods, wgts, self.hat_ws, strict=True): if update_state_dict: self._state_dict.append((wgt, wgt.data)) wgt.data = hat_w branch = LowRankBranch(wgt.shape[1], wgt.shape[0], rank=self.config.rank) branch.a = shared.a branch.b.to(dtype=wgt.dtype, device=wgt.device) branch.b.weight.copy_(shared.b.weight[oc_idx : oc_idx + wgt.data.shape[0]]) oc_idx += wgt.data.shape[0] self._hooks.append(branch.as_hook().register(mod)) else: if update_state_dict: self._state_dict.append((wgts[0], wgts[0].data)) wgts[0].data = self.hat_ws[0] self._hooks.append(shared.as_hook().register(mods)) if self.needs_x_quant_for_wgts: self._hooks.append(self.x_quantizer.as_hook().register(mods)) self.hat_ws = [None] * len(self.hat_ws) ================================================ FILE: deepcompressor/calib/metric.py ================================================ # -*- coding: utf-8 -*- """Channel-wise metric calculation module.""" import typing as tp import torch from ..data.utils.shape import infer_view_shape __all__ = ["ChannelMetric"] class ChannelMetric: """Channel-wise metric.""" @staticmethod def _normalize( tensor: torch.Tensor, group_shape: tp.Sequence[int], dtype: torch.dtype, ) -> torch.Tensor: shape, ndim = tensor.shape, tensor.ndim view_shape = infer_view_shape(tensor.shape, group_shape) # (d0, d1, d2, ...) -> (#g0, gs0, #g1, gs1, #g2, gs2, ...) 
tensor = tensor.view(view_shape).to(dtype=dtype) tensor_max = tensor.abs().amax(dim=list(range(1, ndim * 2, 2)), keepdim=True) tensor_max[tensor_max == 0] = 1 tensor = tensor / tensor_max return tensor.view(shape) @staticmethod def _abs_max( tensor: torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device, dtype: torch.dtype, ) -> tuple[torch.Tensor, int]: return ( tensor.view(tensor.shape[0], num_channels, -1) .abs() .amax(dim=(0, 2)) .view(-1) .to(dtype=dtype, device=device), 1, ) @staticmethod def _abs_sum( tensor: torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device, dtype: torch.dtype, ) -> tuple[torch.Tensor, int]: tensor = tensor.view(tensor.shape[0], num_channels, -1) cnt = tensor.shape[0] * tensor.shape[2] return tensor.abs().to(dtype=dtype).sum(dim=(0, 2)).view(-1).to(device=device), cnt @staticmethod def _abs_normalize_sum( tensor: torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device, dtype: torch.dtype, ) -> tuple[torch.Tensor, int]: return ChannelMetric._abs_sum( ChannelMetric._normalize(tensor, group_shape, dtype=dtype), num_channels, group_shape, device=device, dtype=dtype, ) @staticmethod def _square_sum( tensor: torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device, dtype: torch.dtype, ) -> tuple[torch.Tensor, int]: tensor = tensor.view(tensor.shape[0], num_channels, -1) cnt = tensor.shape[0] * tensor.shape[2] return tensor.to(dtype=dtype).pow(2).sum(dim=(0, 2)).view(-1).to(device=device), cnt @staticmethod def _max_reduce( fn: tp.Callable[ [torch.Tensor, int, tp.Sequence[int], torch.device, torch.dtype], tuple[torch.Tensor, torch.Tensor | int | float], ], tensors: tp.Sequence[torch.Tensor] | torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device | str | None = None, dtype: torch.dtype = torch.float32, ) -> tuple[torch.Tensor, torch.Tensor | int | float]: if isinstance(tensors, torch.Tensor): 
device = device or tensors.device return fn(tensors, num_channels, group_shape, device, dtype) else: rst_0, rst_1 = ChannelMetric._max_reduce(fn, tensors[0], num_channels, group_shape, device, dtype) for tensor in tensors[1:]: _rst_0, _rst_1 = ChannelMetric._max_reduce(fn, tensor, num_channels, group_shape, device, dtype) rst_0 = torch.maximum(rst_0, _rst_0.to(device=rst_0.device)) if isinstance(rst_1, torch.Tensor): rst_1 = torch.maximum(rst_1, _rst_1.to(device=rst_1.device)) else: rst_1 = max(rst_1, _rst_1) return rst_0, rst_1 @staticmethod def _sum_reduce( fn: tp.Callable[ [torch.Tensor, int, tp.Sequence[int], torch.device, torch.dtype], tuple[torch.Tensor, torch.Tensor | int | float], ], tensors: tp.Sequence[torch.Tensor] | torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device | str | None = None, dtype: torch.dtype = torch.float32, ) -> tuple[torch.Tensor, torch.Tensor | int | float]: if isinstance(tensors, torch.Tensor): device = device or tensors.device return fn(tensors.to(device), num_channels, group_shape, device, dtype) else: assert isinstance(tensors, (list, tuple)) rst_0, rst_1 = ChannelMetric._sum_reduce(fn, tensors[0], num_channels, group_shape, device, dtype) for tensor in tensors[1:]: _rst_0, _rst_1 = ChannelMetric._sum_reduce(fn, tensor, num_channels, group_shape, device, dtype) rst_0 += _rst_0.to(device=rst_0.device) if isinstance(rst_1, torch.Tensor): rst_1 += _rst_1.to(device=rst_1.device) else: rst_1 += _rst_1 return rst_0, rst_1 @staticmethod def abs_max( tensors: tp.Iterable[torch.Tensor] | torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device | str = None, dtype: torch.dtype = torch.float32, ) -> torch.Tensor: """Get the absolute maximum of the tensors, where `R[i] = AbsMax(T[i, :])`.""" return ChannelMetric._max_reduce( ChannelMetric._abs_max, tensors, num_channels, group_shape, device=device, dtype=dtype )[0] @staticmethod def abs_mean( tensors: tp.Iterable[torch.Tensor] | 
torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device | str = None, dtype: torch.dtype = torch.float32, ) -> torch.Tensor: """Get the absolute mean of the tensors, where `R[i] = AbsMean(T[i, :])`.""" rst, cnt = ChannelMetric._sum_reduce( ChannelMetric._abs_sum, tensors, num_channels, group_shape, device=device, dtype=dtype ) return rst.div_(cnt) @staticmethod def abs_normalize_mean( tensors: tp.Iterable[torch.Tensor] | torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device | str = None, dtype: torch.dtype = torch.float32, ) -> torch.Tensor: """Get the absolute group normalized mean of the tensors, where `R[i] = Mean(U[i, :])` and `U[i,j] = Abs(T[i, j]) / AbsMax(T[:, j]))`.""" rst, cnt = ChannelMetric._sum_reduce( ChannelMetric._abs_normalize_sum, tensors, num_channels, group_shape, device=device, dtype=dtype ) return rst.div_(cnt) @staticmethod def root_mean_square( tensors: tp.Iterable[torch.Tensor] | torch.Tensor, num_channels: int, group_shape: tp.Sequence[int], device: torch.device | str = None, dtype: torch.dtype = torch.float32, ) -> torch.Tensor: """Get the root mean square of the tensors, where `R[i] = Root(Mean(T[i, :]^2))`.""" rst, cnt = ChannelMetric._sum_reduce( ChannelMetric._square_sum, tensors, num_channels, group_shape, device=device, dtype=dtype ) return rst.div_(cnt).sqrt_() ================================================ FILE: deepcompressor/calib/range.py ================================================ # -*- coding: utf-8 -*- """Quantization dynamic range calibration.""" import gc import typing as tp from dataclasses import _MISSING_TYPE, MISSING import torch import torch.nn as nn from ..data.cache import TensorsCache from ..data.common import TensorType from ..data.range import DynamicRange from ..data.scale import QuantScale from ..data.utils.shape import infer_view_shape from ..quantizer.impl.info import QuantInfo from ..quantizer.processor import Quantizer from ..utils import 
class DynamicRangeCalibrator(SearchBasedCalibrator[DynamicRangeCalibConfig, DynamicRange]):
    """The quantization dynamic range calibrator.

    Candidates are taken from ``config.get_ratios()``: one list of ratios per search iteration.
    In clamp-based mode a candidate is the base range scaled by the ratio; otherwise the
    candidate only carries the ratio.
    """

    def __init__(
        self,
        tensor_type: TensorType,
        config: DynamicRangeCalibConfig,
        static: bool,
        quantizer: Quantizer,
        pre_scale: torch.Tensor | None = None,
    ) -> None:
        """Initialize the calibrator.

        Args:
            tensor_type (`TensorType`):
                The tensor type.
            config (`DynamicRangeCalibConfig`):
                The dynamic range calibration configuration.
            static (`bool`):
                Whether the dynamic range is static, i.e., whether the quantization is static.
            quantizer (`Quantizer`):
                The quantizer.
            pre_scale (`torch.Tensor` or `None`):
                The joint scale tensor of the previous quantization steps.
        """
        super().__init__(
            tensor_type=tensor_type,
            config=config,
            w_quantizer=quantizer if tensor_type == TensorType.Weights else None,
            x_quantizer=quantizer if tensor_type == TensorType.Inputs else None,
            y_quantizer=quantizer if tensor_type == TensorType.Outputs else None,
            develop_dtype=quantizer.develop_dtype,
        )
        assert self.needs_quant, "The tensor should be quantized."
        self.static = static
        self.pre_scale = pre_scale
        self.ratios = self.config.get_ratios()
        self.num_iters = len(self.ratios)

    @property
    def population_size(self) -> int:
        """Return the population size of the current iteration."""
        return len(self.ratios[self.iter])

    def is_clamp_based(self) -> bool:
        """Return whether the calibration is clamp-based."""
        return self.static or not self.config.allow_scale

    def _reset(  # noqa: C901
        self,
        x_wgts: list[torch.Tensor | nn.Parameter],
        x_acts: TensorsCache | None,
        y_acts: TensorsCache | None,
        **kwargs,
    ) -> None:
        """Reset the calibrator.

        Args:
            x_wgts (`list[torch.Tensor | nn.Parameter]`):
                The weights in x-w computation.
            x_acts (`TensorsCache` or `None`):
                The x activations in x-w computation.
            y_acts (`TensorsCache` or `None`):
                The y activations in y-x computation.
        """
        self.base_range: DynamicRange = DynamicRange()
        self.best_range: DynamicRange = DynamicRange()
        self.best_error: torch.Tensor = None
        self.error_history: list[tuple[float, float]] = []
        self.device = None
        if self.tensor_type == TensorType.Weights:
            assert len(x_wgts) == 1, "The weight should be a single tensor."
            wgts = x_wgts[0].data
            assert isinstance(wgts, torch.Tensor), "The weight should be a tensor."
            tensors = [wgts]
            self.device = wgts.device
        elif self.tensor_type == TensorType.Inputs:
            assert x_acts is not None, "The input activations should be provided."
            assert x_acts.num_tensors == 1, f"Only one input is allowed, got {x_acts.num_tensors}"
            acts = x_acts.front()
            tensors = acts.get_standardized_data(reshape=False)
            self.device = acts.orig_device
        else:
            assert y_acts is not None, "The output activations should be provided."
            assert y_acts.num_tensors == 1, f"Only one output is allowed, got {y_acts.num_tensors}"
            acts = y_acts.front()
            tensors = acts.get_standardized_data(reshape=False)
            self.device = acts.orig_device
        shape = tensors[0].shape
        view_shape = infer_view_shape(
            shape,
            self.quantizer.config.largest_group_shape,
            skip_first_dim=self.tensor_type != TensorType.Weights,
        )
        # region get range scale shape
        # broadcastable shape that only keeps entry 2 of the view shape
        self.pos_view_shape = torch.Size([1, 1, view_shape[2], *([1] * (len(view_shape) - 3))])
        # keep the even entries of the view shape and collapse the odd ones to 1
        self.range_shape = torch.Size([gs if i % 2 == 0 else 1 for i, gs in enumerate(view_shape)])
        if self.granularity == SearchBasedCalibGranularity.Layer:
            self.ratio_shape = self.error_shape = torch.Size((1,))
            self.ratio_view_shape = self.ratio_shape
        elif self.granularity == SearchBasedCalibGranularity.ChannelGroup:
            self.ratio_shape = self.error_shape = torch.Size((view_shape[2],))
            self.ratio_view_shape = self.pos_view_shape
        elif self.granularity == SearchBasedCalibGranularity.Group:
            self.ratio_shape = self.error_shape = torch.Size(view_shape[::2])
            self.ratio_view_shape = self.range_shape
        else:
            raise ValueError(f"Invalid granularity: {self.granularity}")
        assert self.ratio_shape.numel() == self.ratio_view_shape.numel()
        if self.pre_scale is not None:
            assert len(shape) * 2 == len(self.pre_scale.shape)
            self.pre_view_shape = infer_view_shape(shape, self.pre_scale.shape[1::2])
        else:
            self.pre_view_shape = torch.Size()
        # endregion
        if self.is_clamp_based():
            if self.pre_scale is not None:
                tensors = [self._preprocess_with_pre_scale(t) for t in tensors]
            tensors = [t.view(view_shape).to(dtype=self.develop_dtype) for t in tensors]
            self.base_range = DynamicRange.construct(
                tensors,
                zero_domain=self.quantizer.config.zero_domain,
                is_float_point=self.quantizer.config.quant_dtype.is_float_point,
            )
        gc.collect()
        torch.cuda.empty_cache()

    def get_best(self) -> DynamicRange:
        """Get the best candidate.

        Returns:
            `DynamicRange`: The best candidate.
        """
        if self.static:
            return DynamicRange(min=self.best_range.min, max=self.best_range.max)
        elif self.is_clamp_based():
            return DynamicRange(min=self.best_range.min, max=self.best_range.max, ratio=1.0)
        else:
            return DynamicRange(ratio=self.best_range.ratio.view(self.ratio_view_shape))

    def _ask(self) -> DynamicRange:
        """Ask for the next candidate.

        Returns:
            `DynamicRange`: The next candidate.
        """
        ratio = self.ratios[self.iter][self.candidate_id]
        if self.is_clamp_based():
            return self.base_range.scale(
                ratio=ratio,
                zero_domain=self.quantizer.config.zero_domain,
                is_float_point=self.quantizer.config.quant_dtype.is_float_point,
            )
        else:
            return DynamicRange(ratio=ratio)

    def _tell(self, error: list[torch.Tensor]) -> None:  # noqa: C901
        """Tell the error of the last candidate and update the best candidate.

        Args:
            errors (`list[torch.Tensor]`): The error of the last candidate.
        """
        assert len(error) == 1, "The error should only have one value."
        error = error[0]
        assert isinstance(error, torch.Tensor)
        assert error.shape == self.error_shape, f"Error shape {error.shape} != {self.error_shape}."
        assert isinstance(self.candidate, DynamicRange)
        candidate_ratio = self.ratios[self.iter][self.candidate_id]
        if self.best_error is None:
            # first candidate: adopt it as the best everywhere
            self.best_error = error
            if self.is_clamp_based():
                self.best_range.min = self.candidate.min
                self.best_range.max = self.candidate.max
            self.best_range.ratio = torch.full(
                size=self.ratio_shape, fill_value=candidate_ratio, device=self.device, dtype=self.develop_dtype
            )
        elif error.numel() > 1:
            # element-wise update: only the positions where this candidate improves
            pos = error < self.best_error
            self.best_error[pos] = error[pos]
            if self.is_clamp_based():
                # The min/max tensors are indexed with a mask of `range_shape`. Keep that mask separate from
                # `pos`: the expanded mask has more elements than `ratio_shape` and cannot be viewed back.
                if self.error_shape.numel() != self.range_shape.numel():
                    range_pos = pos.view(self.pos_view_shape).expand(*self.range_shape)
                else:
                    range_pos = pos.view(self.range_shape)
                self.best_range.max[range_pos] = self.candidate.max[range_pos]
                if isinstance(self.candidate.min, torch.Tensor):
                    self.best_range.min[range_pos] = self.candidate.min[range_pos]
            self.best_range.ratio[pos.view(self.ratio_shape)] = candidate_ratio
        elif error < self.best_error:
            self.best_error = error
            if self.is_clamp_based():
                self.best_range.min = self.candidate.min
                self.best_range.max = self.candidate.max
            self.best_range.ratio.fill_(candidate_ratio)
        if self.logger.level <= tools.logging.DEBUG:
            self.error_history.append(
                (
                    math.root_(error.to(torch.float64).sum(), self.config.degree).item(),
                    math.root_(self.best_error.to(torch.float64).sum(), self.config.degree).item(),
                )
            )
            if self.is_last_candidate_in_iter():
                stype_id = self.iter
                ratios, population_size = self.ratios[stype_id], self.population_size
                # print the history of this iteration five candidates per row
                for i in range(0, population_size, 5):
                    self.logger.debug(
                        " - range ratio = [%s]",
                        ", ".join(f"{ratios[j]:10.4f}" for j in range(i, min(i + 5, population_size))),
                    )
                    self.logger.debug(
                        " sum error = [%s]",
                        ", ".join(f"{self.error_history[j][0]:10.4f}" for j in range(i, min(i + 5, population_size))),
                    )
                    self.logger.debug(
                        " best error = [%s]",
                        ", ".join(f"{self.error_history[j][1]:10.4f}" for j in range(i, min(i + 5, population_size))),
                    )
                self.error_history.clear()
                if self.is_last_iter():
                    self.logger.debug(
                        "+ error = [%.4f]",
                        math.root_(self.best_error.to(torch.float64).sum(), self.config.degree).item(),
                    )

    def _preprocess_with_pre_scale(self, t: torch.Tensor) -> torch.Tensor:
        """Divide ``t`` by the scale of the previous steps and clamp it to the range bound if one is set.

        The input is converted or cloned first, so the in-place division does not modify the caller's tensor.
        """
        t = t.view(self.pre_view_shape)
        t = t.to(dtype=self.develop_dtype) if t.dtype != self.develop_dtype else t.clone()
        t = t.div_(self.pre_scale)
        if self.quantizer.range_bound is not None and self.quantizer.range_bound.is_set():
            t = t.clamp_(min=self.quantizer.range_bound.min, max=self.quantizer.range_bound.max)
        return t

    def _process_wxy(self, tensor: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor:
        """Quantize ``tensor`` with the current candidate range and return it in its original shape and dtype."""
        shape, dtype = tensor.shape, tensor.dtype
        if self.pre_scale is not None:
            tensor = self._preprocess_with_pre_scale(tensor).view(shape)
        tensor = self.quantizer.quantize(
            tensor,
            kernel=None,
            channels_dim=channels_dim,
            dynamic_range=self.candidate,
            default_dtype=dtype,
            develop_dtype=self.develop_dtype,
        ).data
        if self.pre_scale is not None:
            # undo the division applied in `_preprocess_with_pre_scale`
            tensor = tensor.view(self.pre_view_shape).mul_(self.pre_scale).to(dtype)
        tensor = tensor.view(shape)
        return tensor

    def _process_x_in_xw(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor:
        """Quantize the inputs of the x-w computation only when inputs are being calibrated."""
        if self.tensor_type != TensorType.Inputs:
            return x
        return self._process_wxy(x, channels_dim)

    def _process_w_in_xw(self, w: torch.Tensor) -> torch.Tensor:
        """Quantize the weight of the x-w computation only when weights are being calibrated."""
        if self.tensor_type != TensorType.Weights:
            return w
        return self._process_wxy(w, channels_dim=None)

    def _process_y_in_yx(self, y: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor:
        """Quantize the outputs of the y-x computation only when outputs are being calibrated."""
        if self.tensor_type != TensorType.Outputs:
            return y
        return self._process_wxy(y, channels_dim)

    def _process_x_in_yx(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor:
        raise RuntimeError("_process_x_in_yx should not be called in DynamicRangeCalibrator.")

    def _process_xw_in_yx(self, w: torch.Tensor) -> torch.Tensor:
        raise RuntimeError("_process_xw_in_yx should not be called in DynamicRangeCalibrator.")

    def _process_yw_in_yx(self, w: torch.Tensor) -> torch.Tensor:
        raise RuntimeError("_process_yw_in_yx should not be called in DynamicRangeCalibrator.")
def calibrate_dynamic_range(
    tensor_type: TensorType,
    config: DynamicRangeCalibConfig | None,
    static: bool,
    quantizer: Quantizer,
    modules: tp.Sequence[nn.Module],
    activations: TensorsCache,
    weights: tp.Sequence[nn.Parameter] | None = None,
    eval_inputs: TensorsCache | None = None,
    eval_module: nn.Module | None = None,
    eval_kwargs: dict[str, tp.Any] | None = None,
    orig_weights: tp.Sequence[tuple[nn.Parameter, torch.Tensor]] | None = None,
    orig_activations: TensorsCache | None = None,
    orig_eval_inputs: TensorsCache | None = None,
) -> tp.Sequence[DynamicRange] | None:
    """Calibrate the dynamic range.

    Args:
        tensor_type (`TensorType`):
            The tensor type.
        config (`DynamicRangeCalibConfig`):
            The quantization dynamic range calibration configuration.
        static (`bool`):
            Whether the dynamic range is static.
        quantizer (`Quantizer`):
            The quantizer.
        modules (`Sequence[nn.Module]`):
            The modules to calibrate.
        activations (`TensorsCache`):
            The inputs cache if the tensor type is not outputs, or the outputs cache if the tensor type is outputs.
        weights (`Sequence[nn.Parameter]` or `None`, *optional*, defaults to `None`):
            The weights to calibrate.
            If not provided, the weights of the modules will be used.
        eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The cache of the inputs for evaluation.
            If not provided, the `activations` cache will be used.
        eval_module (`nn.Module` or `None`, *optional*, defaults to `None`):
            The module to evaluate the quantization error.
            If not provided, the module to calibrate will be used.
        eval_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`):
            The keyword arguments for evaluation.
        orig_weights (`Sequence[tuple[nn.Parameter, torch.Tensor]]` or `None`, *optional*, defaults to `None`):
            The original weights.
        orig_activations (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The original activations.
        orig_eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The original evaluation inputs.

    Returns:
        `Sequence[DynamicRange]` or `None`:
            The dynamic ranges of each quantization step, or `None` when no calibration is
            configured, the quantizer is disabled, or no search is needed and the ratio is `1.0`.
    """
    if config is None or not quantizer.is_enabled():
        return None

    decomposed_config = quantizer.config.decompose()
    num_steps = decomposed_config.num_steps
    # region dynamic range without search
    if not config.needs_search and (not static or tensor_type == TensorType.Weights):
        if config.ratio != 1.0:
            # only the first step carries the fixed ratio
            dynamic_range = DynamicRange(ratio=config.ratio)
            return tuple([dynamic_range] + [None] * (num_steps - 1))
        else:
            return None
    # endregion
    # region prepare for search
    if weights is None:
        weights = [module.weight for module in modules if hasattr(module, "weight")]
    if tensor_type == TensorType.Weights:
        assert len(modules) == 1, "only one module is supported for weight quantization calibration"
        assert len(weights) == 1, "only one weight is supported for weight quantization calibration"
        if eval_module is None:
            eval_module = modules[0]
            if eval_inputs is None:
                eval_inputs = activations
        else:
            assert eval_inputs is not None, "eval_inputs is required when eval_module is provided"
    else:
        assert activations is not None, "activations is required for activation quantization calibration"
        assert activations.num_tensors == 1, "only one tensor is supported for activation quantization calibration"
    if tensor_type != TensorType.Outputs:
        x_wgts, x_acts, x_mods, orig_x_wgts, orig_x_acts = weights, activations, modules, orig_weights, orig_activations
        y_wgts, y_acts, y_mods, orig_y_wgts, orig_y_acts = [], None, None, None, None
    else:
        x_wgts, x_acts, x_mods, orig_x_wgts, orig_x_acts = [], None, None, None, None
        y_wgts, y_acts, y_mods, orig_y_wgts, orig_y_acts = weights, activations, modules, orig_weights, orig_activations
    # endregion
    if num_steps == 1:
        dynamic_range = DynamicRangeCalibrator(
            tensor_type=tensor_type,
            config=config,
            static=static,
            quantizer=quantizer,
        ).calibrate(
            x_wgts=x_wgts,
            y_wgts=y_wgts,
            x_acts=x_acts,
            y_acts=y_acts,
            eval_inputs=eval_inputs,
            eval_module=eval_module,
            eval_kwargs=eval_kwargs,
            x_mods=x_mods,
            y_mods=y_mods,
            orig_x_wgts=orig_x_wgts,
            orig_y_wgts=orig_y_wgts,
            orig_x_acts=orig_x_acts,
            orig_y_acts=orig_y_acts,
            orig_eval_inputs=orig_eval_inputs,
        )
        return (dynamic_range,)
    # region prepare for search with progressive quantization
    if tensor_type == TensorType.Weights:
        tensor = weights[0].detach().data
    else:
        assert activations.num_tensors == 1, "Only one tensor is supported for activation quantization"
        acts = activations.front()
        # exactly one cached tensor is required, since `acts.data[0]` is read right below
        assert len(acts.data) == 1, "Only one tensor is supported for activation quantization"
        tensor = acts.data[0].detach().data
        if acts.channels_dim is not None:
            tensor = tensor.reshape(-1, *tensor.shape[acts.channels_dim :])
    develop_dtype = quantizer.develop_dtype
    default_scale_dtype = quantizer.default_dtype or tensor.dtype
    # work on a private copy: the tensor is rescaled in place between steps
    develop_tensor = tensor.to(dtype=develop_dtype) if tensor.dtype != develop_dtype else tensor.clone()
    del tensor
    # endregion
    info = QuantInfo.construct(
        decomposed_config,
        tensor_shape=develop_tensor.shape,
        default_dtype=default_scale_dtype,
        quant_range=quantizer.quant_range,
        range_bound=quantizer.range_bound,
    )
    dynamic_ranges = []
    quant_scale = QuantScale()
    for step, step_info in enumerate(info.steps):
        step_quantizer = Quantizer(
            config=step_info.to_config(),
            # only the last step uses the quantizer's kernel
            kernel=quantizer.kernel if step == num_steps - 1 else None,
            quant_range=step_info.quant_range,
            range_bound=step_info.range_bound,
            default_dtype=quantizer.default_dtype,
            develop_dtype=quantizer.develop_dtype,
        )
        # forward the same original references as the single-step path above,
        # so that both paths measure the error against the same baseline
        step_dynamic_range = DynamicRangeCalibrator(
            tensor_type=tensor_type,
            config=config,
            static=static,
            quantizer=step_quantizer,
            pre_scale=quant_scale.data,
        ).calibrate(
            x_wgts=x_wgts,
            y_wgts=y_wgts,
            x_acts=x_acts,
            y_acts=y_acts,
            eval_inputs=eval_inputs,
            eval_module=eval_module,
            eval_kwargs=eval_kwargs,
            x_mods=x_mods,
            y_mods=y_mods,
            orig_x_wgts=orig_x_wgts,
            orig_y_wgts=orig_y_wgts,
            orig_x_acts=orig_x_acts,
            orig_y_acts=orig_y_acts,
            orig_eval_inputs=orig_eval_inputs,
        )
        dynamic_ranges.append(step_dynamic_range)
        step_scale, _ = step_info.scale.quantize(
            tensor=develop_tensor.view(step_info.tensor_shape),
            dynamic_range=step_dynamic_range,
        )
        quant_scale.append(step_scale)
        # NOTE(review): the rescale is skipped when there are only two steps, presumably because the
        # scale computed in the last step is never consumed by a later calibrator -- confirm.
        if num_steps > 2 and step < num_steps - 1:
            step_quant_range = step_info.tensor_quant_range
            develop_tensor = develop_tensor.view(step_info.tensor_view_shape).div_(step_scale.data)
            develop_tensor = develop_tensor.clamp_(min=step_quant_range.min, max=step_quant_range.max)
    return tuple(dynamic_ranges)
""" self.index = self.index.to(device=tensor.device) return tensor.index_select(dim=self.channels_dim, index=self.index) def get_channel_index_from_rank( rank: torch.Tensor, num_channels: int, num_groups: int, index_mode: ChannelOrderCalibConfig.ChannelIndex, ) -> torch.Tensor: """Get the index from the rank. Args: rank (`torch.Tensor`): The rank of the channels. num_channels (`int`): The number of channels. num_groups (`int`): The number of groups. index_mode (`ChannelOrderCalibConfig.ChannelIndex`): The index mode. Returns: `torch.Tensor`: The index of the channels, i.e., the order of the channels. """ if index_mode == ChannelOrderCalibConfig.ChannelIndex.Transpose: return rank.view(num_channels // num_groups, num_groups).t().reshape(-1) elif index_mode == ChannelOrderCalibConfig.ChannelIndex.Sequential: return rank else: raise ValueError(f"Unsupported index mode: {index_mode}") def get_channel_metric( inputs: TensorsCache, weights: tp.Sequence[torch.Tensor], metric_mode: ChannelOrderCalibConfig.ChannelMetric, num_channels: int, num_heads: int = 1, device: torch.device | str | None = None, dtype: torch.dtype = torch.float32, ) -> torch.Tensor: """Get the metric value of the channels. Args: inputs (`TensorsCache`): The input activations. weights (`Sequence[torch.Tensor]`): The weight tensors. metric_mode (`ChannelOrderCalibConfig.ChannelMetric`): The channel metric mode. num_channels (`int`): The number of channels. num_heads (`int`, *optional*, defaults to `1`): The number of heads. device (`torch.device` or `str` or `None`, *optional*, defaults to `None`): The device of the metric value tensor. dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The data type of the metric value tensor. Returns: `torch.Tensor`: The metric value of the channels. 
""" metric_name = metric_mode.name if metric_name.endswith("Product"): metric_name = metric_name[:-7] ipts_metric = get_channel_metric( inputs=inputs, weights=weights, metric_mode=ChannelOrderCalibConfig.ChannelMetric[f"Inputs{metric_name}"], num_channels=num_channels, num_heads=num_heads, device=device, dtype=dtype, ) wgts_metric = get_channel_metric( inputs=inputs, weights=weights, metric_mode=ChannelOrderCalibConfig.ChannelMetric[f"Weights{metric_name}"], num_channels=num_channels, num_heads=num_heads, device=device, dtype=dtype, ) return ipts_metric * wgts_metric else: if metric_name.startswith("Inputs"): assert inputs.num_tensors == 1, f"Only one input source is allowed, got {inputs.num_tensors}" metric_name, tensors = metric_name[6:], inputs.front().get_standardized_data(reshape=False) else: assert metric_name.startswith("Weights") metric_name, tensors = metric_name[7:], weights group_shape = [-1] * tensors[0].ndim group_shape[1] = num_channels // num_heads # convert metric name from camel case to snake case metric_name = "".join(["_" + c.lower() if c.isupper() else c for c in metric_name]) metric_name = metric_name.lstrip("_") metric_fn = getattr(ChannelMetric, metric_name) return metric_fn(tensors, num_channels, group_shape, device=device, dtype=dtype).view(num_channels) def update_channel_metric( metric: torch.Tensor | None, inputs: TensorsCache, weights: tp.Sequence[torch.Tensor], metric_mode: ChannelOrderCalibConfig.ChannelMetric, num_channels: int, num_heads: int = 1, device: torch.device | str = None, dtype: torch.dtype = torch.float32, ) -> torch.Tensor: """Update the metric value of the channels. Args: metric (`torch.Tensor` or `None`): The metric value of the channels. inputs (`TensorsCache`): The input activations. weights (`Sequence[torch.Tensor]`): The weight tensors. metric_mode (`ChannelOrderCalibConfig.ChannelMetric`): The channel metric mode. num_channels (`int`): The number of channels. 
def init_channel_index_from_metric(
    metric: torch.Tensor,
    /,
    metric_mode: ChannelOrderCalibConfig.ChannelMetric,
    index_mode: ChannelOrderCalibConfig.ChannelIndex,
    group_size: int,
    num_heads: int = 1,
    num_head_repeats: int = 1,
) -> torch.Tensor:
    """Derive the channel order from the channel metric.

    Channels are ranked by ascending metric value. With several heads the ranking
    is done inside each head, and heads that belong to the same repeat group share
    one ranking computed from their combined metric (maximum if the metric mode
    name contains ``Max``, sum otherwise).

    Args:
        metric (`torch.Tensor`):
            The metric value of the channels.
        metric_mode (`ChannelOrderCalibConfig.ChannelMetric`):
            The channel metric mode.
        index_mode (`ChannelOrderCalibConfig.ChannelIndex`):
            The index mode.
        group_size (`int`):
            The quantization group size.
        num_heads (`int`, *optional*, defaults to `1`):
            The number of heads.
        num_head_repeats (`int`, *optional*, defaults to `1`):
            The number of head repeats.

    Returns:
        `torch.Tensor`: The index of the channels.
    """
    num_channels = metric.numel()
    num_groups = num_channels // group_size

    if num_heads <= 1:
        return get_channel_index_from_rank(
            metric.argsort(), num_channels=num_channels, num_groups=num_groups, index_mode=index_mode
        )

    head_channels = num_channels // num_heads
    if num_head_repeats > 1:
        num_unique_heads = num_heads // num_head_repeats
        grouped = metric.view(num_unique_heads, num_head_repeats, head_channels)
        if "Max" in metric_mode.name:
            grouped = grouped.amax(dim=1, keepdim=True)
        else:
            grouped = grouped.sum(dim=1, keepdim=True)
        # every repeat of a head reuses the ranking of its group
        shared_rank = grouped.argsort(dim=-1).expand(num_unique_heads, num_head_repeats, -1)
        rank = shared_rank.reshape(num_heads, -1)
    else:
        rank = metric.view(num_heads, head_channels).argsort(dim=-1)

    # shift the per-head ranks to global channel positions
    head_offsets = torch.arange(0, num_channels, head_channels, dtype=torch.long, device=rank.device)
    rank = rank + head_offsets.view(num_heads, 1)

    groups_per_head = max(num_groups // num_heads, 1)
    per_head_index = [
        get_channel_index_from_rank(
            rank[head],
            num_channels=head_channels,
            num_groups=groups_per_head,
            index_mode=index_mode,
        )
        for head in range(num_heads)
    ]
    return torch.cat(per_head_index, dim=0)
""" super().__init__( tensor_type=TensorType.Weights, config=config, w_quantizer=weight_quantizer, x_quantizer=input_quantizer, y_quantizer=None, develop_dtype=develop_dtype, ) assert self.config.objective == SearchBasedCalibObjective.OutputsError assert self.config.granularity == SearchBasedCalibGranularity.Layer if self.config.strategy == SearchBasedCalibStrategy.Manual: self.index_modes = [self.config.channel_index] self.metric_modes = [self.config.channel_metric] else: self.metric_modes = list(ChannelOrderCalibConfig.ChannelMetric.__members__.values()) self.index_modes = list(ChannelOrderCalibConfig.ChannelIndex.__members__.values()) self.num_index_modes, self.num_metric_modes = len(self.index_modes), len(self.metric_modes) self.num_heads = num_heads self.num_head_repeats = num_head_repeats self.metrics, self.channel_indexes = None, None @property def population_size(self) -> int: """Get the population size.""" size = self.num_index_modes * self.num_metric_modes return (size + 1) if self.config.strategy != SearchBasedCalibStrategy.Manual else size @property def allows_x_quant_for_wgts(self) -> bool: """Whether the calibrator needs activation quantization when tensor_type is Weights.""" return self.config.allow_x_quant @property def allows_w_quant_for_wgts(self) -> bool: """Whether the calibrator needs weight quantization when tensor_type is Weights.""" return self.config.allow_w_quant def update_channel_metrics(self, weights: list[torch.Tensor | nn.Parameter], inputs: TensorsCache) -> None: """Update the metrics of the channels. Args: weights (list[torch.Tensor | nn.Parameter]): The weight tensors. inputs (TensorsCache): The input activations. 
""" weights = [w.data for w in weights] if self.metrics is None: self.num_channels = weights[0].shape[1] self.device = weights[0].device self.metrics = [None] * len(self.metric_modes) for metric_id, metric_mode in enumerate(self.metric_modes): self.metrics[metric_id] = update_channel_metric( metric=self.metrics[metric_id], inputs=inputs, weights=weights, metric_mode=metric_mode, num_channels=self.num_channels, num_heads=self.num_heads, device=self.device, dtype=self.develop_dtype, ) def init_channel_indexes(self) -> None: """Initialize the indexes.""" if self.needs_x_quant: ipts_group_size = self.x_quantizer.config.smallest_group_shape[1] else: ipts_group_size = -1 if ipts_group_size <= 0: ipts_group_size = self.num_channels if self.needs_w_quant: wgts_group_size = self.w_quantizer.config.smallest_group_shape[1] else: wgts_group_size = -1 if wgts_group_size <= 0: wgts_group_size = self.num_channels group_size = min(ipts_group_size, wgts_group_size) self.channel_indexes = [None] + [ init_channel_index_from_metric( metric, metric_mode=metric_mode, index_mode=index_mode, group_size=group_size, num_heads=self.num_heads, num_head_repeats=self.num_head_repeats, ) for metric_mode, metric in zip(self.metric_modes, self.metrics, strict=True) for index_mode in self.index_modes ] self.arange = torch.arange(self.num_channels, dtype=torch.long, device=self.device) self.metrics = None gc.collect() torch.cuda.empty_cache() def _reset(self, x_wgts: list[torch.Tensor | nn.Parameter], x_acts: TensorsCache, **kwargs) -> None: """Reset the calibrator. Args: x_wgts (list[list[torch.Tensor | nn.Parameter]]): Weight tensors. x_acts (TensorsCache): Input activations. 
""" if self.channel_indexes is None: self.update_channel_metrics(x_wgts, x_acts) self.init_channel_indexes() if self.config.strategy == SearchBasedCalibStrategy.Manual and self.channel_indexes[0] is None: self.channel_indexes = self.channel_indexes[1:] assert len(self.channel_indexes) == self.population_size self.baseline_errors, self.best_error, self.best_candidate_id = None, None, None self.error_stats_history = [] def get_best(self) -> torch.Tensor: """Get the best candidate. Returns: torch.Tensor: The best candidate. """ return self.channel_indexes[self.best_candidate_id] def _ask(self) -> torch.Tensor: """Ask for the next candidate. Returns: torch.Tensor: The next candidate. """ channel_index = self.channel_indexes[self.candidate_id] channel_index_inverse = None if channel_index is not None: channel_index_inverse = torch.zeros_like(channel_index) channel_index_inverse[channel_index] = self.arange.to(device=channel_index.device) self.candidate_inverse = channel_index_inverse return channel_index def _tell(self, errors: list[tuple[torch.Tensor, ...]]) -> None: # noqa: C901 """Tell the error of the last candidate and update the best candidate. Args: errors (list[tuple[torch.Tensor, ...]]): The error of the last candidate. 
""" errors = [tuple(math.root_(e.to(torch.float64), self.config.degree) for e in error) for error in errors] if self.baseline_errors is None: self.baseline_errors = errors error_stats = [0, 0, 0, 0, 0] for baseline_error, error in zip(self.baseline_errors, errors, strict=True): for be, e in zip(baseline_error, error, strict=True): _d = e.item() - be.item() if e > be: error_stats[0] += 1 if e < be: error_stats[1] -= 1 error_stats[2] += max(_d, 0) error_stats[3] += min(_d, 0) error_stats[4] += e.item() if self.best_error is None or error_stats < self.best_error: self.best_error = error_stats self.best_candidate_id = self.candidate_id if self.logger.level <= tools.logging.DEBUG: self.logger.debug( f"+ {self._get_metric_index_mode_str(self.candidate_id)} : {self._get_error_str(error_stats)}" ) if self.is_last_candidate_in_iter(): self.logger.debug(f"+ {self._get_metric_index_mode_str(self.best_candidate_id)} is the best candidate.") def _get_error_str(self, e: list[int | float]) -> str: return f"[{e[0]:+d}, {e[1]:+d}, {e[2]:>10.4f}, {e[3]:>10.4f}, {e[4]:>10.4f}]" def _get_metric_index_mode_str(self, candidate_id: int) -> str: if candidate_id == 0: if self.config.strategy == SearchBasedCalibStrategy.Manual: metric_mode, index_mode = self.metric_modes[0], self.index_modes[0] else: return f"{'baseline':>20} {'':>10}" else: metric_id = (candidate_id - 1) % self.num_metric_modes index_id = (candidate_id - 1) // self.num_metric_modes metric_mode, index_mode = self.metric_modes[metric_id], self.index_modes[index_id] return f"{metric_mode.name:>20} - {index_mode.name:>10}" def _process_x_in_xw(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: if not self.needs_x_quant_for_wgts: return x if channels_dim is MISSING: channels_dim = self.x_quantizer.channels_dim if self.candidate is not None: x = x.index_select(dim=channels_dim, index=self.candidate.to(x.device)) x = self.x_quantizer.quantize(x, channels_dim=channels_dim).data if self.candidate 
is not None: x = x.index_select(dim=channels_dim, index=self.candidate_inverse.to(x.device)) return x def _process_w_in_xw(self, w: torch.Tensor) -> torch.Tensor: if not self.needs_w_quant_for_wgts: return w if self.candidate is not None: w = w.index_select(dim=1, index=self.candidate.to(w.device)) w = self.w_quantizer.quantize(w.data, kernel=None, develop_dtype=self.develop_dtype).data if self.candidate is not None: w = w.index_select(dim=1, index=self.candidate_inverse.to(w.device)) return w def _process_x_in_yx(self, x: torch.Tensor, channels_dim: int) -> torch.Tensor: raise RuntimeError("_process_x_in_yx should not be called in ChannelOrderCalibrator.") def _process_y_in_yx(self, x: torch.Tensor, channels_dim: int) -> torch.Tensor: raise RuntimeError("_process_y_in_yx should not be called in ChannelOrderCalibrator.") def _process_xw_in_yx(self, w: torch.Tensor) -> torch.Tensor: raise RuntimeError("_process_xw_in_yx should not be called in ChannelOrderCalibrator.") def _process_yw_in_yx(self, w: torch.Tensor) -> torch.Tensor: raise RuntimeError("_process_yw_in_yx should not be called in ChannelOrderCalibrator.") def _process_wgts_centric_mod( self, wgts: list[nn.Parameter], mods: list[nn.Module], *, reorder_wgts: list[tuple[nn.Parameter, int]], reorder_ipt_mods: list[tuple[nn.Module, int, BaseInputPackager | None]], reorder_opt_mods: list[tuple[nn.Module, int, BaseOutputPackager | None]], update_state_dict: bool = True, **kwargs, ) -> None: channels_index = self.candidate if update_state_dict: self._state_dict.extend([(w, w.data) for w, _ in reorder_wgts]) if channels_index is not None: for w, d in reorder_wgts: w.data = w.data.index_select(dim=d, index=channels_index.to(w.device)) for m, channels_dim, packager in reorder_ipt_mods: self._hooks.append( ChannelReorderer(channels_index, channels_dim, input_packager=packager).as_hook().register(m) ) for m, channels_dim, packager in reorder_opt_mods: self._hooks.append( ChannelReorderer(channels_index, channels_dim, 
output_packager=packager) .as_hook(is_output=True) .register(m) ) self._candidate_backup = channels_index self.candidate = None # we have already reordered and thus do not need to reorder again in _process super()._process_wgts_centric_mod(wgts, mods, update_state_dict=False) def _recover_mod(self) -> None: super()._recover_mod() self.candidate = self._candidate_backup self._candidate_backup = None ================================================ FILE: deepcompressor/calib/rotate.py ================================================ # -*- coding: utf-8 -*- """Rotation Quantization module.""" import typing as tp import torch import torch.nn as nn from ..utils.hooks import BaseInputPackager, IOHook from ..utils.math import HadamardMatrix, hardmard_transform, random_hadamard_matrix __all__ = [ "rotate_in_channels", "rotate_out_channels", "hadamard_in_channels", "get_rotation_matrix", "transform_rms_norm_and_linear", "transform_layer_norm_to_rms_norm", "transform_norm_and_linear", ] # Copied from transformers.models.llama.modeling_llama.LlamaRMSNorm class RMSNorm(nn.Module): """Root Mean Square Layer Normalization (RMSNorm).""" def __init__(self, hidden_size: int, eps=1e-6) -> None: """Initialize RMSNorm.""" super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.variance_epsilon = eps def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: """Apply RMSNorm normalization to hidden states.""" input_dtype = hidden_states.dtype hidden_states = hidden_states.to(torch.float32) variance = hidden_states.pow(2).mean(-1, keepdim=True) hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) return self.weight * hidden_states.to(input_dtype) class HadamardTransformHook(IOHook): def __init__( self, rhs: torch.Tensor, lhs: torch.Tensor, lhs_k: int, scaled: bool = True, packager: BaseInputPackager = None ): super().__init__(pre=True, post=False, input_packager=packager, output_packager=None) self.rhs = rhs self.lhs = lhs self.lhs_k = 
lhs_k self.scaled = scaled def pre_forward( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], ) -> tuple[tuple[torch.Tensor, ...], dict[str, tp.Any]]: tensors = self.input_packager.unpack(module, input_args, input_kwargs) for k, x in tensors.items(): tensors[k] = hardmard_transform( x, hadamard_rhs=self.rhs, hadamard_lhs=self.lhs, lhs_k=self.lhs_k, scaled=self.scaled ) return self.input_packager.repack(tensors, module, input_args, input_kwargs) def rotate_in_channels(weight: nn.Parameter, /, *, rotation: torch.Tensor) -> None: """Rotate the input channels of a weight matrix.""" shape, dtype = weight.shape, weight.dtype weight.data = ( torch.matmul(weight.data.view(-1, rotation.shape[0]).to(dtype=torch.float64), rotation.to(weight.device)) .to(dtype=dtype) .view(shape) ) def rotate_out_channels(weight: nn.Parameter, /, *, rotation: torch.Tensor, bias: nn.Parameter | None = None) -> None: """Rotate the output channels of a weight matrix.""" shape, dtype = weight.shape, weight.dtype out_channels, head_channels = shape[0], rotation.shape[0] num_heads = out_channels // head_channels weight.data = ( torch.matmul( rotation.T.to(weight.device), weight.data.view(num_heads, head_channels, -1).to(dtype=torch.float64) ) .to(dtype=dtype) .view(shape) ) if bias is not None: bias.data = ( torch.matmul( rotation.T.to(weight.device), bias.data.view(num_heads, head_channels, -1).to(dtype=torch.float64) ) .to(dtype=dtype) .view(-1) ) def hadamard_in_channels( modules: tp.Iterable[nn.Module], packager: BaseInputPackager = None, dtype: torch.dtype | None = None, device: torch.device | str | None = None, ): """Apply Hadamard quantization to the input channels of the modules.""" for module in modules: if isinstance(module, nn.Linear): in_channels = module.in_features device, dtype = device or module.weight.device, dtype or module.weight.dtype rhs_double, lhs_double, k = HadamardMatrix.get(in_channels, scale=True, dtype=torch.float64) 
module.weight.data = hardmard_transform( module.weight.data.to(torch.float64), rhs_double.to(device), lhs_double.to(device), k, scaled=True ).to(device=device, dtype=module.weight.dtype) del rhs_double, lhs_double, k rhs, lhs, k = HadamardMatrix.get(in_channels, scale=True, dtype=dtype, device=device) HadamardTransformHook(rhs=rhs, lhs=lhs, lhs_k=k, packager=packager).register(module) else: raise NotImplementedError(f"Module {module} not supported!") def get_rotation_matrix(num_channels: int, random: bool = True, compatible: bool = True) -> torch.Tensor: """Get a random rotation matrix for the given number of channels.""" if random: return random_hadamard_matrix(num_channels) else: rhs, lhs, k = HadamardMatrix.get(num_channels, scale=False) rhs = rhs.to(dtype=torch.float64) if k == 1: rotation = rhs elif compatible: # this is compatible with hadamard_transform rotation = torch.kron(lhs.T.contiguous().to(dtype=torch.float64), rhs) else: rotation = torch.kron(rhs, lhs.to(dtype=torch.float64)) return rotation.mul_(1.0 / torch.tensor(num_channels, dtype=torch.float64).sqrt()) def transform_rms_norm_and_linear(norm: nn.LayerNorm | RMSNorm, next_modules: tp.Iterable[nn.Linear]) -> None: """Fuse the weight multiplication of rms norm into the next adjacent linear modules. Args: norm (`nn.LayerNorm` or `RMSNorm`): normalization module. next_modules (`Iterable[nn.Linear]`): modules after the normalization module. 
""" ln_w = norm.weight.data.to(dtype=torch.float64) norm.weight.data = torch.ones_like(norm.weight.data) if hasattr(norm, "bias") and norm.bias is not None: ln_b = norm.bias.data.to(dtype=torch.float64) norm.bias = None else: ln_b = None for linear in next_modules: assert isinstance(linear, nn.Linear) dtype = linear.weight.dtype fc_w = linear.weight.data.to(dtype=torch.float64) ln_w = ln_w.to(fc_w.device) linear.weight.data = (fc_w * ln_w).to(dtype=dtype) if ln_b is not None: ln_b = ln_b.to(fc_w.device) if linear.bias is None: linear.bias = nn.Parameter(torch.zeros(linear.out_features, dtype=dtype, device=linear.weight.device)) linear.bias.data = (linear.bias.data.to(dtype=torch.float64) + torch.matmul(fc_w, ln_b)).to(dtype=dtype) def transform_layer_norm_to_rms_norm( parent: nn.Module, norm_name: str, prev_modules: tp.Iterable[nn.Linear], prev_out_channels_dims: int | tp.Iterable[int] = 0, ) -> None: """Transform LayerNorm to RMSNorm. Args: parent (`nn.Module`): Parent module that contains the normalization module. norm_name (`str`): Name of the normalization module in `parent`. prev_modules (`Iterable[nn.Linear]`): Previous adjacent linear modules. prev_out_channels_dims (`int` or `Iterable[int]`, *optional*, defaults to `0`): Output channels dimension of the previous modules' weights. """ if "." 
in norm_name: norm_names = norm_name.split(".") for name in norm_names[:-1]: parent = getattr(parent, name) norm_name = norm_names[-1] del norm_names norm = getattr(parent, norm_name) assert isinstance(norm, nn.LayerNorm) assert len(norm.normalized_shape) == 1, f"LayerNorm's #dims must be 1, got {len(norm.normalized_shape)}" assert norm.bias is None, "LayerNorm's bias must be None, please call `transform_rms_norm_and_linear` in advance" # region move substract mean to the previous linear modules assert len(prev_modules) > 0, "No previous modules found" if isinstance(prev_out_channels_dims, int): prev_out_channels_dims = [prev_out_channels_dims] * len(prev_modules) for module, dim in zip(prev_modules, prev_out_channels_dims, strict=True): if isinstance(module, nn.LayerNorm): module.bias = None else: if isinstance(module, nn.Linear): assert dim == 0, "Linear module's output channels dimension is 0" elif isinstance(module, nn.Embedding): assert dim == 1, "Embedding module's output channels dimension is 1" dtype = module.weight.dtype w = module.weight.data.to(dtype=torch.float64) module.weight.data = w.sub_(w.mean(dim=dim, keepdim=True)).to(dtype=dtype) if hasattr(module, "bias") and module.bias is not None: b = module.bias.data.to(dtype=torch.float64) module.bias.data = b.sub_(b.mean()).to(dtype=dtype) # endregion # region replace LayerNorm with RMSNorm rms = RMSNorm(hidden_size=norm.normalized_shape[0], eps=norm.eps) rms.weight.data = norm.weight.data setattr(parent, norm_name, rms) # endregion def transform_norm_and_linear( parent: nn.Module, norm_name: str, next_modules: tp.Iterable[nn.Linear], prev_modules: tp.Iterable[nn.Linear] | None = None, prev_out_channels_dims: int | tp.Iterable[int] = 0, ): """Transform the normalization module and the next adjacent linear modules. Args: parent (nn.Module): Parent module. norm_name (str): Name of the normalization module. next_modules (tp.Iterable[nn.Linear]): Next adjacent linear modules. 
prev_modules (tp.Iterable[nn.Linear]): Previous adjacent linear modules. prev_out_channels_dims (int | tp.Iterable[int], optional): Output channels dimension of the previous modules. Defaults to ``0``. """ if "." in norm_name: norm_names = norm_name.split(".") for name in norm_names[:-1]: parent = getattr(parent, name) norm_name = norm_names[-1] del norm_names norm = getattr(parent, norm_name) transform_rms_norm_and_linear(norm, next_modules) if isinstance(norm, nn.LayerNorm): transform_layer_norm_to_rms_norm(parent, norm_name, prev_modules, prev_out_channels_dims) ================================================ FILE: deepcompressor/calib/search.py ================================================ # -*- coding: utf-8 -*- """Search-based uantization calibrator module.""" import gc import typing as tp from abc import ABC, abstractmethod from dataclasses import _MISSING_TYPE, MISSING import psutil import torch import torch.nn as nn import torch.utils.hooks from ..data.cache import TensorCache, TensorsCache from ..data.common import TensorType from ..data.utils.reshape import ReshapeFn from ..data.utils.shape import infer_view_shape from ..quantizer.processor import Quantizer from ..utils import tools from ..utils.hooks import Hook from .config import SearchBasedCalibConfig, SearchBasedCalibGranularity, SearchBasedCalibObjective __all__ = ["SearchBasedCalibrator"] def _reshape_w_for_wgts(w: torch.Tensor, w_view_shape: torch.Size) -> torch.Tensor: # (#g0, gs0, #g1, gs1, ...) w = w.view(w_view_shape) # (#g0, gs0, #g1, gs1, ...) -> (#g0, ..., gs1, ..., gs0) w = w.permute(*range(0, len(w_view_shape), 2), *range(3, len(w_view_shape), 2), 1) # (#g0, ..., gs0, gs1, ...) -> (#g0, ..., gs1 * gs2 * ..., gs0) return w.reshape(*w_view_shape[::2], -1, w_view_shape[1]) def _reshape_x_for_wgts(x: torch.Tensor, w_view_shape: torch.Size) -> torch.Tensor: # x is unfolded already num_samples = x.shape[0] # (1, n, #g1, gs1, ...) 
x = x.view(1, num_samples, *w_view_shape[2:]) # (1, n, #g1, gs1, ...) -> (1, #g1, ..., n, gs1, ...) x = x.permute(*range(0, len(w_view_shape), 2), *range(1, len(w_view_shape), 2)) return x.reshape(1, *w_view_shape[2::2], num_samples, -1) def _reshape_x_for_ipts(x: torch.Tensor, x_view_shape: torch.Size) -> torch.Tensor: # x is original tensor without unfolding # (#g0, gs0, #g1, gs1, ...) x = x.view(x_view_shape) # (#g0, gs0, #g1, gs1, ...) -> (#g0, #g1, ..., gs0, gs2, ..., gs1) x = x.permute(*range(0, len(x_view_shape), 2), 1, *range(5, len(x_view_shape), 2), 3) # (#g0, #g1, ..., gs0, gs2, ..., gs1) -> (#g0, #g1, ..., gs0 * gs2 * ..., gs1) return x.reshape(*x_view_shape[::2], -1, x_view_shape[3]) def _reshape_w_for_ipts(w: torch.Tensor, x_view_shape: torch.Size) -> torch.Tensor: return w.transpose(0, 1).reshape(1, x_view_shape[2], *([1] * (w.ndim - 2)), x_view_shape[3], -1) _CANDIDATE = tp.TypeVar("_CANDIDATE") _CONFIG = tp.TypeVar("_CONFIG", bound=SearchBasedCalibConfig) class SearchBasedCalibrator(ABC, tp.Generic[_CONFIG, _CANDIDATE]): """The base class for search-based calibration.""" config: _CONFIG candidate: _CANDIDATE def __init__( self, tensor_type: TensorType, config: _CONFIG, w_quantizer: Quantizer | None, x_quantizer: Quantizer | None, y_quantizer: Quantizer | None, develop_dtype: torch.dtype, ) -> None: """Initialize the search-based calibrator. Args: tensor_type (`TensorType`): The tensor type. config (`_CONFIG`): The calibration configuration. w_quantizer (`Quantizer` or `None`): The w quantizer for x-w computation. x_quantizer (`Quantizer` or `None`): The x quantizer for x-w or y-x computation. y_quantizer (`Quantizer` or `None`): The y quantizer for y-x computation. develop_dtype (`torch.dtype`): The development data type. 
""" self.tensor_type = tensor_type self.config = config self.objective = self.config.objective self.granularity = self.config.granularity self.opts_device = None self.develop_dtype = develop_dtype self.w_quantizer = w_quantizer self.x_quantizer = x_quantizer self.y_quantizer = y_quantizer self.needs_w_quant = self.w_quantizer is not None and self.w_quantizer.is_enabled() self.needs_x_quant = self.x_quantizer is not None and self.x_quantizer.is_enabled() self.needs_y_quant = self.y_quantizer is not None and self.y_quantizer.is_enabled() self.needs_x_quant_for_wgts = self.allows_x_quant_for_wgts and self.needs_x_quant self.needs_w_quant_for_wgts = self.allows_w_quant_for_wgts and self.needs_w_quant self.needs_x_quant_for_ipts = self.allows_x_quant_for_ipts and self.needs_x_quant self.needs_w_quant_for_ipts = self.allows_w_quant_for_ipts and self.needs_w_quant self.needs_x_quant_for_opts = self.allows_x_quant_for_opts and self.needs_x_quant self.needs_y_quant_for_opts = self.allows_y_quant_for_opts and self.needs_y_quant self.needs_w_quant_for_opts = self.allows_w_quant_for_opts and self.needs_w_quant if self.tensor_type == TensorType.Weights: self.quantizer = self.w_quantizer self.needs_quant = self.needs_w_quant elif self.tensor_type == TensorType.Inputs: self.quantizer = self.x_quantizer self.needs_quant = self.needs_x_quant elif self.tensor_type == TensorType.Outputs: self.quantizer = self.y_quantizer self.needs_quant = self.needs_y_quant else: raise ValueError(f"unknown tensor type: {self.tensor_type}") self.num_iters = getattr(self.config, "num_iters", 1) self.logger = tools.logging.getLogger(f"{__name__}.{self.__class__.__name__.replace('Agent', '')}") @property @abstractmethod def population_size(self) -> int: """Get the population size.""" ... 
@property def allows_x_quant_for_wgts(self) -> bool: """Whether the calibrator allows input quantization when tensor_type is Weights.""" return False @property def allows_w_quant_for_wgts(self) -> bool: """Whether the calibrator allows weight quantization when tensor_type is Weights.""" return True @property def allows_x_quant_for_ipts(self) -> bool: """Whether the calibrator allows input quantization when tensor_type is Inputs.""" return True @property def allows_w_quant_for_ipts(self) -> bool: """Whether the calibrator allows weight quantization when tensor_type is Inputs.""" return False @property def allows_x_quant_for_opts(self) -> bool: """Whether the calibrator allows x quantization when tensor_type is Outputs.""" return True @property def allows_y_quant_for_opts(self) -> bool: """Whether the calibrator allows y quantization when tensor_type is Outputs.""" return True @property def allows_w_quant_for_opts(self) -> bool: """Whether the calibrator allows weight quantization when tensor_type is Outputs.""" return False @property def needs_to_pre_reshape_x_for_wgts(self) -> bool: """Whether the calibrator needs to pre-reshape the inputs for weight quantization calibration.""" return not self.needs_x_quant_for_wgts and self.config.pre_reshape @property def needs_to_pre_reshape_w_for_ipts(self) -> bool: """Whether the calibrator needs to pre-reshape the weights for input quantization calibration.""" return not self.needs_w_quant_for_ipts and self.config.pre_reshape def _reset(self, **kwargs) -> None: pass def reset(self, **kwargs) -> None: """Reset the calibrator.""" self.iter = 0 self.candidate_id = 0 self._reset(**kwargs) self._state_dict: list[tuple[nn.Parameter, torch.Tensor]] = [] self._hooks: list[Hook | torch.utils.hooks.RemovableHandle] = [] def is_done(self) -> bool: """Check if the calibration is done.""" return self.iter >= self.num_iters def is_last_iter(self) -> bool: """Check if the current iteration is the last one.""" return self.iter == 
self.num_iters - 1 def is_last_candidate_in_iter(self) -> bool: """Check if the current candidate is the last one in the current iteration.""" return self.candidate_id == self.population_size - 1 @abstractmethod def get_best(self) -> _CANDIDATE: """Get the best candidate. Returns: `_CANDIDATE`: The best candidate. """ ... @abstractmethod def _ask(self) -> _CANDIDATE: """Ask for the next candidate. Returns: `_CANDIDATE`: The next candidate. """ ... @abstractmethod def _tell(self, error: list[torch.Tensor]) -> None: """Tell the error of the last candidate and update the best candidate. Args: error (`list[torch.Tensor]`): The error of the last candidate. """ ... def ask(self) -> _CANDIDATE: """Ask for the next candidate. Returns: `_CANDIDATE`: The next candidate. """ self.candidate = self._ask() return self.candidate def tell(self, error: list[torch.Tensor]) -> None: """Tell the error of the last candidate and update the best candidate. Args: error (`list[torch.Tensor]`): The error of the last candidate. 
""" self._tell(error) self.candidate_id += 1 if self.candidate_id >= self.population_size: self.iter += 1 self.candidate_id = 0 def _parse_ipts(self, ipts: TensorsCache | None, set_device: bool = False) -> TensorsCache | None: if set_device: self.opts_device = None elif ipts is None: return None if self.objective == SearchBasedCalibObjective.ProductsError: batch_size = self.config.element_batch_size calib_size = self.config.element_size elif self.objective == SearchBasedCalibObjective.OutputsError: batch_size = self.config.sample_batch_size calib_size = self.config.sample_size else: assert self.objective == SearchBasedCalibObjective.TensorError batch_size = -1 calib_size = -1 prev_size = len(ipts.front().data) parsed_ipts = TensorsCache( { key: ipt.repartition( max_batch_size=batch_size, max_size=calib_size, standardize=self.objective == SearchBasedCalibObjective.ProductsError, reshape=self.tensor_type == TensorType.Weights, ) for key, ipt in ipts.items() } ) curr_size = len(parsed_ipts.front().data) assert all(len(ipt.data) == curr_size for ipt in parsed_ipts.values()) if set_device and prev_size != curr_size: self.opts_device = self.config.outputs_device return parsed_ipts def _parse_args( # noqa: C901 self, x_wgts: list[nn.Parameter] | None, y_wgts: list[nn.Parameter] | None, x_acts: TensorsCache | None, y_acts: TensorsCache | None, eval_inputs: TensorsCache | None, eval_module: nn.Module | None, x_mods: list[nn.Module] | None, y_mods: list[nn.Module] | None, orig_x_wgts: list[tuple[nn.Parameter, torch.Tensor]] | None, orig_y_wgts: list[tuple[nn.Parameter, torch.Tensor]] | None, orig_x_acts: TensorsCache | None, orig_y_acts: TensorsCache | None, orig_eval_inputs: TensorsCache | None, ) -> tuple[ list[torch.Tensor | nn.Parameter] | None, # x_wgts list[torch.Tensor | nn.Parameter] | None, # y_wgts TensorsCache | None, # x_acts TensorsCache | None, # y_acts TensorsCache | None, # eval_inputs nn.Module | None, # eval_module list[nn.Module] | None, # x_mods 
list[nn.Module] | None, # y_mods list[tuple[nn.Parameter, torch.Tensor]] | None, # orig_x_wgts list[tuple[nn.Parameter, torch.Tensor]] | None, # orig_y_wgts TensorCache | None, # orig_x_acts TensorCache | None, # orig_y_acts TensorCache | None, # orig_eval_inputs ]: # region Check the types of the arguments if x_wgts is not None: assert isinstance(x_wgts, (tuple, list)), "x_wgts should be a list" assert all(isinstance(w, nn.Parameter) for w in x_wgts), "wgts should be a list of nn.Parameter" if y_wgts is not None: assert isinstance(y_wgts, (tuple, list)), "y_wgts should be a list" assert all(isinstance(w, nn.Parameter) for w in y_wgts), "wgts should be a list of nn.Parameter" if x_acts is not None: assert isinstance(x_acts, TensorsCache), "x_acts should be a TensorsCache" if y_acts is not None: assert isinstance(y_acts, TensorsCache), "y_acts should be a TensorsCache" if eval_inputs is not None: assert isinstance(eval_inputs, TensorsCache), "eval_inputs should be a TensorsCache" if x_mods is not None: assert isinstance(x_mods, (tuple, list)), "x_mods should be a list" if y_mods is not None: assert isinstance(y_mods, (tuple, list)), "y_mods should be a list" if orig_x_wgts is not None: assert isinstance(orig_x_wgts, (tuple, list)), "orig_x_wgts should be a list" assert all(isinstance(p, nn.Parameter) and isinstance(w, torch.Tensor) for p, w in orig_x_wgts), ( "orig_x_wgts should be a list of tuples of nn.Parameter and torch.Tensor" ) if x_wgts is not None: assert len(orig_x_wgts) >= len(x_wgts), "orig_wgts should have at least as mtp.Any elements as wgts" assert all(p is w for (p, _), w in zip(orig_x_wgts, x_wgts, strict=False)), ( "the parameters in orig_wgts should be in wgts in the same order" ) if orig_y_wgts is not None: assert isinstance(orig_y_wgts, (tuple, list)), "orig_y_wgts should be a list" assert all(isinstance(p, nn.Parameter) and isinstance(w, torch.Tensor) for p, w in orig_y_wgts), ( "orig_y_wgts should be a list of tuples of nn.Parameter and 
torch.Tensor" ) if y_wgts is not None: assert len(orig_y_wgts) >= len(y_wgts), "orig_wgts should have at least as mtp.Any elements as wgts" assert all(p is w for (p, _), w in zip(orig_y_wgts, y_wgts, strict=False)), ( "the parameters in orig_wgts should be in wgts in the same order" ) if orig_x_acts is not None: assert isinstance(orig_x_acts, TensorsCache), "orig_x_acts should be a TensorsCache" if orig_y_acts is not None: assert isinstance(orig_y_acts, TensorsCache), "orig_y_acts should be a TensorsCache" if orig_eval_inputs is not None: assert isinstance(orig_eval_inputs, TensorsCache), "orig_eval_inputs should be a TensorsCache" # endregion self.objective = self.config.objective self.granularity = self.config.granularity if self.tensor_type == TensorType.Outputs: # ! currently only support OutputsError and Layer granularity for Outputs self.objective = SearchBasedCalibObjective.OutputsError self.granularity = SearchBasedCalibGranularity.Layer if self.objective == SearchBasedCalibObjective.TensorError: if x_wgts is not None: x_wgts = [w.detach().data for w in x_wgts] if y_wgts is not None: y_wgts = [w.detach().data for w in y_wgts] if self.tensor_type == TensorType.Weights: assert x_wgts is not None, "wgts should not be None when tensor_type is Weights" elif self.tensor_type == TensorType.Inputs: assert x_acts is not None, "mod_ipts should not be None when tensor_type is Inputs" eval_inputs, orig_eval_inputs = x_acts, orig_x_acts else: # self.tensor_type == TensorType.Outputs assert y_acts is not None, "opts should not be None when tensor_type is Outputs" eval_inputs, orig_eval_inputs = y_acts, orig_y_acts eval_module = None elif self.objective == SearchBasedCalibObjective.ProductsError: assert self.tensor_type in ( TensorType.Weights, TensorType.Inputs, ), "tensor_type should be Weights or Inputs when objective is ProductsError" assert x_wgts is not None, "wgts should not be None when objective is ProductsError" x_wgts = [w.detach().data for w in x_wgts] if 
y_wgts is not None: y_wgts = [w.detach().data for w in y_wgts] x_acts = x_acts or eval_inputs orig_x_acts = orig_x_acts or orig_eval_inputs assert x_acts is not None, "x_acts should not be None when objective is ProductsError" eval_inputs, orig_eval_inputs = x_acts, orig_x_acts elif self.objective == SearchBasedCalibObjective.OutputsError: assert eval_inputs is not None, "eval_inputs should not be None when objective is OutputsError" assert eval_module is not None, "eval_module should not be None when OutputsError" if ( isinstance(eval_module, (nn.Linear, nn.Conv2d)) and self.granularity.value < SearchBasedCalibGranularity.Layer.value and self.tensor_type != TensorType.Outputs ): self.objective = SearchBasedCalibObjective.ProductsError x_wgts = [w.detach().data for w in x_wgts] if y_wgts is not None: y_wgts = [w.detach().data for w in y_wgts] x_acts = x_acts or eval_inputs orig_x_acts = orig_x_acts or orig_eval_inputs assert x_acts is not None, "x_acts should not be None when objective is ProductsError" eval_inputs, orig_eval_inputs = x_acts, orig_x_acts else: self.objective = SearchBasedCalibObjective.OutputsError self.granularity = SearchBasedCalibGranularity.Layer else: raise ValueError(f"unknown objective: {self.objective}") self.logger.debug( f"+ tensor_type: {self.tensor_type}, objective: {self.objective}, granularity: {self.granularity}" ) return ( x_wgts, y_wgts, x_acts, y_acts, self._parse_ipts(eval_inputs, set_device=True), eval_module, x_mods, y_mods, orig_x_wgts, orig_y_wgts, orig_x_acts, orig_y_acts, self._parse_ipts(orig_eval_inputs), ) # region Reshape functions for computing products def _reshape_w_for_wgts_centric_partial_products(self, w: torch.Tensor, *, view_shape: torch.Size) -> torch.Tensor: return _reshape_w_for_wgts(w, view_shape) def _reshape_x_for_wgts_centric_partial_products( self, x: torch.Tensor, *, view_shape: torch.Size, fn: ReshapeFn ) -> torch.Tensor: return _reshape_x_for_wgts(fn(x), view_shape) def 
_reshape_w_for_ipts_centric_partial_products(self, w: torch.Tensor, *, view_shape: torch.Size) -> torch.Tensor: return _reshape_w_for_ipts(w, view_shape) def _reshape_x_for_ipts_centric_partial_products( self, x: torch.Tensor, *, view_shape: torch.Size, fn: ReshapeFn = None ) -> torch.Tensor: return _reshape_x_for_ipts(x, view_shape) def _reshape_w_for_full_products(self, w: torch.Tensor, *, view_shape: torch.Size = None) -> torch.Tensor: return w.view(w.shape[0], -1).T def _reshape_x_for_full_products( self, x: torch.Tensor, *, fn: ReshapeFn, view_shape: torch.Size = None ) -> torch.Tensor: return fn(x).view(x.shape[0], -1) # endregion @abstractmethod def _process_x_in_xw(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: ... @abstractmethod def _process_w_in_xw(self, w: torch.Tensor) -> torch.Tensor: ... @abstractmethod def _process_y_in_yx(self, y: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: ... @abstractmethod def _process_x_in_yx(self, x: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor: ... @abstractmethod def _process_xw_in_yx(self, w: torch.Tensor) -> torch.Tensor: ... @abstractmethod def _process_yw_in_yx(self, w: torch.Tensor) -> torch.Tensor: ... 
def calibrate(
    self,
    x_wgts: list[nn.Parameter] | None = None,
    y_wgts: list[nn.Parameter] | None = None,
    x_acts: TensorsCache | None = None,
    y_acts: TensorsCache | None = None,
    x_mods: list[nn.Module] | None = None,
    y_mods: list[nn.Module] | None = None,
    eval_inputs: TensorsCache | None = None,
    eval_module: nn.Module | None = None,
    eval_kwargs: dict[str, tp.Any] | None = None,
    orig_x_wgts: list[tuple[nn.Parameter, torch.Tensor]] | None = None,
    orig_y_wgts: list[tuple[nn.Parameter, torch.Tensor]] | None = None,
    orig_x_acts: TensorsCache | None = None,
    orig_y_acts: TensorsCache | None = None,
    orig_eval_inputs: TensorsCache | None = None,
    **kwargs,
) -> _CANDIDATE:
    """Calibrate the quantization parameters.

    The arguments are normalized by ``_parse_args``, the calibrator is reset
    with them, and the search is dispatched on ``self.tensor_type`` to
    ``_calibrate_wgts`` (Weights), ``_calibrate_ipts`` (Inputs) or
    ``_calibrate_opts`` (anything else).

    Args:
        x_wgts (`list[nn.Parameter]` or `None`, *optional*, defaults to `None`):
            The weights in x-w computation, or weights that generates x for y-x computation.
        y_wgts (`list[nn.Parameter]` or `None`, *optional*, defaults to `None`):
            The weights that generates y for y-x computation.
        x_acts (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The x activations. It should be x for x-w or y-x computation.
        y_acts (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The y activations. It should be y for y-x computation.
        x_mods (`list[nn.Module]` or `None`, *optional*, defaults to `None`):
            The modules for x activation quantization.
            It should be the modules that take in x for x-w computation,
            or the modules that generates x for y-x computation.
        y_mods (`list[nn.Module]` or `None`, *optional*, defaults to `None`):
            The modules for y activation quantization.
            It should be the modules that generates y for y-x computation.
        eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The inputs of evaluation module `eval_module`.
        eval_module (`nn.Module` or `None`, *optional*, defaults to `None`):
            The module used for evaluation.
        eval_kwargs (`dict[str, tp.Any]` or `None`, *optional*, defaults to `None`):
            The keyword arguments for evaluation module `eval_module`.
            ``None`` is treated as an empty dict.
        orig_x_wgts (`list[tuple[nn.Parameter, torch.Tensor]]` or `None`, *optional*, defaults to `None`):
            The original weights for `x_mods`.
        orig_y_wgts (`list[tuple[nn.Parameter, torch.Tensor]]` or `None`, *optional*, defaults to `None`):
            The original weights for `y_mods`.
        orig_x_acts (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The original x activations `x_acts`.
        orig_y_acts (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The original y activations `y_acts`.
        orig_eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The original inputs of evaluation module `eval_inputs`.
        **kwargs:
            Extra keyword arguments forwarded to ``reset`` and to the
            selected ``_calibrate_*`` method.

    Returns:
        `_CANDIDATE`:
            The best candidate.
    """
    tools.logging.Formatter.indent_inc()
    # The indent level is process-wide logging state: always undo the increment,
    # even when parsing, resetting or the search itself raises.
    try:
        for name, quantizer in (("w", self.w_quantizer), ("x", self.x_quantizer), ("y", self.y_quantizer)):
            if quantizer is not None and quantizer.is_enabled():
                self.logger.debug(f"+ {name}: {quantizer.config.quant_dtype}")
            else:
                self.logger.debug(f"+ {name}: None")
        (
            x_wgts,
            y_wgts,
            x_acts,
            y_acts,
            eval_inputs,
            eval_module,
            x_mods,
            y_mods,
            orig_x_wgts,
            orig_y_wgts,
            orig_x_acts,
            orig_y_acts,
            orig_eval_inputs,
        ) = self._parse_args(
            x_wgts,
            y_wgts,
            x_acts,
            y_acts,
            eval_inputs,
            eval_module,
            x_mods,
            y_mods,
            orig_x_wgts,
            orig_y_wgts,
            orig_x_acts,
            orig_y_acts,
            orig_eval_inputs,
        )
        eval_kwargs = eval_kwargs or {}
        self.logger.debug(f"+ finished parsing calibration arguments, ram usage: {psutil.virtual_memory().percent}")
        self.reset(
            x_wgts=x_wgts,
            y_wgts=y_wgts,
            x_acts=x_acts,
            y_acts=y_acts,
            eval_inputs=eval_inputs,
            eval_module=eval_module,
            x_mods=x_mods,
            y_mods=y_mods,
            orig_x_wgts=orig_x_wgts,
            orig_y_wgts=orig_y_wgts,
            orig_x_acts=orig_x_acts,
            orig_y_acts=orig_y_acts,
            orig_eval_inputs=orig_eval_inputs,
            eval_kwargs=eval_kwargs,
            **kwargs,
        )
        self.logger.debug(f"+ finished resetting calibrator, ram usage: {psutil.virtual_memory().percent}")
        # Free whatever parsing/resetting released before the search starts.
        gc.collect()
        torch.cuda.empty_cache()
        if self.tensor_type == TensorType.Weights:
            result = self._calibrate_wgts(
                x_wgts, eval_inputs, eval_module, x_mods, orig_x_wgts, orig_eval_inputs, eval_kwargs, **kwargs
            )
        elif self.tensor_type == TensorType.Inputs:
            result = self._calibrate_ipts(
                x_wgts, eval_inputs, eval_module, x_mods, orig_x_wgts, orig_eval_inputs, eval_kwargs, **kwargs
            )
        else:
            result = self._calibrate_opts(
                x_wgts,
                y_wgts,
                eval_inputs,
                eval_module,
                x_mods,
                y_mods,
                orig_x_wgts,
                orig_y_wgts,
                orig_eval_inputs,
                eval_kwargs,
                **kwargs,
            )
        return result
    finally:
        tools.logging.Formatter.indent_dec()
same_ipts = orig_ipts is ipts orig_ipts = TensorsCache( { key: TensorCache( [_reshape_x(x, view_shape=w_view_shapes[0], fn=ipt.reshape) for x in ipt.data], **ipt.get_factory_kwargs(channels_dim=1, reshape=ReshapeFn()), ) for key, ipt in orig_ipts.items() }, ) orig_opts: dict[tuple[int, ...], torch.Tensor] = {} for j, (_, w) in enumerate(orig_wgts): w = _reshape_w(w, view_shape=w_view_shapes[j]) for s, ipt in enumerate(orig_ipts): for i, x in enumerate(ipt.data): x = x.to(device=w.device, non_blocking=True) y = torch.matmul(x, w) y = y.view(*y.shape[:-2], y.shape[-2] * y.shape[-1]) orig_opts[(i, s, j)] = y.to(device=self.opts_device or y.device, non_blocking=True) if self.needs_to_pre_reshape_x_for_wgts: if same_ipts: ipts = orig_ipts else: ipts = TensorsCache( { key: TensorCache( [_reshape_x(x, view_shape=w_view_shapes[0], fn=ipt.reshape) for x in ipt.data], **ipt.get_factory_kwargs(channels_dim=1, reshape=ReshapeFn()), ) for key, ipt in ipts.items() } ) del orig_wgts, orig_ipts, same_ipts elif self.objective == SearchBasedCalibObjective.OutputsError: w_view_shapes, _state_dict = [], [] if orig_wgts is not None: _state_dict = [(p, p.data) for p, _ in orig_wgts] for p, w in orig_wgts: p.data = w.to(device=p.data.device) if orig_ipts is None: orig_ipts = ipts assert isinstance(orig_ipts, TensorsCache), "orig_ipts should not be None for OutputsError" orig_opts: dict[tuple[int, ...], torch.Tensor] = {} for i in range(len(orig_ipts.front().data)): ipt = orig_ipts.extract(i, eval_kwargs) y = eval_module(*ipt.args, **ipt.kwargs) y = y[0] if not isinstance(y, torch.Tensor) else y assert isinstance(y, torch.Tensor), "eval_mod should return a tensor" orig_opts[(i,)] = y.to(device=self.opts_device or y.device, non_blocking=True) del ipt, y for p, s in _state_dict: p.data = s del orig_wgts, orig_ipts, _state_dict else: raise ValueError(f"Unknown objective {self.objective}") gc.collect() torch.cuda.empty_cache() self.logger.debug(f"+ finished calculating the original outputs, 
ram usage: {psutil.virtual_memory().percent}") # endregion while not self.is_done(): self.ask() e: list[torch.Tensor] = [] # region Step 2: Calculate the errors if self.objective == SearchBasedCalibObjective.TensorError: assert isinstance(orig_wgts, (tuple, list)) for w, (_, orig_w), w_view_shape in zip(wgts, orig_wgts, w_view_shapes, strict=True): e_w = self._process_w_in_xw(w).sub_(orig_w) if self.granularity == SearchBasedCalibGranularity.Group: e_w = e_w.view(w_view_shape).abs_().pow_(self.config.degree) e_w = e_w.sum(dim=tuple(range(1, len(w_view_shape), 2))).view(w_view_shape[::2]) elif self.granularity == SearchBasedCalibGranularity.ChannelGroup: e_w = e_w.view(*w_view_shape[:4], -1).abs_().pow_(self.config.degree) e_w = e_w.sum(dim=(0, 1, 3, 4)).view(w_view_shape[2]) elif self.granularity == SearchBasedCalibGranularity.Layer: e_w = e_w.abs_().pow_(self.config.degree).sum().view(-1) else: raise ValueError(f"Unknown granularity {self.granularity}") e.append(e_w) elif self.objective == SearchBasedCalibObjective.ProductsError: e = [None] * len(wgts) for j, w in enumerate(wgts): w = _reshape_w(self._process_w_in_xw(w), view_shape=w_view_shapes[j]) for s, ipt in enumerate(ipts): for i, x in enumerate(ipt.data): x = x.to(device=w.device, non_blocking=True) if not self.needs_to_pre_reshape_x_for_wgts: x = self._process_x_in_xw(x, channels_dim=ipt.channels_dim) x = _reshape_x(x, view_shape=w_view_shapes[j], fn=ipt.reshape) y = torch.matmul(x, w) y = y.view(*y.shape[:-2], y.shape[-2] * y.shape[-1]) y = y.sub_(orig_opts[(i, s, j)].to(device=w.device, non_blocking=True)) if self.granularity == SearchBasedCalibGranularity.Group: y = y.to(self.develop_dtype).pow_(self.config.degree).sum(dim=-1) elif self.granularity == SearchBasedCalibGranularity.ChannelGroup: y = y.view(y.shape[0], y.shape[1], -1) y = y.to(self.develop_dtype).pow_(self.config.degree).sum(dim=(0, 2)) elif self.granularity == SearchBasedCalibGranularity.Layer: y = 
def _calibrate_ipts(  # noqa: C901
    self,
    wgts: list[torch.Tensor | nn.Parameter],
    ipts: TensorsCache,
    eval_module: nn.Module | None,
    mods: list[nn.Module] | None,
    orig_wgts: list[tuple[nn.Parameter, torch.Tensor]] | None,
    orig_ipts: TensorsCache | None,
    eval_kwargs: dict[str, tp.Any],
    **kwargs,
) -> tp.Any:
    """Run the candidate search when the calibrated tensors are the inputs.

    Step 1 computes the reference outputs once, according to ``self.objective``:

    - ``TensorError``: no reference outputs; only the per-input view shapes are inferred.
    - ``ProductsError``: the products of the (reshaped) original inputs and original weights.
    - ``OutputsError``: the outputs of ``eval_module`` on the original inputs, with the
      original weights temporarily swapped in when ``orig_wgts`` is given.

    Step 2 repeats ``ask`` / ``tell`` until ``is_done()``: for every candidate the
    error against the reference is accumulated and passed to ``tell``.

    Args:
        wgts (`list[torch.Tensor | nn.Parameter]`):
            The weights in x-w computation. For ``ProductsError`` the list entries are
            replaced in place by their reshaped versions when
            ``needs_to_pre_reshape_w_for_ipts`` is set.
        ipts (`TensorsCache`):
            The inputs to calibrate.
        eval_module (`nn.Module` or `None`):
            The module evaluated for ``OutputsError``.
        mods (`list[nn.Module]` or `None`):
            The modules passed to ``_process_ipts_centric_mod`` for ``OutputsError``.
        orig_wgts (`list[tuple[nn.Parameter, torch.Tensor]]` or `None`):
            The original weights. For ``ProductsError``, ``None`` falls back to ``wgts``.
        orig_ipts (`TensorsCache` or `None`):
            The original inputs. ``None`` falls back to ``ipts``.
        eval_kwargs (`dict[str, tp.Any]`):
            The keyword arguments used when extracting the inputs of ``eval_module``.
        **kwargs:
            Extra keyword arguments forwarded to ``_process_ipts_centric_mod``.

    Returns:
        `tp.Any`:
            The value of ``self.get_best()`` after the search.

    Raises:
        ValueError: If ``self.objective`` or ``self.granularity`` is not supported.
    """
    if orig_ipts is None:
        orig_ipts = ipts
    assert ipts.num_tensors == orig_ipts.num_tensors
    assert all(
        x.shape == orig_x.shape
        for ipt, orig_ipt in zip(ipts, orig_ipts, strict=True)
        for x, orig_x in zip(ipt.data, orig_ipt.data, strict=True)
    )
    # region Step 1: Calculate the outputs
    if self.objective == SearchBasedCalibObjective.TensorError:
        assert all(x.shape == ipt.data[0].shape for ipt in ipts for x in ipt.data)
        orig_opts = None
        x_view_shapes = [
            infer_view_shape(
                ipt.data[0].view(-1, *ipt.data[0].shape[ipt.channels_dim :]).shape,
                self.x_quantizer.config.largest_group_shape,
                skip_first_dim=True,
            )
            for ipt in ipts
        ]
        del orig_wgts
    elif self.objective == SearchBasedCalibObjective.ProductsError:
        assert all(ipt.channels_dim == 1 for ipt in ipts)
        assert all(ipt.channels_dim == 1 for ipt in orig_ipts)
        assert all(x.shape[1:] == ipts.front().data[0].shape[1:] for ipt in ipts for x in ipt.data)
        if orig_wgts is None:
            orig_wgts = [(None, w.detach().data) for w in wgts]
        assert len(orig_wgts) == len(wgts)
        if self.granularity != SearchBasedCalibGranularity.Layer:
            _reshape_x = self._reshape_x_for_ipts_centric_partial_products
            _reshape_w = self._reshape_w_for_ipts_centric_partial_products
        else:
            _reshape_x = self._reshape_x_for_full_products
            _reshape_w = self._reshape_w_for_full_products
        x_view_shapes = [
            infer_view_shape(ipt.data[0].shape, self.x_quantizer.config.largest_group_shape, skip_first_dim=True)
            for ipt in ipts
        ]
        # Reference products, keyed by (sample index, input index, weight index).
        orig_opts: dict[tuple[int, ...], torch.Tensor] = {}
        for j, (_, w) in enumerate(orig_wgts):
            w = _reshape_w(w, view_shape=x_view_shapes[0])
            for s, ipt in enumerate(orig_ipts):
                for i, x in enumerate(ipt.data):
                    x = x.to(device=w.device, non_blocking=True)
                    x = _reshape_x(x, view_shape=x_view_shapes[s], fn=ipt.reshape)
                    y = torch.matmul(x, w)
                    y = y.view(*y.shape[:-2], y.shape[-2] * y.shape[-1])
                    orig_opts[(i, s, j)] = y.to(device=self.opts_device or y.device, non_blocking=True)
        if self.needs_to_pre_reshape_w_for_ipts:
            for j, w in enumerate(wgts):
                wgts[j] = _reshape_w(w, view_shape=x_view_shapes[0])
        del orig_wgts, orig_ipts
    elif self.objective == SearchBasedCalibObjective.OutputsError:
        x_view_shapes, _state_dict = [], []
        if orig_wgts is not None:
            # Temporarily swap in the original weights; the current ones are restored below.
            _state_dict = [(p, p.data) for p, _ in orig_wgts]
            for p, w in orig_wgts:
                p.data = w.to(device=p.data.device)
        orig_opts: dict[tuple[int, ...], torch.Tensor] = {}
        for i in range(len(orig_ipts.front().data)):
            ipt = orig_ipts.extract(i, eval_kwargs)
            y = eval_module(*ipt.args, **ipt.kwargs)
            y = y[0] if not isinstance(y, torch.Tensor) else y
            assert isinstance(y, torch.Tensor), "eval_mod should return a tensor"
            orig_opts[(i,)] = y.to(device=self.opts_device or y.device, non_blocking=True)
            del ipt, y
        for p, s in _state_dict:
            p.data = s
        del orig_wgts, orig_ipts, _state_dict
    else:
        raise ValueError(f"Unknown objective {self.objective}")
    gc.collect()
    torch.cuda.empty_cache()
    # endregion
    while not self.is_done():
        self.ask()
        e: list[torch.Tensor] = []
        # region Step 2: Calculate the outputs errors
        if self.objective == SearchBasedCalibObjective.TensorError:
            e = [None] * len(ipts)
            for s, (ipt, x_view_shape) in enumerate(zip(ipts, x_view_shapes, strict=True)):
                for x in ipt.data:
                    e_x = self._process_x_in_xw(x, channels_dim=ipt.channels_dim).sub_(x)
                    # One if/elif chain: the Group case must not fall through into the
                    # ChannelGroup/Layer checks, whose `else` would raise for it.
                    if self.granularity == SearchBasedCalibGranularity.Group:
                        e_x = e_x.view(x_view_shape).abs_().pow_(self.config.degree)
                        e_x = e_x.sum(dim=tuple(range(1, len(x_view_shape), 2)))
                    elif self.granularity == SearchBasedCalibGranularity.ChannelGroup:
                        e_x = e_x.view(*x_view_shape[:4], -1).abs_().pow_(self.config.degree)
                        e_x = e_x.sum(dim=(0, 1, 3, 4)).view(x_view_shape[2])
                    elif self.granularity == SearchBasedCalibGranularity.Layer:
                        e_x = e_x.abs_().pow_(self.config.degree).sum().view(-1)
                    else:
                        raise ValueError(f"Unknown granularity {self.granularity}")
                    if e[s] is None:
                        e[s] = e_x
                    else:
                        e[s].add_(e_x)
        elif self.objective == SearchBasedCalibObjective.ProductsError:
            e = [None] * len(ipts)
            for j, w in enumerate(wgts):
                if not self.needs_to_pre_reshape_w_for_ipts:
                    w = self._process_w_in_xw(w)
                    w = _reshape_w(w, view_shape=x_view_shapes[0])
                for s, ipt in enumerate(ipts):
                    for i, x in enumerate(ipt.data):
                        x = x.to(device=w.device, non_blocking=True)
                        x = self._process_x_in_xw(x, channels_dim=ipt.channels_dim)
                        x = _reshape_x(x, view_shape=x_view_shapes[s], fn=ipt.reshape)
                        y = torch.matmul(x, w)
                        y = y.view(*y.shape[:-2], y.shape[-2] * y.shape[-1])
                        y = y.sub_(orig_opts[(i, s, j)].to(device=w.device, non_blocking=True))
                        if self.granularity == SearchBasedCalibGranularity.Group:
                            y = y.to(self.develop_dtype).pow_(self.config.degree).sum(dim=-1)
                        elif self.granularity == SearchBasedCalibGranularity.ChannelGroup:
                            y = y.view(y.shape[0], y.shape[1], -1)
                            y = y.to(self.develop_dtype).pow_(self.config.degree).sum(dim=(0, 2))
                        elif self.granularity == SearchBasedCalibGranularity.Layer:
                            y = y.to(self.develop_dtype).pow_(self.config.degree).sum().view(-1)
                        else:
                            raise ValueError(f"Unknown granularity {self.granularity}")
                        if e[s] is None:
                            e[s] = y
                        else:
                            e[s].add_(y)
        elif self.objective == SearchBasedCalibObjective.OutputsError:
            # Patch weights / register hooks for this candidate; undone by `_recover_mod`.
            self._process_ipts_centric_mod(wgts=wgts, mods=mods, **kwargs)
            e = [None]
            for i in range(len(ipts.front().data)):
                ipt = ipts.extract(i, eval_kwargs)
                y = eval_module(*ipt.args, **ipt.kwargs)
                y = y[0] if not isinstance(y, torch.Tensor) else y
                assert isinstance(y, torch.Tensor), "eval_mod should return a tensor"
                y = (y - orig_opts[(i,)].to(device=y.device, non_blocking=True)).to(self.develop_dtype)
                y = y.pow_(self.config.degree).sum().view(-1)
                if e[0] is None:
                    e[0] = y
                else:
                    e[0].add_(y)
                del ipt, y
            self._recover_mod()
        else:
            raise ValueError(f"Unknown objective {self.objective}")
        # endregion
        self.tell(e)
    return self.get_best()
orig_y_wgts is not None: _y_state_dict = [(p, p.data) for p, _ in orig_y_wgts] for p, w in orig_y_wgts: p.data = w.to(device=p.data.device) orig_opts: dict[tuple[int, ...], torch.Tensor] = {} for i in range(len(orig_eval_inputs.front().data)): ipt = orig_eval_inputs.extract(i, eval_kwargs) y = eval_module(*ipt.args, **ipt.kwargs) y = y[0] if not isinstance(y, torch.Tensor) else y assert isinstance(y, torch.Tensor), "eval_mod should return a tensor" orig_opts[(i,)] = y.to(device=self.opts_device or y.device, non_blocking=True) del ipt, y for p, s in _x_state_dict: p.data = s for p, s in _y_state_dict: p.data = s del orig_x_wgts, orig_y_wgts, orig_eval_inputs, _x_state_dict, _y_state_dict else: raise ValueError(f"Unknown objective {self.objective}") gc.collect() torch.cuda.empty_cache() # endregion while not self.is_done(): self.ask() e: list[torch.Tensor] = [] # region Step 2: Calculate the outputs errors if self.objective == SearchBasedCalibObjective.OutputsError: self._process_opts_centric_mod( x_wgts=x_wgts, y_wgts=y_wgts, x_mods=x_mods, y_mods=y_mods, **kwargs, ) e = [None] for i in range(len(eval_inputs.front().data)): ipt = eval_inputs.extract(i, eval_kwargs) y = eval_module(*ipt.args, **ipt.kwargs) y = y[0] if not isinstance(y, torch.Tensor) else y assert isinstance(y, torch.Tensor), "eval_mod should return a tensor" y = (y - orig_opts[(i,)].to(device=y.device, non_blocking=True)).to(self.develop_dtype) y = y.pow_(self.config.degree).sum().view(-1) if e[0] is None: e[0] = y else: e[0].add_(y) del ipt, y self._recover_mod() else: raise ValueError(f"Unknown objective {self.objective}") # endregion self.tell(e) return self.get_best() ================================================ FILE: deepcompressor/calib/smooth.py ================================================ # -*- coding: utf-8 -*- """Smooth quantization module.""" import gc import typing as tp from dataclasses import _MISSING_TYPE, MISSING, dataclass import torch import torch.nn as nn from ..data.cache 
import TensorsCache from ..data.common import TensorType from ..quantizer.processor import Quantizer from ..utils import math, tools from ..utils.common import split_sequence from ..utils.hooks import BaseInputPackager, BaseOutputPackager, BaseTensorProcessor from .config import SearchBasedCalibObjective, SmoothCalibConfig, SmoothSpanMode from .metric import ChannelMetric from .search import SearchBasedCalibrator __all__ = [ "smooth_linear_modules", "smooth_attention", "convert_smooth_upscale_to_downscale", "ActivationSmoother", "get_smooth_scale", "get_smooth_span", "SmoothCalibrator", "SmoothLinearCalibrator", "SmoothAttentionCalibrator", ] @dataclass class ActivationSmoother(BaseTensorProcessor): """The quantization smoothing processor.""" smooth_scale: torch.Tensor channels_dim: int upscale: bool = False develop_dtype: torch.dtype | None = None # region hook-related attributes input_packager: BaseInputPackager | None = None output_packager: BaseOutputPackager | None = None # endregion def is_enabled(self) -> bool: return self.smooth_scale is not None def get_input_packager(self) -> BaseInputPackager | None: return self.input_packager def get_output_packager(self) -> BaseOutputPackager | None: return self.output_packager def process(self, tensor: torch.Tensor) -> torch.Tensor: """Process the tensor. Args: tensor (`torch.Tensor`): The tensor to smooth. Returns: `torch.Tensor`: The smoothed tensor. 
@torch.inference_mode()
def get_smooth_scale(*, alpha_base: torch.Tensor, beta_base: torch.Tensor, alpha: float, beta: float) -> torch.Tensor:
    """Compute the smoothing scale ``alpha_base^alpha / beta_base^beta``.

    Entries of the result that are exactly zero are replaced by one. If any
    entry is NaN or infinite, the whole scale is reset to ones.

    Args:
        alpha_base (`torch.Tensor`):
            Base span raised to the power ``alpha``.
        beta_base (`torch.Tensor`):
            Base span raised to the power ``beta`` (as a divisor).
        alpha (`float`):
            Exponent for ``alpha_base``, in ``[0, 1]``.
        beta (`float`):
            Exponent for ``beta_base``, in ``[0, 1]``.

    Returns:
        `torch.Tensor`:
            The smoothing scale, a new tensor (the inputs are not modified).
    """
    assert 0 <= alpha <= 1 and 0 <= beta <= 1, "The smooth factors should be in [0, 1]."
    uses_alpha = alpha > 0
    if not uses_alpha:
        # Only the beta term contributes: 1 / beta_base^beta.
        scale = beta_base.pow(-beta)
    else:
        scale = alpha_base.pow(alpha)
        if beta > 0:
            # `scale` is a fresh tensor here, so the in-place division is safe.
            scale.div_(beta_base.pow(beta))
    scale.masked_fill_(scale == 0, 1)
    if not torch.isfinite(scale).all():
        scale.fill_(1)
    assert not scale.isnan().any(), "The smooth scale contains NaN."
    assert not scale.isinf().any(), "The smooth scale contains Inf."
    return scale
""" assert tensor_type in (TensorType.Weights, TensorType.Outputs) super().__init__( tensor_type=tensor_type, config=config, w_quantizer=w_quantizer, x_quantizer=x_quantizer, y_quantizer=y_quantizer, develop_dtype=develop_dtype, ) self.num_heads = num_heads self.num_head_repeats = num_head_repeats self.with_rope = self.tensor_type != TensorType.Weights and with_rope # region set group shapes of weights, inputs and outputs if self.needs_w_quant: w_group_shape = list(self.w_quantizer.config.largest_group_shape) else: w_group_shape = [1, None, -1] if self.needs_x_quant: x_group_shape = list(self.x_quantizer.config.largest_group_shape) else: x_group_shape = [1, None, -1] if self.needs_y_quant: y_group_shape = list(self.y_quantizer.config.largest_group_shape) else: y_group_shape = [1, None, -1] w_group_shape[1] = x_group_shape[1] if w_group_shape[1] is None else w_group_shape[1] if self.tensor_type == TensorType.Weights: x_group_shape[1] = w_group_shape[1] if x_group_shape[1] is None else x_group_shape[1] else: x_group_shape[1] = y_group_shape[1] if x_group_shape[1] is None else x_group_shape[1] y_group_shape[1] = x_group_shape[1] if y_group_shape[1] is None else y_group_shape[1] self.w_group_shape, self.x_group_shape, self.y_group_shape = w_group_shape, x_group_shape, y_group_shape # endregion self.alpha_beta_pairs = self.config.get_alpha_beta_pairs() self.num_iters = 1 @property def population_size(self) -> int: """Get the population size.""" return len(self.alpha_beta_pairs) * len(self.span_mode_pairs) @property def allows_x_quant_for_wgts(self) -> bool: """Whether the calibrator allows input quantization when tensor_type is Weights.""" return self.config.allow_a_quant @property def allows_w_quant_for_wgts(self) -> bool: """Whether the calibrator needs weight quantization when tensor_type is Weights.""" return self.config.allow_b_quant @property def allows_w_quant_for_ipts(self) -> bool: """Whether the calibrator allows weight quantization when tensor_type is 
def _reset(  # noqa: C901
    self,
    *,
    x_wgts: list[torch.Tensor | nn.Parameter],
    x_acts: TensorsCache,
    y_wgts: list[torch.Tensor | nn.Parameter] = None,
    y_acts: TensorsCache | None = None,
    **kwargs,
) -> None:
    """Reset the calibrator and precompute the spans used to build candidate scales.

    The method derives the channel count from the first weight, computes one span
    tensor per configured span mode for each side of the computation, stores the
    paired spans in ``self.span_pairs`` (in the order of ``self.span_mode_pairs``),
    and clears the best-so-far search state.

    Args:
        x_wgts (`list[torch.Tensor | nn.Parameter]`):
            The weights in x-w computation, or weights that generates x for y-x computation.
        x_acts (`TensorsCache`):
            The x activations. It should be x for x-w or y-x computation.
            Exactly one cached tensor source is allowed.
        y_wgts (`list[torch.Tensor | nn.Parameter]` or `None`, *optional*, defaults to `None`):
            The weights that generates y for y-x computation.
            Iterated unconditionally when `tensor_type` is not `TensorType.Weights`.
        y_acts (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The y activations. It should be y for y-x computation.
            Dereferenced unconditionally when `tensor_type` is not `TensorType.Weights`.
        **kwargs:
            Accepted and ignored by this method.
    """
    wgts_centric = self.tensor_type == TensorType.Weights
    # Weights-centric: channels are dim 1 of the weight; otherwise they are dim 0.
    self.num_in_channels = x_wgts[0].shape[1] if wgts_centric else x_wgts[0].shape[0]
    device = x_wgts[0].device
    # `num_unique_heads == 0` doubles as the flag "no head-repeat handling needed".
    if self.num_heads > 1 and self.num_head_repeats > 1:
        self.num_unique_heads = self.num_heads // self.num_head_repeats
    else:
        self.num_unique_heads = 0
    # region get x spans
    assert (
        x_acts.num_tensors == 1
    ), f"Only one input is allowed, got {x_acts.num_tensors}=len({list(x_acts.keys())})"
    x_tensors = x_acts.front().get_standardized_data(reshape=False)
    assert all(x.shape[1] == self.num_in_channels for x in x_tensors)
    x_spans = {}
    # x plays the "alpha" role in x-w computation and the "beta" role in y-x computation.
    for span_mode in self.alpha_span_modes if wgts_centric else self.beta_span_modes:
        x_span = get_smooth_span(
            x_tensors,
            group_shape=self.x_group_shape,
            span_mode=span_mode,
            device=device,
            dtype=self.develop_dtype,
        )
        if self.num_unique_heads > 0:
            # Reduce over the repeat axis (max for "Max" span modes, mean otherwise) and
            # broadcast back, so every repeat of a unique head carries the same span.
            x_span = x_span.view(self.num_unique_heads, self.num_head_repeats, -1)
            x_span = (x_span.amax if "Max" in span_mode.name else x_span.mean)(dim=1, keepdim=True)
            x_span = x_span.expand(self.num_unique_heads, self.num_head_repeats, -1).reshape(-1)
        if self.tensor_type == TensorType.Outputs and self.with_rope:
            # Split each head into two chunks and share one reduced span across both
            # (presumably the two rotary halves of the head -- TODO confirm).
            x_span = x_span.view(self.num_heads, 2, -1)
            x_span = (x_span.amax if "Max" in span_mode.name else x_span.mean)(dim=1, keepdim=True)
            x_span = x_span.expand(self.num_heads, 2, -1).reshape(-1)
        x_spans[span_mode] = x_span
        # The level guard avoids the `.item()` synchronizations unless debug logging is on.
        if self.logger.level <= tools.logging.DEBUG:
            self.logger.debug("+ x - %s", span_mode.name)
            self.logger.debug("+ x = [min=%.4f, max=%.4f]", x_span.min().item(), x_span.max().item())
    del x_tensors
    # endregion
    if wgts_centric:
        assert all(w.shape[1] == self.num_in_channels for w in x_wgts)
        w_tensors = [w.data for w in x_wgts]
        w_spans = {}
        for span_mode in self.beta_span_modes:
            # NOTE: unlike the activation spans, no `device` is passed here.
            w_span = get_smooth_span(
                w_tensors,
                group_shape=self.w_group_shape,
                span_mode=span_mode,
                dtype=self.develop_dtype,
            )
            if self.num_unique_heads > 0:
                # Same repeat-sharing reduction as for the x spans.
                w_span = w_span.view(self.num_unique_heads, self.num_head_repeats, -1)
                w_span = (w_span.amax if "Max" in span_mode.name else w_span.mean)(dim=1, keepdim=True)
                w_span = w_span.expand(self.num_unique_heads, self.num_head_repeats, -1).reshape(-1)
            w_spans[span_mode] = w_span
            if self.logger.level <= tools.logging.DEBUG:
                self.logger.debug("+ w - %s", span_mode.name)
                self.logger.debug("+ w = [min=%.4f, max=%.4f]", w_span.min().item(), w_span.max().item())
        # Each pair is (alpha base, beta base) = (x span, w span).
        self.span_pairs: list[tuple[torch.Tensor, torch.Tensor]] = [
            (x_spans[x_span_mode], w_spans[w_span_mode]) for x_span_mode, w_span_mode in self.span_mode_pairs
        ]
    else:
        assert y_acts.num_tensors == 1, f"Only one output source is allowed, got {y_acts.num_tensors}"
        # With repeated heads, y has `num_head_repeats` times fewer channels than x.
        if self.num_unique_heads > 0:
            num_out_channels = self.num_in_channels // self.num_head_repeats
        else:
            num_out_channels = self.num_in_channels
        assert all(w.shape[0] == self.num_in_channels for w in x_wgts)
        assert all(w.shape[0] == num_out_channels for w in y_wgts)
        y_tensors = y_acts.front().get_standardized_data(reshape=False)
        assert all(y.shape[1] == num_out_channels for y in y_tensors)
        y_spans = {}
        for span_mode in self.alpha_span_modes:
            # NOTE: the y spans are grouped with `self.x_group_shape`.
            y_span = get_smooth_span(
                y_tensors,
                group_shape=self.x_group_shape,
                span_mode=span_mode,
                device=device,
                dtype=self.develop_dtype,
            )
            if self.num_unique_heads > 0:
                # Tile each unique head's span across its repeats so it lines up with x.
                y_span = y_span.view(self.num_unique_heads, 1, -1)
                y_span = y_span.expand(self.num_unique_heads, self.num_head_repeats, -1).reshape(-1)
            if self.tensor_type == TensorType.Outputs and self.with_rope:
                # Same two-chunk sharing as applied to the x spans above.
                y_span = y_span.view(self.num_heads, 2, -1)
                y_span = (y_span.amax if "Max" in span_mode.name else y_span.mean)(dim=1, keepdim=True)
                y_span = y_span.expand(self.num_heads, 2, -1).reshape(-1)
            y_spans[span_mode] = y_span
            if self.logger.level <= tools.logging.DEBUG:
                self.logger.debug("+ y - %s", span_mode.name)
                self.logger.debug("+ y = [min=%.4f, max=%.4f]", y_span.min().item(), y_span.max().item())
        # Each pair is (alpha base, beta base) = (y span, x span).
        self.span_pairs: list[tuple[torch.Tensor, torch.Tensor]] = [
            (y_spans[y_span_mode], x_spans[x_span_mode]) for y_span_mode, x_span_mode in self.span_mode_pairs
        ]
    # Fresh search state: nothing evaluated yet.
    self.best_error: list[torch.Tensor] = None
    self.best_scale: torch.Tensor = None
    self.error_history: list[tuple[float, float]] = []
def _reshape_scale(
    self, scale: torch.Tensor, tensor: torch.Tensor, channels_dim: int, needs_reduction: bool = False
) -> torch.Tensor:
    """Return `scale` viewed so that it broadcasts against `tensor` along `channels_dim`.

    Args:
        scale (`torch.Tensor`):
            The flat smooth scale.
        tensor (`torch.Tensor`):
            The tensor the scale will be applied to; only its number of dimensions is used.
        channels_dim (`int`):
            The dimension of `tensor` that the scale should span.
        needs_reduction (`bool`, *optional*, defaults to `False`):
            If `True` and head repeats are in effect (`self.num_unique_heads > 0`),
            keep only the first repeat of every unique head before reshaping.

    Returns:
        `torch.Tensor`: A view of the scale with size `-1` at `channels_dim` and `1` elsewhere.
    """
    if self.num_unique_heads > 0:
        if needs_reduction:
            # (unique heads, repeats, channels) -> first repeat only -> flat again
            grouped = scale.view(self.num_unique_heads, self.num_head_repeats, -1)
            scale = grouped.select(1, 0).reshape(-1)
    broadcast_shape = [1 for _ in range(tensor.ndim)]
    broadcast_shape[channels_dim] = -1
    return scale.view(broadcast_shape)
def _process_y_in_yx(self, y: torch.Tensor, channels_dim: int | _MISSING_TYPE = MISSING) -> torch.Tensor:
    """Simulate quantization of `y` in y-x computation under the current candidate scale.

    When the objective is not `SearchBasedCalibObjective.OutputsError`, `y` is divided by
    the candidate scale, quantized with the y quantizer, and multiplied back. Otherwise `y`
    is quantized as-is.

    Args:
        y (`torch.Tensor`):
            The y activations.
        channels_dim (`int`, *optional*):
            The channels dimension of `y`. If omitted, the y quantizer's own
            `channels_dim` is used when the scale is applied here; in the
            `OutputsError` case the sentinel is forwarded to the quantizer unchanged.

    Returns:
        `torch.Tensor`: The processed tensor with the shape and dtype of the input
        (the input itself if y quantization is not needed).
    """
    if not self.needs_y_quant_for_opts:
        return y
    shape, dtype = y.shape, y.dtype
    applies_scale = self.objective != SearchBasedCalibObjective.OutputsError
    if applies_scale:
        if channels_dim is MISSING:
            # Fix: default to the quantizer that actually quantizes `y`. Reading the
            # x quantizer's attribute here was inconsistent and fails when only the
            # y quantizer is configured.
            channels_dim = self.y_quantizer.channels_dim
        scale = self._reshape_scale(self.candidate, y, channels_dim, needs_reduction=True)
        # Always work on a private copy so the in-place ops below never touch the caller's tensor.
        y = y.to(dtype=self.develop_dtype) if dtype != self.develop_dtype else y.clone()
        y = y.div_(scale)
    # ! `y` is already scaled during `_process_opts_centric_mod` by scaling `yw`
    y = self.y_quantizer.quantize(
        y,
        channels_dim=channels_dim,
        default_dtype=dtype,
        develop_dtype=self.develop_dtype,
    ).data
    if applies_scale:
        # Undo the smoothing so the result is comparable with the unscaled reference.
        y = y.mul_(scale).to(dtype=dtype)
    return y.view(shape)
@staticmethod
def _update_best(
    *,
    best_error: list[torch.Tensor] | None,
    best_scale: torch.Tensor,
    error: list[torch.Tensor],
    scale: torch.Tensor,
    numel: int,
    num_channels: int,
    num_heads: int,
    num_head_repeats: int,
) -> tuple[list[torch.Tensor], torch.Tensor]:
    """Merge the latest candidate into the best-so-far error and scale.

    Args:
        best_error (`list[torch.Tensor]` or `None`):
            The best errors so far, or `None` if no candidate has been recorded yet.
        best_scale (`torch.Tensor`):
            The best scale so far.
        error (`list[torch.Tensor]`):
            The errors of the latest candidate.
        scale (`torch.Tensor`):
            The latest candidate scale.
        numel (`int`):
            The number of elements in each error tensor (1 means a tensor-wise error).
        num_channels (`int`):
            The number of channels covered by the scale.
        num_heads (`int`):
            The number of heads.
        num_head_repeats (`int`):
            The number of head repeats.

    Returns:
        `tuple[list[torch.Tensor], torch.Tensor]`:
            The updated best errors and best scale. In the group-wise branch the
            tensors in `best_error` and the storage of `best_scale` are modified
            in place, and the returned scale is shaped `(num_groups, group_size)`.
    """
    if best_error is None:
        # First candidate: adopt it as-is (the same objects are returned, not copies).
        return error, scale
    elif numel == 1:  # tensor wise quantization error
        # Replace only if no error term got worse.
        if all(e <= b for b, e in zip(best_error, error, strict=True)):
            return error, scale
        return best_error, best_scale
    else:  # channel group wise quantization error
        assert num_channels % numel == 0
        # One error entry per channel group.
        group_size, num_groups = num_channels // numel, numel
        needs_reduction = num_heads > 1 and num_head_repeats > 1
        if needs_reduction:
            # Work out how the error groups tile the (unique head, repeat) layout so
            # that errors can be summed over repeats below.
            num_head_channels = num_channels // num_heads
            num_unique_heads = num_heads // num_head_repeats
            if num_head_channels >= group_size:
                # A head spans one or more whole groups.
                assert num_head_channels % group_size == 0
                num_groups_per_head = num_head_channels // group_size
                num_repeats = num_head_repeats
                num_unqiue_heads_per_group = 1
            else:
                # A group spans several whole heads.
                assert group_size % num_head_channels == 0
                num_heads_per_group = group_size // num_head_channels
                if num_heads_per_group < num_head_repeats:
                    # The repeats of one unique head are spread over several groups.
                    assert num_head_repeats % num_heads_per_group == 0
                    num_groups_per_head = 1
                    num_repeats = num_head_repeats // num_heads_per_group
                    num_unqiue_heads_per_group = 1
                else:
                    # A group already holds every repeat of one or more unique heads.
                    assert num_heads_per_group % num_head_repeats == 0
                    num_groups_per_head = 1
                    num_repeats = 1
                    num_unqiue_heads_per_group = num_heads_per_group // num_head_repeats
            num_uniques = num_unique_heads // num_unqiue_heads_per_group
        # `num_repeats` only exists when the branch above ran; `and` short-circuits otherwise.
        needs_reduction = needs_reduction and num_repeats > 1
        # `pos[g]` stays True only if group g improved strictly for every error term.
        pos = torch.full((numel,), True, device=error[0][0].device)
        for b, e in zip(best_error, error, strict=True):
            if needs_reduction:
                # Compare errors summed over the repeat axis, then broadcast the verdict
                # back to every repeat so all of them switch together.
                b = b.view(num_uniques, num_repeats, num_groups_per_head).sum(dim=1, keepdim=True)
                e = e.view(num_uniques, num_repeats, num_groups_per_head).sum(dim=1, keepdim=True)
                pos = pos & (e < b).expand(num_uniques, num_repeats, num_groups_per_head).reshape_as(pos)
            else:
                pos = pos & (e < b)
        # In-place update of the stored best errors for the improved groups.
        for b, e in zip(best_error, error, strict=True):
            b[pos] = e[pos]
        # Expand the per-group mask to per-channel and copy the improved scale entries.
        pos = pos.view(num_groups, 1).expand(num_groups, group_size)
        best_scale = best_scale.view(num_groups, group_size)
        best_scale[pos] = scale.view(num_groups, group_size)[pos]
        return best_error, best_scale
def calibrate(
    self,
    q_proj_weight: nn.Parameter,
    k_proj_weight: nn.Parameter,
    queries: TensorsCache,
    keys: TensorsCache,
    query_module: nn.Module,
    key_module: nn.Module,
    eval_module: nn.Module | None = None,
    eval_inputs: TensorsCache | None = None,
    eval_kwargs: dict[str, tp.Any] | None = None,
) -> tp.Any:
    """Calibrate the smooth scale for attention.

    Queries are mapped onto the generic "x" side and keys onto the generic "y" side
    of the parent calibrator.

    Args:
        q_proj_weight (`nn.Parameter`):
            The query projection weight.
        k_proj_weight (`nn.Parameter`):
            The key projection weight.
        queries (`TensorsCache`):
            The query activations.
        keys (`TensorsCache`):
            The key activations.
        query_module (`nn.Module`):
            The module that generates the query activations,
            e.g., either `q_proj` for pre-rope or `q_rotary_emb` for post-rope.
        key_module (`nn.Module`):
            The module that generates the key activations,
            e.g., either `k_proj` for pre-rope or `k_rotary_emb` for post-rope.
        eval_module (`nn.Module`, *optional*):
            The evaluation module.
        eval_inputs (`TensorsCache`, *optional*):
            The evaluation inputs.
        eval_kwargs (`dict[str, tp.Any]`, *optional*):
            The evaluation keyword arguments.

    Returns:
        tp.Any: The evaluation result.
    """
    # Query side -> "x" slots of the parent API.
    x_side = {"x_wgts": [q_proj_weight], "x_acts": queries, "x_mods": [query_module]}
    # Key side -> "y" slots of the parent API.
    y_side = {"y_wgts": [k_proj_weight], "y_acts": keys, "y_mods": [key_module]}
    evaluation = {"eval_module": eval_module, "eval_inputs": eval_inputs, "eval_kwargs": eval_kwargs}
    return super().calibrate(**x_side, **y_side, **evaluation)
def smooth_downscale_param(param: nn.Parameter, scale: torch.Tensor, channels_dim: int = 0) -> None:
    """Divide a parameter by a smooth scale along one dimension, in place.

    Only the first `scale.numel()` entries along `channels_dim` are divided; any
    remaining entries are left untouched. The division is carried out in the
    scale's dtype and the result is cast back to the parameter's dtype.

    Args:
        param (`nn.Parameter`):
            The parameter to smooth.
        scale (`torch.Tensor`):
            The scale to downscale.
        channels_dim (`int`, *optional*, defaults to `0`):
            The dimension of channels.
    """
    original_dtype = param.dtype
    # Shape with -1 on the channel axis and 1 elsewhere, so the scale broadcasts.
    broadcast_shape = [1 for _ in range(param.ndim)]
    broadcast_shape[channels_dim] = -1
    divisor = scale.to(device=param.device).view(broadcast_shape)
    working = param.data.to(dtype=divisor.dtype)
    # `narrow` returns a view, so dividing it mutates `working` directly.
    leading_channels = working.narrow(channels_dim, 0, divisor.numel())
    leading_channels.div_(divisor)
    param.data = working.to(dtype=original_dtype)
    has_nan = param.data.isnan().any()
    assert not has_nan, "NaN found in param when smoothing"
    has_inf = param.data.isinf().any()
    assert not has_inf, "Inf found in param when smoothing"
@torch.inference_mode()
def smooth_linear_modules(
    prevs: nn.Module | tp.Sequence[nn.Module] | None,
    modules: tp.Sequence[nn.Linear] | nn.Linear,
    *,
    scale: torch.Tensor | None,
    config: SmoothCalibConfig | None = None,
    weight_quantizer: Quantizer | None = None,
    input_quantizer: Quantizer | None = None,
    weights: list[nn.Parameter] | None = None,
    inputs: TensorsCache | None = None,
    eval_inputs: TensorsCache | None = None,
    eval_module: nn.Module | None = None,
    eval_kwargs: dict[str, tp.Any] | None = None,
    num_heads: int = 1,
    num_head_repeats: int = 1,
    splits: list[int] | None = None,
    extra_modules: list[nn.Linear] | None = None,
    develop_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
    """Smooth two consecutive modules.

    The weights of `modules` (and `extra_modules`) are multiplied by the scale along
    their input channels; the weights and biases of `prevs` are divided by the
    matching scale along their output channels.

    Args:
        prevs (`nn.Module` or `list[nn.Module]` or `None`):
            The first module(s). `None` (or `None` entries) are skipped.
        modules (`nn.Linear` or `list[nn.Linear]`):
            The second module(s). A single module, a list or a tuple is accepted.
        scale (`torch.Tensor` or `None`):
            The smooth quantization scale. If `None`, it is calibrated first.
        config (`SmoothCalibConfig` or `None`, *optional*, defaults to `None`):
            The smooth quantization configuration.
        weight_quantizer (`Quantizer` or `None`, *optional*, defaults to `None`):
            The quantizer for weights.
        input_quantizer (`Quantizer` or `None`, *optional*, defaults to `None`):
            The quantizer for inputs.
        weights (`list[nn.Parameter]` or `None`, *optional*, defaults to `None`):
            The weights of the modules. If `None`, the weights of the modules will be used.
        inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The cache of the input activations.
        eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The cache of the inputs corresponding to the `eval_module`.
        eval_module (`nn.Module`, *optional*, defaults to `None`):
            The module to evaluate the quantization error.
        eval_kwargs (`dict[str, tp.Any]`, *optional*, defaults to `None`):
            The keyword arguments for evaluation.
        num_heads (`int`, *optional*, defaults to `1`):
            The number of heads.
        num_head_repeats (`int`, *optional*, defaults to `1`):
            The number of head repeats.
        splits (`list[int]` or `None`, *optional*, defaults to `None`):
            Forwarded unchanged to the calibrator's `calibrate` call; only used when
            `scale` is `None`.
        extra_modules (`list[nn.Module]` or `None`, *optional*, defaults to `None`):
            Extra modules to smooth. They are upscaled like `modules` but take no
            part in calibration.
        develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            The development data type.

    Returns:
        `torch.Tensor`:
            The smooth quantization scale in CPU.
    """
    # Fix: always normalize to lists. `modules` may legitimately be a tuple, and
    # `tuple + list` below would raise a TypeError.
    modules = list(modules) if isinstance(modules, (list, tuple)) else [modules]
    extra_modules = [] if extra_modules is None else list(extra_modules)
    if scale is None:
        assert inputs is not None or eval_inputs is not None, "inputs or eval_inputs must be provided"
        scale = SmoothLinearCalibrator(
            config=config,
            weight_quantizer=weight_quantizer,
            input_quantizer=input_quantizer,
            num_heads=num_heads,
            num_head_repeats=num_head_repeats,
            develop_dtype=develop_dtype,
        ).calibrate(
            x_wgts=[module.weight for module in modules] if weights is None else weights,
            x_acts=inputs,
            x_mods=modules,
            eval_inputs=eval_inputs,
            eval_module=eval_module,
            eval_kwargs=eval_kwargs,
            splits=splits,
        )
        # Release calibration temporaries before touching the real weights.
        gc.collect()
        torch.cuda.empty_cache()
    upscale = scale
    for module in modules + extra_modules:
        # Modules may live on different devices; move the scale lazily.
        upscale = upscale.to(device=module.weight.device)
        smooth_upscale_param(module.weight, upscale, channels_dim=1)
    if prevs is not None:
        # With repeated heads the producer has fewer channels than the consumer.
        downscale = convert_smooth_upscale_to_downscale(upscale, num_heads=num_heads, num_head_repeats=num_head_repeats)
        if isinstance(prevs, nn.Module):
            prevs = [prevs]
        for module in prevs:
            if module is None:
                continue
            downscale = downscale.to(device=module.weight.device)
            smooth_downscale_param(module.weight, downscale, channels_dim=0)
            if hasattr(module, "bias") and module.bias is not None:
                smooth_downscale_param(module.bias, downscale, channels_dim=0)
    return scale.to(device="cpu")
@torch.inference_mode()
def smooth_attention(
    *,
    q_proj: nn.Linear,
    k_proj: nn.Linear,
    scale: torch.Tensor | None,
    config: SmoothCalibConfig | None = None,
    query_quantizer: Quantizer | None = None,
    key_quantizer: Quantizer | None = None,
    queries: TensorsCache | None = None,
    keys: TensorsCache | None = None,
    attn_q: nn.Module | None = None,
    attn_k: nn.Module | None = None,
    eval_inputs: TensorsCache | None = None,
    eval_module: nn.Module | None = None,
    eval_kwargs: dict[str, tp.Any] | None = None,
    num_heads: int = 1,
    num_head_repeats: int = 1,
    with_rope: bool = True,
    develop_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
    """Smooth attention.

    The query projection is multiplied by the scale along its output channels and
    the key projection is divided by the matching scale, so that the query-key
    product is unchanged.

    Args:
        q_proj (`nn.Linear`):
            The query projection module.
        k_proj (`nn.Linear`):
            The key projection module.
        scale (`torch.Tensor` or `None`):
            The smooth quantization scale. If `None`, it is calibrated first.
        config (`SmoothCalibConfig` or `None`, *optional*, defaults to `None`):
            The smooth quantization configuration.
        query_quantizer (`Quantizer` or `None`, *optional*, defaults to `None`):
            The quantizer for queries.
        key_quantizer (`Quantizer` or `None`, *optional*, defaults to `None`):
            The quantizer for keys.
        queries (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The cache of the queries.
        keys (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The cache of the keys.
        attn_q (`nn.Module` or `None`, *optional*, defaults to `None`):
            The module that generates the queries.
        attn_k (`nn.Module` or `None`, *optional*, defaults to `None`):
            The module that generates the keys.
        eval_inputs (`TensorsCache` or `None`, *optional*, defaults to `None`):
            The cache of the inputs corresponding to the evaluation module.
        eval_module (`nn.Module`, *optional*, defaults to `None`):
            The module to evaluate the quantization error.
        eval_kwargs (`dict[str, tp.Any]`, *optional*, defaults to `None`):
            The keyword arguments for evaluation.
        num_heads (`int`, *optional*, defaults to `1`):
            The number of heads.
        num_head_repeats (`int`, *optional*, defaults to `1`):
            The number of head repeats.
        with_rope (`bool`, *optional*, defaults to `True`):
            Whether quantization is applied after rotary position embedding.
        develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            The development data type.

    Returns:
        `torch.Tensor`:
            The smooth quantization scale in CPU.
    """
    if scale is None:
        assert queries is not None and keys is not None and eval_inputs is not None
        assert attn_q is not None and attn_k is not None, "modules must be provided"
        scale = SmoothAttentionCalibrator(
            config=config,
            query_quantizer=query_quantizer,
            key_quantizer=key_quantizer,
            num_heads=num_heads,
            num_head_repeats=num_head_repeats,
            with_rope=with_rope,
            develop_dtype=develop_dtype,
        ).calibrate(
            q_proj_weight=q_proj.weight,
            k_proj_weight=k_proj.weight,
            queries=queries,
            keys=keys,
            query_module=attn_q,
            key_module=attn_k,
            eval_inputs=eval_inputs,
            eval_module=eval_module,
            eval_kwargs=eval_kwargs,
        )
        # Release calibration temporaries before touching the real weights.
        gc.collect()
        torch.cuda.empty_cache()
    upscale = scale.to(device=q_proj.weight.device)
    smooth_upscale_param(q_proj.weight, upscale, channels_dim=0)
    # Fix: a projection computes W x + b, so its output is only scaled per channel
    # if the bias is scaled together with the weight. Skipping the bias breaks the
    # q.k equivalence for biased projections.
    q_bias = getattr(q_proj, "bias", None)
    if q_bias is not None:
        smooth_upscale_param(q_bias, upscale, channels_dim=0)
    # With repeated heads the key projection has fewer channels than the query one.
    downscale = convert_smooth_upscale_to_downscale(upscale, num_heads=num_heads, num_head_repeats=num_head_repeats)
    smooth_downscale_param(k_proj.weight, downscale, channels_dim=0)
    k_bias = getattr(k_proj, "bias", None)
    if k_bias is not None:
        smooth_downscale_param(k_bias, downscale, channels_dim=0)
    return scale.to(device="cpu")
// Python bindings for the extension. The module name is supplied through the
// TORCH_EXTENSION_NAME macro.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Round-to-nearest against a codebook. `tensor` and `codebook` are required;
  // `inplace` and `bnb` are optional keyword arguments that default to false.
  m.def("round_to_nearest_in_codebook_cuda", &round_to_nearest_in_codebook_cuda,
        py::arg("tensor"), py::arg("codebook"), py::arg("inplace") = false,
        py::arg("bnb") = false, "RTN with codebook (CUDA)");
}
// Round every element of `tensor` to its nearest entry in `codebook`.
//
// Arguments:
//   tensor   - CUDA floating-point tensor to quantize.
//   codebook - CUDA tensor of the same dtype holding the candidate values. The
//              search routines assume it is sorted in ascending order.
//   inplace  - when true, the result is written back into `tensor`.
//   bnb      - when true and the codebook size is a power of two, use the
//              bitsandbytes-style search; otherwise use the generic search.
//
// Returns the rounded tensor (`tensor` itself when `inplace` is true).
//
// NOTE(review): the explicit template arguments and launch configuration below
// were reconstructed as <scalar_t, bnb> and <<<blocks, threads>>>; confirm them
// against the kernel declaration in this file.
torch::Tensor round_to_nearest_in_codebook_cuda(torch::Tensor tensor,
                                                torch::Tensor codebook,
                                                bool inplace, bool bnb) {
  // The kernel dereferences raw device pointers, so reject host tensors and
  // mismatched devices/dtypes up front instead of faulting on the GPU.
  TORCH_CHECK(tensor.is_cuda() && codebook.is_cuda(),
              "tensor and codebook must be CUDA tensors");
  TORCH_CHECK(tensor.device() == codebook.device(),
              "tensor and codebook must be on the same device");
  TORCH_CHECK(tensor.scalar_type() == codebook.scalar_type(),
              "tensor and codebook must have the same dtype");
  // The kernel stages the codebook in a fixed 256-entry shared-memory array;
  // a larger codebook would write past the end of it.
  // NOTE(review): with exactly 256 entries the generic (non-bnb) search probes
  // index 256 before its bounds test; worth guarding in the device function.
  const int64_t codebook_size = codebook.numel();
  TORCH_CHECK(codebook_size >= 1 && codebook_size <= 256,
              "codebook must have between 1 and 256 entries, got ",
              codebook_size);
  // The kernel indexes elements with 32-bit ints.
  constexpr int64_t kMaxElements = 2147483647;
  TORCH_CHECK(tensor.numel() <= kMaxElements,
              "tensor has too many elements: ", tensor.numel());

  auto x = tensor.contiguous();
  auto c = codebook.contiguous();
  // Allocate the output like the contiguous copy: the kernel writes linearly,
  // so an output that inherited non-contiguous strides would be scrambled.
  auto y = inplace ? x : torch::empty_like(x);
  const int N = static_cast<int>(x.numel());
  const int C = static_cast<int>(codebook_size);
  if (N == 0) {
    // Nothing to do, and a zero-block launch is an invalid configuration.
    return inplace ? tensor : y;
  }
  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;
  AT_DISPATCH_FLOATING_TYPES(
      tensor.scalar_type(), "round_to_nearest_in_codebook_cuda", [&] {
        // The bnb search halves its step each iteration and therefore needs a
        // power-of-two codebook size.
        if (bnb && (C & (C - 1)) == 0) {
          round_to_nearest_in_codebook_kernel<scalar_t, true>
              <<<blocks, threads>>>(x.data_ptr<scalar_t>(),
                                    c.data_ptr<scalar_t>(),
                                    y.data_ptr<scalar_t>(), N, C);
        } else {
          round_to_nearest_in_codebook_kernel<scalar_t, false>
              <<<blocks, threads>>>(x.data_ptr<scalar_t>(),
                                    c.data_ptr<scalar_t>(),
                                    y.data_ptr<scalar_t>(), N, C);
        }
      });
  if (inplace && !x.is_same(tensor)) {
    // `contiguous()` made a copy, so the kernel updated the copy only; write
    // the result back to honor the in-place contract.
    tensor.copy_(y);
    return tensor;
  }
  return y;
}
@dataclass
class TensorCache:
    """Tensor cache.

    Args:
        data (`list[torch.Tensor]`):
            Cached tensors.
        channels_dim (`int`, *optional*, defaults to `1`):
            Channels dimension.
        reshape (`ReshapeFn`, *optional*, defaults to `ReshapeFn()`):
            Function for reshaping inputs to 2-dimension used for GEMM.
        num_cached (`int`, *optional*, defaults to `0`):
            Number of cached tensors.
        num_total (`int`, *optional*, defaults to `0`):
            Number of total tensors.
        num_samples (`int`, *optional*, defaults to `0`):
            Number of samples.
        orig_device (`torch.device` or `str`, *optional*, defaults to `torch.device("cpu")`):
            Original device.
    """

    data: list[torch.Tensor] = field(default_factory=list)
    channels_dim: int = 1
    reshape: ReshapeFn = ReshapeFn()
    num_cached: int = 0
    num_total: int = 0
    num_samples: int = 0
    orig_device: torch.device | str = torch.device("cpu")

    def clear(self):
        """Clear cached tensors and reset `num_cached`; the other counters are kept."""
        self.data.clear()
        self.num_cached = 0

    def get_factory_kwargs(self, **kwargs) -> dict[str, tp.Any]:
        """Get factory kwargs.

        Fills `channels_dim`, `reshape`, `num_cached`, `num_total` and `orig_device`
        from this cache unless the caller already supplied them. `num_samples` is
        not filled in.

        Returns:
            `dict[str, tp.Any]`: The (mutated) keyword arguments.
        """
        kwargs.setdefault("channels_dim", self.channels_dim)
        kwargs.setdefault("reshape", self.reshape)
        kwargs.setdefault("num_cached", self.num_cached)
        kwargs.setdefault("num_total", self.num_total)
        kwargs.setdefault("orig_device", self.orig_device)
        return kwargs

    def get_standardized_data(self, reshape: bool = False) -> list[torch.Tensor]:
        """Get standardized data, i.e., flatten dimensions before `channels_dim`.

        Args:
            reshape (`bool`, *optional*, defaults to `False`):
                Whether to apply reshape function.

        Returns:
            `list[torch.Tensor]`:
                Standardized data.
        """
        # Collapse every dimension in front of the channels dimension into one.
        flattened = [x.view(-1, *x.shape[self.channels_dim :]) for x in self.data]
        if reshape:
            return [self.reshape(x) for x in flattened]
        return flattened

    def repartition(self, max_batch_size: int, max_size: int, standardize: bool, reshape: bool) -> "TensorCache":
        """Relocate data based on the maximum batch size and size.

        Args:
            max_batch_size (`int`):
                Maximum batch size. A non-positive value disables repartitioning
                (and then `max_size` must be negative as well).
            max_size (`int`):
                Maximum size, i.e., the maximum total number of rows kept. A
                negative value means no limit.
            standardize (`bool`):
                Whether to standardize data, i.e., flatten dimensions before `channels_dim`.
            reshape (`bool`):
                Whether to apply reshape function. Only honored when `standardize` is set.

        Returns:
            `TensorCache`:
                Tensor cache. Its counters are scaled by the fraction of rows kept.
        """
        assert len(self.data) > 0, "No data to relocate."
        assert max_batch_size != 0, "max_batch_size must be non-zero."
        assert max_size != 0, "max_size must be non-zero."
        assert all(x.ndim == self.data[0].ndim for x in self.data), "All tensors must have the same #dims."
        assert all(x.shape == self.data[0].shape for x in self.data), "All tensors must have the same shape."
        data, dim, fn = self.data, self.channels_dim, self.reshape
        if standardize:
            data = [x.view(-1, *x.shape[dim:]) for x in self.data]
            dim = 1
            if reshape:
                # Once reshaped, channels are last and no further reshaping is pending.
                data = [fn(x) for x in data]
                dim = -1
                fn = ReshapeFn()
        # Store the channels dimension as a non-negative index.
        dim = dim % data[0].ndim
        orig_total = data[0].shape[0] * len(data)
        if max_batch_size > 0:
            batch_size = data[0].shape[0]
            if batch_size > max_batch_size:
                # Cut every tensor into full-size chunks; a trailing partial chunk is dropped.
                data = [
                    x[i * max_batch_size : (i + 1) * max_batch_size]
                    for x in data
                    for i in range(int(batch_size // max_batch_size))
                ]
                batch_size = data[0].shape[0]
            if max_size > 0 and batch_size * len(data) > max_size:
                assert max_size >= batch_size, "max_size must be greater than or equal to batch_size."
                max_chunks = max_size // batch_size
                stride = len(data) // max_chunks
                # Fix: a floor stride alone can keep more than `max_chunks` chunks
                # (e.g. 5 chunks with a budget of 3 gives stride 1), so cap the
                # selection to respect `max_size`.
                data = data[::stride][:max_chunks]
        else:
            assert max_size < 0, "max_size must be negative if max_batch_size is negative."
        used_total = data[0].shape[0] * len(data)
        ratio = used_total / orig_total
        return TensorCache(
            data,
            channels_dim=dim,
            reshape=fn,
            orig_device=self.orig_device,
            num_cached=int(math.ceil(ratio * self.num_cached)),
            num_total=int(math.ceil(ratio * self.num_total)),
            num_samples=int(math.ceil(ratio * self.num_samples)),
        )
class IOTensorsCache:
    """Pair of input and output tensor caches.

    A bare `TensorCache` passed for either side is wrapped in a `TensorsCache`;
    a `TensorsCache` or `None` is stored unchanged.
    """

    inputs: TensorsCache | None
    outputs: TensorsCache | None

    def __init__(
        self, inputs: TensorCache | TensorsCache | None = None, outputs: TensorCache | TensorsCache | None = None
    ):
        if isinstance(inputs, TensorCache):
            inputs = TensorsCache(inputs)
        self.inputs = inputs
        if isinstance(outputs, TensorCache):
            outputs = TensorsCache(outputs)
        self.outputs = outputs

    def clear(self):
        """Clear cached tensors on both sides (a side that is `None` is skipped)."""
        for side in (self.inputs, self.outputs):
            if side is not None:
                side.clear()

    def set_num_samples(self, num_samples: int):
        """Set the number of samples on both sides (a side that is `None` is skipped)."""
        for side in (self.inputs, self.outputs):
            if side is not None:
                side.set_num_samples(num_samples)
================================================ # -*- coding: utf-8 -*- """Codebook for quantization.""" from dataclasses import dataclass import torch from deepcompressor.csrc.load import _C __all__ = ["Codebook"] @dataclass class Codebook: """A codebook for quantization. Attributes: size (`int`): Number of values in the codebook. bits (`int`): Number of bits for the binary code. values (`torch.FloatTensor`): A value book in ascending order. codes (`torch.ByteTensor`): A binary book containing the binary representation of the value. """ size: int bits: int values: torch.Tensor codes: torch.Tensor def __post_init__(self): assert self.size <= self.values.numel(), "Codebook size is larger than the values size" assert self.values.shape == self.codes.shape, "Values and Codes must have the same shape" def round(self, tensor: torch.Tensor) -> torch.Tensor: """Round the tensor to the nearest value in the codebook. Args: tensor (`torch.Tensor`): A tensor to round. Returns: `torch.Tensor`: A rounded tensor. """ dtype = tensor.dtype tensor = tensor.to(self.values.dtype).contiguous() return _C.round_to_nearest_in_codebook_cuda(tensor, self.values).to(dtype=dtype) def to(self, *, device: torch.device | None = None, dtype: torch.dtype | None = None) -> "Codebook": """Move the codebook to the specified device and dtype. Args: device (`torch.device`): Device to move the codebook. dtype (`torch.dtype`): Dtype to move the codebook. Returns: `Codebook`: A codebook. """ device = device if device is not None else self.values.device dtype = dtype if dtype is not None else self.values.dtype return Codebook( size=self.size, bits=self.bits, values=self.values.to(device=device, dtype=dtype), codes=self.codes.to(device=device), ) @staticmethod def construct( maps: list[tuple[float, int]], *, bits: int, device: torch.device | str = "cpu", dtype: torch.dtype = torch.float32, ) -> "Codebook": """Create a map of values to a code of `code_bits` bits. 
Args: maps (`list[tuple[float, int]]`): A list of tuples of (value, binary code). bits (`int`): Number of bits for the binary code. device (`torch.device` or str, *optional*, defaults to `"cpu"`): Device to put the codebook and binarybook on. dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): Dtype of the codebook. Returns: `Codebook`: A codebook. """ if bits > 8: raise NotImplementedError("Codebook with more than 8 bits is not supported") assert len(maps) <= 2**bits, "Too many (value, code) maps for the code bits" size = len(maps) maps.sort(key=lambda x: x[0]) values = torch.tensor([v[0] for v in maps], device=device, dtype=dtype) codes = torch.tensor( [v[1] for v in maps], dtype=torch.uint8 if bits <= 8 else (torch.int16 if bits < 16 else torch.int32), device=device, ) return Codebook(size=size, bits=bits, values=values, codes=codes) @staticmethod def build_for_float_point( *, total_bits: int, exponent_bits: int, signed: bool = True, has_subnormal: bool = True, has_inf: bool = False, has_nan: bool = False, device: torch.device | str = "cpu", dtype: torch.dtype = torch.float32, ) -> "Codebook": """Create a map of floating point values to a code of `code_bits` bits. Args: total_bits (`int`): Number of bits for the floating point value. exponent_bits (`int`): Number of bits for the exponent. signed (`bool`, *optional*, defaults to `True`): Whether to use signed code. has_inf (`bool`, *optional*, defaults to `False`): Whether to include infinity. has_nan (`bool`, *optional*, defaults to `False`): Whether to include NaN. device (`torch.device` or str, *optional*, defaults to `"cpu"`): Device to put the codebook on. dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): Dtype of the codebook. Returns: `list[Codebook]`: A list of codebooks. 
""" mantissa_bits = total_bits - exponent_bits - int(signed) assert exponent_bits > 0, "Exponent bits must be positive" assert mantissa_bits >= 0, "Mantissa bits must be non-negative" has_nan = has_inf or has_nan sign_mask = 1 << (total_bits - 1) if mantissa_bits > 0: end_evalue = 2**exponent_bits - int(has_inf) else: end_evalue = 2**exponent_bits - int(has_nan) end_mvalue = 2**mantissa_bits bias = 2 ** (exponent_bits - 1) - 1 maps, code = [], 0 for evalue in range(end_evalue): for mvalue in range(end_mvalue): if evalue == 0 and has_subnormal: value = (mvalue / end_mvalue) * (2 ** (1 - bias)) else: value = (1 + mvalue / end_mvalue) * (2 ** (evalue - bias)) maps.append((value, code)) if signed: maps.append((-value, code | sign_mask)) code += 1 if mantissa_bits > 0 and not has_inf and has_nan: maps = maps[: -(1 + int(signed))] return Codebook.construct(maps, bits=total_bits, device=device, dtype=dtype) @staticmethod def build_for_integer( *, total_bits: int, signed: bool = True, magnitude: bool = False, device: torch.device | str = "cpu", dtype: torch.dtype = torch.float32, ) -> "Codebook": """Create a map of integer values to a code of `code_bits` bits. Args: total_bits (`int`): Number of bits for the integer value. signed (`bool`, *optional*, defaults to `True`): Whether to use signed code. magnitude (`bool`, *optional*, defaults to `False`): Whether to use magnitude-based integer. device (`torch.device` or `str`, *optional*, defaults to `"cpu"`): Device to put the codebook on. dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): Dtype of the codebook. Returns: `list[Codebook]`: A list of codebooks. 
""" if signed: end_value = 2 ** (total_bits - 1) min_value = -end_value + int(magnitude) else: end_value = 2**total_bits min_value = 0 maps = [] for value in range(min_value, end_value): if value >= 0: code = value elif magnitude: code = end_value - value else: code = end_value + end_value + value maps.append((value, code)) return Codebook.construct(maps, bits=total_bits, device=device, dtype=dtype) ================================================ FILE: deepcompressor/data/common.py ================================================ # -*- coding: utf-8 -*- """Common uantization data.""" import enum __all__ = ["TensorType"] class TensorType(enum.Enum): """The tensor type.""" Weights = enum.auto() Inputs = enum.auto() Outputs = enum.auto() ================================================ FILE: deepcompressor/data/dtype.py ================================================ # -*- coding: utf-8 -*- """Quantization data type.""" import typing as tp import torch from .codebook import Codebook __all__ = ["QuantDataType", "QDType"] class QuantDataType: """Quantization data type.""" _registered: tp.ClassVar[dict[str, "QuantDataType"]] = {} def __init__( self, total_bits: int, *, signed: bool = True, exponent_bits: int = 0, has_subnormal: bool = True, has_nan: bool = False, has_inf: bool = False, magnitude: bool = False, codebook: Codebook | None = None, codebook_name: str = "", ): """Initialize the quantization data type. Args: total_bits (`int`): Total number of bits. Must be greater than 0. signed (`bool`, *optional*, defaults to `True`): Whether the data type is signed. exponent_bits (`int`, *optional*, defaults to `0`): Number of bits for the exponent. has_subnormal (`bool`, *optional*, defaults to `True`): Whether the data type has subnormal. has_nan (`bool`, *optional*, defaults to `False`): Whether the data type has NaN if it is float-point. has_inf (`bool`, *optional*, defaults to `False`): Whether the data type has Inf if it is float-point. 
magnitude (`bool`, *optional*, defaults to `False`): Whether the data type is magnitude-based if it is integer. codebook (`Codebook` or `None`, *optional*, defaults to `None`): Codebook for the data type. codebook_name (`str`, *optional*, defaults to `""`): Name of the codebook. Must be specified if `codebook` is not `None`. """ self.__signed = signed # region set bit widths self.__total_bits = total_bits self.__exponent_bits = exponent_bits assert self.__total_bits > 0, "Total bits must be greater than 0." assert self.__exponent_bits >= 0, "Exponent bits must be non-negative." self.__mantissa_bits = self.__total_bits - self.__exponent_bits - int(self.__signed) # endregion # region set data type properties if self.__exponent_bits > 0: # for floating-point data type self.__has_subnormal = has_subnormal self.__has_inf = has_inf self.__has_nan = has_inf or has_nan self.__magnitude = True if self.__mantissa_bits == 0: assert not self.__has_inf, "Inf is not supported for exponent-only floating-point data type." if self.__exponent_bits == 1: assert not self.__has_nan, "NaN is not supported for 1-bit exponent-only floating-point data type." else: # for integer data type self.__has_subnormal = False self.__has_inf = False self.__has_nan = False self.__magnitude = magnitude # endregion # region set codebook if codebook is not None: assert self.is_float_point, "Codebook is only supported for floating-point data type." self.__codebook = codebook assert codebook_name, "Codebook name must be specified." self.__codebook_name = codebook_name assert self.max_value >= 0, "Max value must be non-negative." self.__name = self.__codebook_name if self.__name not in QuantDataType._registered: QuantDataType._registered[self.__name] = self else: _registered = QuantDataType._registered[self.__name] assert _registered.total_bits == self.total_bits, "Total bits must be the same as the registered one." 
assert _registered.exponent_bits == self.exponent_bits, ( "Exponent bits must be the same as the registered one." ) assert _registered.signed == self.signed, "Signed must be the same as the registered one." assert _registered.has_subnormal == self.has_subnormal, ( "Subnormal must be the same as the registered one." ) assert _registered.has_inf == self.has_inf, "Inf must be the same as the registered one." assert _registered.has_nan == self.has_nan, "NaN must be the same as the registered one." assert _registered.magnitude == self.magnitude, "Magnitude must be the same as the registered one." assert _registered.__codebook is not None, "Codebook must be the same as the registered one." assert torch.allclose(_registered.__codebook.values, self.__codebook.values), ( "Codebook values must be the same as the registered one." ) else: self.__codebook = None self.__codebook_name = "" self.__name = self._build_default_name() if self.__name not in QuantDataType._registered: QuantDataType._registered[self.__name] = self # endregion # region set codebooks self.__codebooks: dict[tuple[torch.device, torch.dtype], Codebook] = {} # endregion # region properties @property def name(self) -> str: """Name of the data type.""" return self.__name @property def codebook_name(self) -> str: """Name of the codebook.""" return self.__codebook_name @property def signed(self) -> bool: """Whether the data type is signed.""" return self.__signed @property def unsigned(self) -> bool: """Whether the data type is unsigned.""" return not self.__signed @property def total_bits(self) -> int: """Total number of bits.""" return self.__total_bits @property def exponent_bits(self) -> int: """Number of bits for the exponent.""" return self.__exponent_bits @property def mantissa_bits(self) -> int: """Number of bits for the mantissa.""" return self.__mantissa_bits @property def has_subnormal(self) -> bool: """Whether the data type has subnormal.""" return self.__has_subnormal @property def has_inf(self) -> 
bool: """Whether the data type has Inf.""" return self.__has_inf @property def has_nan(self) -> bool: """Whether the data type has NaN.""" return self.__has_nan @property def magnitude(self) -> bool: """Whether the data type is magnitude-based.""" return self.__magnitude @property def is_float_point(self) -> bool: """Whether the data type is floating-point.""" return self.exponent_bits > 0 @property def is_integer(self) -> bool: """Whether the data type is integer.""" return self.exponent_bits == 0 @property def is_exponent(self) -> bool: """Whether the data type is exponent-only floating-point.""" return self.exponent_bits > 0 and self.mantissa_bits == 0 and not self.has_subnormal @property def exponent_mask(self) -> int: """Bit mask for the exponent.""" return ((1 << self.exponent_bits) - 1) << self.mantissa_bits @property def mantissa_mask(self) -> int: """Bit mask for the mantissa.""" return (1 << self.mantissa_bits) - 1 @property def _end_mantissa(self) -> int: return 2**self.mantissa_bits @property def _end_exponent(self) -> int: if self.mantissa_bits > 0: return 2**self.exponent_bits - int(self.has_inf) else: return 2**self.exponent_bits - int(self.has_nan) @property def exponent_bias(self) -> int: """Exponent bias.""" if self.is_float_point: return 2 ** (self.exponent_bits - 1) - 1 else: return 0 @property def max_exponent_value(self) -> int: """Maximum exponent value.""" if self.is_float_point: return self._end_exponent - 1 - self.exponent_bias else: return self.total_bits - 1 - int(self.signed) @property def min_exponent_value(self) -> int: """Minimum exponent value.""" if self.is_float_point: return int(self.has_subnormal) - self.exponent_bias else: return 0 @property def max_positive_normal_value(self) -> float: """Maximum positive normal value.""" if self.is_float_point: if self.mantissa_bits > 0 and not self.has_inf and self.has_nan: base_value = 2 - 2 / self._end_mantissa else: base_value = 2 - 1 / self._end_mantissa return base_value * 
def get_codebook(self, *, device: torch.device | str = "cpu", dtype: torch.dtype = torch.float32) -> Codebook:
    """Get the codebook of this data type on the given device and dtype.

    Codebooks are cached per `(device, dtype)` pair. If the data type has no base codebook
    yet, one is built for the requested device and dtype and kept as the base codebook;
    otherwise the base codebook is converted with `Codebook.to`.

    Args:
        device (`torch.device` or `str`, *optional*, defaults to `"cpu"`):
            Device to create the codebook on. `None` is treated as CPU.
        dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            Data type to create the codebook with.

    Returns:
        `Codebook`:
            Codebook with the specified device and dtype.
    """
    target = torch.device("cpu" if device is None else device)
    cache_key = (target, dtype)
    cached = self.__codebooks.get(cache_key)
    if cached is not None:
        return cached
    if self.__codebook is None:
        # First request: the freshly built codebook also becomes the base codebook.
        self.__codebook = self._build_codebook(device=target, dtype=dtype)
        cached = self.__codebook
    else:
        cached = self.__codebook.to(device=target, dtype=dtype)
    self.__codebooks[cache_key] = cached
    return cached
@staticmethod
def _default_from_str(s: str, /) -> "QuantDataType":
    """Parse a data type name into a `QuantDataType`.

    The name is stripped and lower-cased. Its first character selects signedness
    (`"s"` means signed, anything else unsigned); the remainder selects the family:

    - `int<N>`: integer with `N` total bits.
    - `mag<N>`: magnitude-based integer with `N` total bits.
    - `exp<N>[_nan]`: exponent-only floating point without subnormals.
    - `f?<N>_e<E>m<M>[_inf|_nan|...]`: floating point with `N` total bits and `E` exponent
      bits; subnormals are enabled when the second character is `"p"`.

    Args:
        s (`str`):
            Name of the data type.

    Returns:
        `QuantDataType`:
            The parsed data type.

    Raises:
        ValueError:
            If the family prefix is not recognized.
    """
    text = s.strip().lower()
    signed = text[0] == "s"
    body = text[1:]
    if body.startswith("int"):
        return QuantDataType(int(body[3:]), signed=signed)
    if body.startswith("mag"):
        return QuantDataType(int(body[3:]), signed=signed, magnitude=True)
    if body.startswith("exp"):
        fields = body.split("_")
        # NOTE(review): the number after "exp" is read as the total bit-width here, while
        # `_build_default_name` writes `exponent_bits` after "exp"; for signed types the two
        # differ by one, so such names may not round-trip -- confirm the intended meaning.
        total_bits = int(fields[0][3:])
        has_nan = len(fields) >= 2 and fields[1] == "nan"
        return QuantDataType(
            total_bits=total_bits,
            signed=signed,
            exponent_bits=total_bits - int(signed),
            has_subnormal=False,
            has_nan=has_nan,
        )
    if body.startswith("f"):
        fields = body.split("_")
        has_subnormal = body[1] == "p"
        total_bits = int(fields[0][2:])
        layout = fields[1]
        exponent_bits = int(layout[1 : layout.find("m")])
        suffix = fields[2] if len(fields) >= 3 else ""
        has_inf = suffix == "inf"
        has_nan = has_inf or (suffix == "nan")
        return QuantDataType(
            total_bits=total_bits,
            signed=signed,
            exponent_bits=exponent_bits,
            has_subnormal=has_subnormal,
            has_inf=has_inf,
            has_nan=has_nan,
        )
    raise ValueError(f"Unknown QuantDataType {body}")
def log2(self) -> "LogQuantRange":
    """Return the log-scale of the current quantization range.

    The upper bound of the result is `int(log2(m))`, where `m` is the smallest absolute
    value among the bounds that are set. The lower bound of the result is always `None`.

    Returns:
        `LogQuantRange`:
            The log-scale range. Its `max` is `None` when `self.max` is `None`.

    Raises:
        ValueError:
            If a bound that takes part in the computation is exactly zero
            (`math.log2` is undefined there).
    """
    if self.max is None:
        # Nothing to take a logarithm of; previously this path evaluated log2(0) and raised.
        return LogQuantRange(min=None, max=None)
    # An unset lower bound must not be treated as 0, which would make log2 fail.
    magnitudes = [abs(bound) for bound in (self.min, self.max) if bound is not None]
    return LogQuantRange(min=None, max=int(math.log2(min(magnitudes))))
""" max_value = quant_dtype.max_value if self.max is None else min(self.max, quant_dtype.max_value) min_value = quant_dtype.min_value if self.min is None else max(self.min, quant_dtype.min_value) if quant_dtype.signed and not has_zero_point: max_value = min(abs(min_value), abs(max_value)) min_value = -max_value return QuantRange(min=min_value, max=max_value) def intersect_log2(self, quant_dtype: QuantDataType) -> "LogQuantRange": """Return the intersection of the current quantization range and the given data type in log2 space. Args: quant_dtype (`QuantDataType`): The quantization data type. Returns: `LogQuantRange`: The intersection of the current quantization range and the given data type in log2 space. """ return self.log2().intersect_log2(quant_dtype) @staticmethod def construct( dtype: QuantDataType, *, has_zero_point: bool, quant_range: tp.Optional["QuantRange"] = None ) -> "QuantRange": """Return the intersection of the given quantization range and the given data type. Args: dtype (`QuantDataType`): The quantization data type. has_zero_point (`bool`): Whether the quantization range has zero-point. quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The extra quantization range. Returns: `QuantRange`: The intersection of the given quantization range and the given data type. """ return (quant_range or QuantRange()).intersect(dtype, has_zero_point=has_zero_point) class LogQuantRange(QuantRange): """Log-scale quantization range data class.""" def log2(self) -> "LogQuantRange": """Return the log-scale of the quantization range.""" return self def intersect(self, quant_dtype: QuantDataType, *, has_zero_point: bool) -> "QuantRange": """Return the intersection of the current quantization range and the given data type. Args: quant_dtype (`QuantDataType`): The quantization data type. has_zero_point (`bool`): Whether the quantization range has zero-point. 
Returns: `QuantRange`: The intersection of the current quantization range and the given data type. """ raise NotImplementedError("LogQuantRange does not support intersect method") def intersect_log2(self, quant_dtype: QuantDataType) -> "LogQuantRange": """Return the intersection of the current quantization range and the given data type in log2 space. Args: quant_dtype (`QuantDataType`): The quantization data type. Returns: `LogQuantRange`: The intersection of the current quantization range and the given data type in log2 space. """ max_value = ( quant_dtype.max_exponent_value if self.max is None else min(self.max, quant_dtype.max_exponent_value) ) min_value = ( quant_dtype.min_exponent_value if self.min is None else max(self.min, quant_dtype.min_exponent_value) ) return LogQuantRange(min=min_value, max=max_value) @staticmethod def construct( dtype: QuantDataType, quant_range: tp.Optional[tp.Union["LogQuantRange", QuantRange]] = None ) -> "LogQuantRange": """Return the intersection of the given quantization range and the given data type in log2 space. Args: dtype (`QuantDataType`): The quantization data type. quant_range (`LogQuantRange` or `QuantRange` or `None`, *optional*, defaults to `None`): The extra quantization range. Returns: `LogQuantRange`: The intersection of the given quantization range and the given data type in log2 space. """ return (quant_range or LogQuantRange()).intersect_log2(dtype) class ProtectiveQuantRange(QuantRange): _instances: tp.ClassVar[ dict[tuple[QuantDataType, QuantDataType, tuple[float, float], ZeroPointDomain], "ProtectiveQuantRange"] ] = {} @staticmethod def construct( outer_dtype: QuantDataType, inner_dtype: QuantDataType, zero_domain: ZeroPointDomain | None, inner_quant_range: QuantRange | None = None, ) -> QuantRange: """Return the protective quantization range. Args: outer_dtype (`QuantDataType`): The data type of the outer level in the quantization hierarchy. 
inner_dtype (`QuantDataType`): The data type of the inner level in the quantization hierarchy. zero_domain (`ZeroPointDomain` or `None`): The zero-point domain. inner_quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The inner quantization range. Returns: `QuantRange`: The protective quantization range. """ assert outer_dtype.is_integer, "outer_dtype must be integer data type" assert inner_dtype.is_integer, "inner_dtype must be integer data type" assert zero_domain is not None or outer_dtype.signed == inner_dtype.signed if zero_domain is None: return QuantRange.construct(outer_dtype, has_zero_point=False) inner_quant_range = QuantRange.construct(inner_dtype, has_zero_point=True, quant_range=inner_quant_range) qmax, qmin = int(inner_quant_range.max), int(inner_quant_range.min) # type: ignore key = (outer_dtype, inner_dtype, (qmin, qmax), zero_domain) if key not in ProtectiveQuantRange._instances: outer_quant_range = QuantRange.construct(outer_dtype, has_zero_point=False) vrmax, vrmin = int(outer_quant_range.max), int(outer_quant_range.min) # type: ignore qrmax, qrmin = int(inner_dtype.max_value), int(inner_dtype.min_value) vranges: set[tuple[int, int]] = set() for vmax in range(0, vrmax + 1): for vmin in range(vrmin, vmax + 1): s = round((vmax - vmin) / (qmax - qmin)) assert s >= 0, "s must be non-negative" s = 1 if s == 0 else s s = min(s, vrmax) if zero_domain == ZeroPointDomain.PreScale: z = max(min(round(qmin - vmin / s), qrmax), qrmin) m = (max(min(round(vmax / s + z), qmax), qmin) - z) * s n = (max(min(round(vmin / s + z), qmax), qmin) - z) * s elif zero_domain == ZeroPointDomain.PostScale: z = max(min(round(qmin * s - vmin), vrmax), vrmin) m = max(min(round((vmax + z) / s), qmax), qmin) * s - z n = max(min(round((vmin + z) / s), qmax), qmin) * s - z else: raise ValueError(f"unsupported zero-point domain {zero_domain}") if vrmin <= m <= vrmax and vrmin <= n <= vrmax: vranges.add((vmin, vmax)) found_pmax = None for pmax in range(vrmax, 0, 
def measure(  # noqa: C901
    self,
    tensors: torch.Tensor | list[torch.Tensor],
    /,
    *,
    zero_domain: ZeroPointDomain | None,
    is_float_point: bool,
) -> "DynamicRange":
    """Return a dynamic range of the given tensor(s).

    Two modes are supported:

    * **Static range** (`self.ratio is None` and `self.max` is set): nothing is measured.
      `self.max` / `self.min` are only broadcast (via `_format_m_`) to the per-group shape of
      the first tensor, i.e. its shape with every odd dimension replaced by 1.
    * **Measured range** (otherwise): the range is reduced over the odd dimensions of the
      tensors, scaled by `self.ratio` if given, and finally clamped by `self.max` / `self.min`
      if given.

    Args:
        tensors (`torch.Tensor` or `list[torch.Tensor]`):
            The tensor in the shape of (#g0, gs0, #g1, gs1, ..., #gn, gsn).
            A single tensor is treated as a one-element list.
        zero_domain (`ZeroPointDomain` or `None`):
            The zero-point domain. `None` yields a symmetric range, i.e., only the maximum
            magnitude is tracked and `min` of the result is `None`.
        is_float_point (`bool`):
            Whether the data type is floating-point. Only consulted when `zero_domain` is not `None`:
            the range is then made symmetric around the mean of the data.

    Returns:
        `DynamicRange`:
            A new dynamic range holding only `min` and `max` (`ratio` is not carried over).
    """
    if isinstance(tensors, torch.Tensor):
        tensors = [tensors]
    if self.ratio is None and self.max is not None:  # static range
        tensor = tensors[0]
        # keep the group-count dims (even positions) and collapse the group-size dims (odd positions)
        shape = torch.Size([s if i % 2 == 0 else 1 for i, s in enumerate(tensor.shape)])
        vmax = self._format_m_(self.max, shape=shape, dtype=tensor.dtype, device=tensor.device)
        vmin = self._format_m_(self.min, shape=shape, dtype=tensor.dtype, device=tensor.device)
    else:
        if self.max is None:
            assert self.min is None, "min must be None if max is None"
        # the odd dims are the group-size dims, which are reduced away
        reduced = list(range(1, tensors[0].ndim, 2))
        # region step 1: determine the value range (i.e., vmax and vmin)
        if zero_domain is None:
            # no zero-point: only the largest magnitude matters
            vmin = None
            vmax = tensors[0].abs().amax(dim=reduced, keepdim=True)
            for tensor in tensors[1:]:
                vmax = torch.maximum(vmax, tensor.abs().amax(dim=reduced, keepdim=True).to(vmax.device))
        else:
            vmax = tensors[0].amax(dim=reduced, keepdim=True)
            for tensor in tensors[1:]:
                vmax = torch.maximum(vmax, tensor.amax(dim=reduced, keepdim=True).to(vmax.device))
            vmin = tensors[0].amin(dim=reduced, keepdim=True)
            for tensor in tensors[1:]:
                vmin = torch.minimum(vmin, tensor.amin(dim=reduced, keepdim=True).to(vmin.device))
            if is_float_point:  # ! we adapt the zero-point to be the mean of the data
                # NOTE: this is the unweighted average of the per-tensor means
                vavg = tensors[0].mean(dim=reduced, keepdim=True)
                if len(tensors) > 1:
                    for tensor in tensors[1:]:
                        vavg = vavg + tensor.mean(dim=reduced, keepdim=True).to(vavg.device)
                    vavg = vavg / len(tensors)
        # endregion
        # region step 2: scale the value range by self.ratio
        if zero_domain is None:
            if self.ratio is not None:
                vmax = vmax * self.ratio
        else:
            assert vmin is not None, "vmin must be specified"
            if is_float_point:
                # symmetric range around the mean, wide enough to cover both extremes
                vmag = torch.maximum(vmax - vavg, vavg - vmin)
                if self.ratio is not None:
                    vmag = vmag * self.ratio
                vmax = vavg + vmag
                vmin = vavg - vmag
            else:
                if self.ratio is not None:
                    vmin = vmin * self.ratio
                    vmax = vmax * self.ratio
            if zero_domain == ZeroPointDomain.PreScale:
                # force the range to contain zero: vmax >= 0 and vmin <= 0
                vmax = vmax.clamp(min=0)
                vmin = vmin.clamp(max=0)
        # endregion
        # region step 3: clamp the value range by (self.min, self.max)
        if self.max is not None:
            vmax = vmax.clamp(max=self.max.to(vmax.device))
        if vmin is not None and self.min is not None:
            vmin = vmin.clamp(min=self.min.to(vmin.device))
        # endregion
    return DynamicRange(min=vmin, max=vmax)
""" assert ratio is not None, "ratio must be specified" if zero_domain is None: assert self.max is not None, "self.max must be specified" assert self.min is None, "self.min must be None for data type without zero-point" max_value = self.max * ratio min_value = None else: assert self.min is not None, "self.min must be specified" assert self.max is not None, "self.max must be specified" if is_float_point: centroid_value = (self.min + self.max) / 2 vmag = (self.max - centroid_value) * ratio max_value = centroid_value + vmag min_value = centroid_value - vmag else: min_value = self.min * ratio max_value = self.max * ratio if zero_domain == ZeroPointDomain.PreScale: max_value = max_value.clamp(min=0) min_value = min_value.clamp(max=0) return DynamicRange(min=min_value, max=max_value) @staticmethod def construct( tensors: torch.Tensor | list[torch.Tensor], /, *, zero_domain: ZeroPointDomain | None, is_float_point: bool, ) -> "DynamicRange": return DynamicRange().measure(tensors, zero_domain=zero_domain, is_float_point=is_float_point) @staticmethod def _format_m_( value: torch.Tensor | float | None, *, shape: torch.Size, dtype: torch.dtype, device: torch.device, ) -> torch.Tensor | None: if value is None: return None elif isinstance(value, torch.Tensor): if value.numel() == 1: return value.view(-1).to(dtype=dtype, device=device).expand(shape) elif value.numel() == shape.numel(): return value.view(shape).to(dtype=dtype, device=device) elif value.shape[1:] == shape[1:] and value.shape[0] == 1: return value.to(dtype=dtype, device=device).expand(shape) else: raise ValueError(f"Invalid value shape: {value.shape}") else: return torch.full(shape, value, dtype=dtype, device=device) def to_dict(self) -> dict[str, tp.Any]: """Return the dictionary representation of the dynamic range.""" return {"min": self.min, "max": self.max, "ratio": self.ratio} @classmethod def from_dict(cls, data: dict[str, tp.Any] | None) -> tp.Optional[tp.Self]: """Return the dynamic range from the given 
dictionary.""" return cls(min=data["min"], max=data["max"], ratio=data["ratio"]) if data is not None else None ================================================ FILE: deepcompressor/data/scale.py ================================================ # -*- coding: utf-8 -*- """Quantization scale module.""" import typing as tp import torch __all__ = ["QuantScale"] class QuantScale: data: torch.Tensor _children: list["QuantScale"] _leaves: list[torch.Tensor] def __init__(self): self.data, self._children, self._leaves = None, [], [] # type: ignore @property def num_children(self) -> int: """Get the number of children.""" return len(self._children) @property def num_leaves(self) -> int: """Get the number of leaves.""" return len(self._leaves) def is_quantized(self) -> bool: """Check if the scale is quantized.""" return self.data is not None and bool(self._leaves or all(child.is_quantized() for child in self._children)) def get_child(self, index: int) -> "QuantScale": """Get a child scale.""" return self._children[index] def append(self, scale: tp.Union[torch.Tensor, "QuantScale"]) -> "QuantScale": """Append a scale.""" if isinstance(scale, torch.Tensor): assert not self._children, "Cannot append a tensor scale to a non-leaf QuantScale." self.data = _join_scale_tensor(self.data, scale) self._leaves.append(scale) elif isinstance(scale, QuantScale): assert not self._leaves, "Cannot append a non-leaf QuantScale to a leaf QuantScale." self.data = _join_scale_tensor(self.data, scale.data) self._children.append(scale) else: raise TypeError(f"Unsupported scale type: {type(scale)}") return self def extend(self, scale: "QuantScale") -> "QuantScale": """Extend with another QuantScale.""" self.data = _join_scale_tensor(self.data, scale.data) if scale._children: assert not self._leaves, "Cannot extend a leaf QuantScale with a non-leaf QuantScale." 
self._children.extend(scale._children) elif scale._leaves: assert not scale._children, "Cannot extend a non-leaf QuantScale with a leaf QuantScale." self._leaves.extend(scale._leaves) return self def join(self, scale: "QuantScale") -> "QuantScale": """Return a new QuantScale by joining with another QuantScale.""" return QuantScale().append(self).append(scale) def remove_zero(self) -> "QuantScale": """Remove zero scales.""" self.data[self.data == 0] = 1 return self def state_dict( self, param_name: str, device: torch.device | str = "cpu", flatten: bool = True, level_base: int = 0, ) -> dict[str, torch.Tensor]: """Get the state dictionary.""" if self._children: state_dict = {} for i, child in enumerate(self._children): child_param_name = param_name if flatten else f"{param_name}.{i}" child_level_base = len(state_dict) if flatten else 0 child_state_dict = child.state_dict(child_param_name, device, flatten, child_level_base) state_dict.update(child_state_dict) return state_dict else: return {f"{param_name}.{level_base + i}": leaf.to(device) for i, leaf in enumerate(self._leaves)} def _join_scale_tensor(global_scale: torch.Tensor | None, local_scale: torch.Tensor) -> torch.Tensor: """Multiply the local scale tensor by the global scale tensor. Args: global_scale (`torch.Tensor` or `None`): Global scale tensor. local_scale (`torch.Tensor`): Local scale tensor. Returns: `torch.Tensor`: The compounded scale tensor. """ # global_scale: (#gs_g0, 1, #gs_g1, 1, #gs_g2, 1, ...) # local_scale: (#ss_g0, 1, #ss_g1, 1, #ss_g2, 1, ...) -> (#gs_g0, rs0, #gs_g1, rs1, #gs_g2, rs2, ...) 
class QuantTensor:
    """Container bundling a tensor in dequantized and/or quantized form with its quantization parameters."""

    _dequantized: torch.Tensor | None
    _quantized: torch.Tensor | None
    scale: QuantScale | None
    zero: torch.Tensor | float | None
    view_shape: torch.Size | None

    def __init__(
        self,
        dequantized: torch.Tensor | None = None,
        quantized: torch.Tensor | None = None,
        scale: QuantScale | None = None,
        zero: torch.Tensor | float | None = None,
        view_shape: torch.Size | None = None,
    ):
        """Initialize the quantized tensor.

        Args:
            dequantized (`torch.Tensor` or `None`, *optional*, defaults to `None`):
                The dequantized tensor.
            quantized (`torch.Tensor` or `None`, *optional*, defaults to `None`):
                The quantized tensor.
            scale (`QuantScale` or `None`, *optional*, defaults to `None`):
                The quantization scale.
            zero (`torch.Tensor` or `float` or `None`, *optional*, defaults to `None`):
                The zero point.
            view_shape (`torch.Size` or `None`, *optional*, defaults to `None`):
                The view shape.
        """
        # at least one of the two representations is required
        assert not (
            dequantized is None and quantized is None
        ), "Either the dequantized or quantized tensor must be provided."
        self._dequantized, self._quantized = dequantized, quantized
        self.scale, self.zero = scale, zero
        self.view_shape = view_shape

    @property
    def data(self) -> torch.Tensor | None:
        """Get the dequantized tensor."""
        return self._dequantized

    @property
    def qdata(self) -> torch.Tensor | None:
        """Get the quantized tensor."""
        return self._quantized
def infer_dtype_bits(dtype: torch.dtype | QuantDataType) -> int:
    """Get the number of bits of a torch.dtype or QuantDataType.

    Args:
        dtype (`torch.dtype` or `QuantDataType`):
            The dtype to get the number of bits of.

    Returns:
        `int`:
            The number of bits.

    Raises:
        `ValueError`: If `dtype` is neither a `QuantDataType` nor a floating-point or
            integer `torch.dtype` (e.g., `torch.bool` or a complex dtype).
    """
    if isinstance(dtype, QuantDataType):
        return dtype.total_bits
    if isinstance(dtype, torch.dtype):
        # Query torch instead of enumerating dtypes by hand, so that every dtype accepted by
        # `eval_dtype` (including `torch.int64`) is covered.
        if dtype.is_floating_point:
            return torch.finfo(dtype).bits
        try:
            return torch.iinfo(dtype).bits
        except (TypeError, RuntimeError):
            # torch.iinfo rejects non-integer dtypes such as bool and complex
            raise ValueError(f"Unknown dtype {dtype}") from None
    raise ValueError(f"Unknown dtype {dtype}")
""" if isinstance(dtype, QuantDataType): return str(dtype) elif isinstance(dtype, torch.dtype): if dtype == torch.float16: return "fp16" elif dtype == torch.float32: return "fp32" elif dtype == torch.float64: return "fp64" elif dtype == torch.bfloat16: return "bf16" else: return str(dtype).split(".")[-1] else: raise ValueError(f"Unknown dtype {dtype}") def eval_dtype( # noqa: C901 s: str | torch.dtype | QuantDataType | None, with_quant_dtype: bool = True, with_none: bool = True ) -> torch.dtype | QuantDataType | None: if isinstance(s, torch.dtype): return s if isinstance(s, QuantDataType): if with_quant_dtype: return s else: raise ValueError(f"Unknown dtype {s}") if s is None: if with_none: return None else: raise ValueError(f"Unknown dtype {s}") assert isinstance(s, str), f"Unknown dtype {s}" s = s.lower() if s in ("torch.float64", "float64", "fp64", "f64", "double"): return torch.float64 elif s in ("torch.float32", "float32", "fp32", "f32", "single", "float"): return torch.float32 elif s in ("torch.float16", "float16", "fp16", "f16", "half"): return torch.float16 elif s in ("torch.bfloat16", "bfloat16", "bf16", "b16", "brain"): return torch.bfloat16 elif s in ("torch.int64", "int64", "i64", "long"): return torch.int64 elif s in ("torch.int32", "int32", "i32", "int"): return torch.int32 elif s in ("torch.int16", "int16", "i16", "short"): return torch.int16 elif s in ("torch.int8", "int8", "i8", "byte"): return torch.int8 elif s in ("torch.uint8", "uint8", "u8", "ubyte"): return torch.uint8 else: if with_none and s in ("", "none", "null", "nil"): return None if with_quant_dtype: return QuantDataType.from_str(s) raise ValueError(f"Unknown dtype {s}") ================================================ FILE: deepcompressor/data/utils/reshape.py ================================================ # -*- coding: utf-8 -*- """Type hints used in deepcompressor.""" import torch import torch.nn.functional as F __all__ = [ "ReshapeFn", "LinearReshapeFn", "ConvInputReshapeFn", 
"ConvOutputReshapedFn", "AttentionInputReshapeFn", ] class ReshapeFn: """Reshape function.""" def __call__(self, x: torch.Tensor, /, ic_last: bool = True) -> torch.Tensor: """Reshape input tensor to the desired shape used for GEMM. Args: x (`torch.Tensor`): Input tensor. ic_last (`bool`, *optional*, defaults to `True`): Whether input channel is the last dimension. Returns: `torch.Tensor`: Reshaped tensor. """ return x class LinearReshapeFn(ReshapeFn): """Inputs reshape function for linear layers.""" def __call__(self, x: torch.Tensor, /, ic_last: bool = True) -> torch.Tensor: """Reshape input tensor to the desired 2D shape used for GEMM. Args: x (`torch.Tensor`): Input tensor. ic_last (`bool`, *optional*, defaults to `True`): Whether input channel is the last dimension. Returns: `torch.Tensor`: Reshaped tensor. """ return x.view(-1, x.shape[-1]).permute(int(not ic_last), int(ic_last)) class ConvInputReshapeFn(ReshapeFn): """Inputs reshape function for convolutional layers.""" def __init__( self, kernel_size: tuple[int, ...], padding: tuple[int, ...], stride: tuple[int, ...], dilation: tuple[int, ...] ) -> None: """Initialize the reshape function. Args: kernel_size (`tuple[int, ...]`): Kernel size. padding (`tuple[int, ...]`): Padding. stride (`tuple[int, ...]`): Stride. dilation (`tuple[int, ...]`): Dilation. """ self.kernel_size = kernel_size self.padding = padding self.stride = stride self.dilation = dilation def __call__(self, x: torch.Tensor, /, ic_last: bool = True) -> torch.Tensor: """Reshape input tensor to the desired 2D shape used for GEMM. Args: x (`torch.Tensor`): Input tensor. ic_last (`bool`, *optional*, defaults to `True`): Whether input channel is the last dimension. Returns: `torch.Tensor`: Reshaped tensor. 
""" x = F.unfold( x, kernel_size=self.kernel_size, padding=self.padding, stride=self.stride, dilation=self.dilation, ) ic = x.shape[1] if ic_last: return x.permute(0, 2, 1).reshape(-1, ic) else: return x.permute(1, 0, 2).reshape(ic, -1) class ConvOutputReshapedFn(ReshapeFn): """Outputs reshape function for convolutional layers.""" def __call__(self, x: torch.Tensor, /, ic_last: bool = True) -> torch.Tensor: """Reshape output tensor to the desired shape. Args: x (`torch.Tensor`): Input tensor. ic_last (`bool`, *optional*, defaults to `True`): Whether input channel is the last dimension. Returns: `torch.Tensor`: Reshaped tensor. """ ic = x.shape[1] x = x.view(x.shape[0], ic, -1) if ic_last: return x.permute(0, 2, 1).reshape(-1, ic) else: return x.permute(1, 0, 2).reshape(ic, -1) class AttentionInputReshapeFn(ReshapeFn): """Inputs reshape function for attention layer.""" def __init__(self, channels_dim: int) -> None: """Initialize the reshape function. Args: channels_dim (`int`): The dimension of the channels. """ self.channels_dim = channels_dim def __call__(self, x: torch.Tensor, /, ic_last: bool = True) -> torch.Tensor: """Reshape input tensor to the desired 2D shape used for GEMM. Args: x (`torch.Tensor`): Input tensor. ic_last (`bool`, *optional*, defaults to `True`): Whether input channel is the last dimension. Returns: `torch.Tensor`: Reshaped tensor. 
""" num_channels = x.shape[self.channels_dim] shape_before = x.shape[: self.channels_dim] shape_after = x.shape[self.channels_dim + 1 :] x = x.view(shape_before.numel(), num_channels, shape_after.numel()) if ic_last: return x.permute(0, 2, 1).reshape(-1, num_channels) else: return x.permute(1, 0, 2).reshape(num_channels, -1) ================================================ FILE: deepcompressor/data/utils/scale.py ================================================ # -*- coding: utf-8 -*- """Utility functions for quantization scale.""" import typing as tp import torch from ..dtype import QuantDataType __all__ = ["infer_scale_dtypes", "infer_scale_quant_spans", "infer_exponent_scale_level"] def infer_scale_dtypes( scale_dtypes: tp.Sequence[torch.dtype | QuantDataType | None], default_dtype: torch.dtype | QuantDataType ) -> list[torch.dtype | QuantDataType]: """Get the scale dtypes for the given tensor dtype. Args: scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`): The scale dtypes. default_dtype (`torch.dtype`): The default scale dtype. Returns: `list[torch.dtype | QuantDataType]`: The scale dtypes. """ assert isinstance( default_dtype, (torch.dtype, QuantDataType) ), f"dtype must be torch.dtype or QuantDataType, got {default_dtype}" return [s_dtype or default_dtype for s_dtype in scale_dtypes] def infer_scale_quant_spans(scale_dtypes: tp.Sequence[QuantDataType], base: int = 1) -> list[float]: quant_spans: list[float] = [base] for s_dtype in reversed(scale_dtypes[1:]): assert isinstance(s_dtype, QuantDataType), f"s_dtype must be QuantDataType, got {s_dtype}" quant_spans.append(s_dtype.max_value * quant_spans[-1]) return list(reversed(quant_spans)) def infer_exponent_scale_level(scale_dtypes: tp.Sequence[torch.dtype | QuantDataType]) -> int: """Get the exponent scaling level. Args: scale_dtypes (`Sequence[torch.dtype | QuantDataType]`): The scale data types. Returns: `int`: The exponent scaling level. 
""" for level, scale_dtype in enumerate(scale_dtypes): if isinstance(scale_dtype, QuantDataType) and scale_dtype.is_exponent: return level return len(scale_dtypes) ================================================ FILE: deepcompressor/data/utils/shape.py ================================================ # -*- coding: utf-8 -*- """Utility functions for shape calulation in quantization.""" import typing as tp import torch from ..dtype import QuantDataType from .dtype import eval_dtype __all__ = ["infer_group_shape_name", "format_group_configs", "infer_group_shapes", "infer_view_shape", "infer_shape"] def infer_group_shape_name(group_shape: tp.Sequence[int]) -> str: """Get the name of the group shape. Args: group_shape (`Sequence[int]`): The group shape. Returns: `str`: The name of the group shape. """ if all(gs <= 0 for gs in group_shape[2:]): if group_shape[1] <= 0: if group_shape[0] <= 0: return "tsnr" elif group_shape[0] == 1: return "gchn" else: return f"t{group_shape[0]}gchn" else: if group_shape[0] <= 0: return f"tsg{group_shape[1]}" elif group_shape[0] == 1: return f"g{group_shape[1]}" else: return f"t{group_shape[0]}g{group_shape[1]}" elif all(gs == 1 for gs in group_shape[2:]): if group_shape[1] <= 0: if group_shape[0] <= 0: return "tspx" elif group_shape[0] == 1: return "vchn" else: return f"t{group_shape[0]}vchn" else: if group_shape[0] <= 0: return f"tsv{group_shape[1]}" elif group_shape[0] == 1: return f"v{group_shape[1]}" else: return f"t{group_shape[0]}v{group_shape[1]}" return f"{'x'.join(str(gs) if gs >= 1 else '_' for gs in group_shape)}" def format_group_configs( *, group_shapes: tp.Sequence[tp.Sequence[int]], scale_dtypes: tp.Sequence[torch.dtype | QuantDataType | None] | torch.dtype | QuantDataType | None, ) -> tuple[tuple[tuple[int, ...], ...], tuple[torch.dtype | QuantDataType | None, ...]]: """Format the group shape and scale dtype. Args: group_shapes (`Sequence[tp.Sequence[int]]`): The group shapes. 
def infer_group_shapes(group_shapes: tuple[tuple[int, ...], ...], shape: torch.Size) -> list[torch.Size]:
    """Infer the group shapes using group shape config on the given tensor shape.

    For every level, a non-positive group size means "the whole dimension", the last group
    size of the config is reused for all trailing dimensions, and a group size larger than
    that of the previous level is clipped to the previous one.

    Args:
        group_shapes (`tuple[tuple[int, ...], ...]`):
            The group shapes, from the outermost level to the innermost level.
        shape (`torch.Size`):
            The shape of the tensor.

    Returns:
        `list[torch.Size]`:
            The inferred group shapes, one per level.
    """
    assert isinstance(shape, torch.Size), f"shape must be torch.Size, got {shape} ({type(shape)})"
    assert len(shape) >= 2, f"shape must have at least 2 dimensions, got {shape} ({len(shape)} < 2)"
    _group_shapes: list[torch.Size] = []
    _prev_group_shape = shape
    for level, group_shape in enumerate(group_shapes):
        m = len(group_shape) - 1
        _group_shape = []
        for i, ts in enumerate(shape):
            gs = group_shape[min(i, m)]
            if gs <= 0:
                gs = ts
            ps = _prev_group_shape[i]
            # the group shape must be less than or equal to the previous group shape
            gs = min(gs, ps)
            # every group of the previous level must split into whole groups of this level
            assert ps % gs == 0, (
                f"the previous group size ({ps}) at dim {i} must be divisible by "
                f"the level {level} group size ({gs})"
            )
            _group_shape.append(gs)
        _prev_group_shape = torch.Size(_group_shape)
        _group_shapes.append(_prev_group_shape)
    return _group_shapes
def infer_shape(view_shape: torch.Size) -> torch.Size:
    """Recover the tensor shape from its grouped view shape.

    Args:
        view_shape (`torch.Size`):
            The view shape, i.e., consecutive (number of groups, group size) pairs.

    Returns:
        `torch.Size`:
            The shape of the tensor, i.e., the product of every pair.
    """
    sizes: list[int] = []
    for start in range(0, len(view_shape), 2):
        num_groups, group_size = view_shape[start], view_shape[start + 1]
        sizes.append(num_groups * group_size)
    return torch.Size(sizes)
class CacheHook(IOHook):
    """Forward hook that hands a module's inputs or outputs to a ``CacheAction``."""

    def __init__(
        self, name: str, module: nn.Module, action: "CacheAction", cache: TensorsCache, info_mode: bool, is_output: bool
    ):
        """Initialize the hook.

        Args:
            name (``str``):
                Module name.
            module (``nn.Module``):
                Module.
            action (``CacheAction``):
                Cache action.
            cache (``TensorsCache``):
                Cache.
            info_mode (``bool``):
                Whether to update cache information.
            is_output (``bool``):
                Whether the hook is an output hook.
        """
        # Only the packager matching the hook direction is created.
        if is_output:
            input_packager = None
            output_packager = action.get_output_packager(name, module, cache)
        else:
            input_packager = action.get_input_packager(name, module, cache)
            output_packager = None
        super().__init__(
            pre=not is_output,
            post=is_output,
            input_packager=input_packager,
            output_packager=output_packager,
        )
        self.name = name
        self.action = action
        self.cache = cache
        self.info_mode = info_mode

    def _dispatch(self, module: nn.Module, tensors: dict[int | str, torch.Tensor]) -> None:
        """Forward the unpacked tensors to the action (``info`` or ``apply``)."""
        if self.info_mode:
            self.action.info(self.name, module, tensors, self.cache)
        assert len(tensors) == self.cache.num_tensors, f"Expected {self.cache.num_tensors} args, but got {len(tensors)}"
        if not self.info_mode:
            self.action.apply(self.name, module, tensors, self.cache)

    def pre_forward(
        self,
        module: nn.Module,
        input_args: tuple[torch.Tensor, ...],
        input_kwargs: dict[str, tp.Any],
    ) -> None:
        """Unpack the module inputs and pass them to the action."""
        self._dispatch(module, self.input_packager.unpack(module, input_args, input_kwargs))

    def post_forward(
        self,
        module: nn.Module,
        input_args: tuple[torch.Tensor, ...],
        input_kwargs: dict[str, tp.Any],
        output: torch.Tensor | tuple[torch.Tensor, ...],
    ) -> None:
        """Unpack the module outputs and pass them to the action."""
        self._dispatch(module, self.output_packager.unpack(module, input_args, input_kwargs, output))


class CacheAction(ABC):
    """Actions for caching activations."""

    device: torch.device | None = None

    def __init__(self, device: torch.device | str | None = None) -> None:
        """Initialize the action.

        Args:
            device (`torch.device` or `str` or `None`, *optional*, defaults to `None`):
                Device for caching.
        """
        self.device = device

    @abstractmethod
    def apply(
        self,
        name: str,
        module: nn.Module,
        tensors: dict[int | str, torch.Tensor],
        cache: TensorsCache,
    ) -> None:
        """Cache activations.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            tensors (`dict[int or str, torch.Tensor]`):
                Tensors to cache.
            cache (`TensorsCache`):
                Cache.
        """
        ...

    @abstractmethod
    def info(
        self,
        name: str,
        module: nn.Module,
        tensors: dict[int | str, torch.Tensor],
        cache: TensorsCache,
    ) -> None:
        """Update cache information.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            tensors (`dict[int or str, torch.Tensor]`):
                Tensors to cache.
            cache (`TensorsCache`):
                Cache.
        """
        ...

    def get_input_packager(self, name: str, module: nn.Module, cache: TensorsCache) -> BaseInputPackager:
        """Get input packager.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            cache (`TensorsCache`):
                Cache.

        Returns:
            `BaseInputPackager`:
                Input packager keyed by the cache keys.
        """
        keys = list(cache.keys())
        return KeyedInputPackager(module=module, index_or_keys=keys)

    def get_output_packager(self, name: str, module: nn.Module, cache: TensorsCache) -> BaseOutputPackager:
        """Get output packager.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            cache (`TensorsCache`):
                Cache.

        Returns:
            `BaseOutputPackager`:
                Output packager keyed by the cache keys.
        """
        keys = list(cache.keys())
        return KeyedOutputPackager(index_or_keys=keys)

    def register(
        self,
        name: str,
        module: nn.Module,
        cache: IOTensorsCache,
        info_mode: bool,
        needs_inputs: bool,
        needs_outputs: bool,
    ) -> list[Hook]:
        """Register hooks for caching activations.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            cache (`IOTensorsCache`):
                Cache.
            info_mode (`bool`):
                Whether to update cache information.
            needs_inputs (`bool`):
                Whether to cache inputs.
            needs_outputs (`bool`):
                Whether to cache outputs.

        Returns:
            `list[Hook]`:
                Cache hooks (input hook first, then output hook).
        """
        registered: list[Hook] = []
        if needs_inputs:
            assert cache.inputs is not None
            input_hook = CacheHook(name, module, self, cache.inputs, info_mode, is_output=False)
            registered.append(input_hook.register(module))
        if needs_outputs:
            assert cache.outputs is not None
            output_hook = CacheHook(name, module, self, cache.outputs, info_mode, is_output=True)
            registered.append(output_hook.register(module))
        return registered


class ConcatCacheAction(CacheAction):
    """Action for concatenating cached activations for calibration."""

    def apply(
        self,
        name: str,
        module: nn.Module,
        tensors: dict[int | str, torch.Tensor],
        cache: TensorsCache,
    ) -> None:
        """Concatenate cached activations along the sample dimension.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            tensors (`dict[int or str, torch.Tensor]`):
                Tensors to cache.
            cache (`TensorsCache`):
                Cache.
        """
        for key, entry in cache.tensors.items():
            tensor = tensors[key]
            start = entry.num_cached
            entry.num_cached = start + tensor.shape[0]
            if start == 0:
                # First write: allocate one buffer sized for ``num_total`` samples.
                assert len(entry.data) == 0
                target_device = self.device or tensor.device
                buffer = torch.empty((entry.num_total, *tensor.shape[1:]), dtype=tensor.dtype, device=target_device)
                entry.data.append(buffer)
            entry.data[0][start : entry.num_cached].copy_(tensor)

    def info(
        self,
        name: str,
        module: nn.Module,
        tensors: dict[int | str, torch.Tensor],
        cache: TensorsCache,
    ) -> None:
        """Update cache information.

        Accumulates the first-dimension size of every tensor into ``num_total``
        and records the device the tensor lives on.

        Args:
            name (`str`):
                Module name.
            module (`nn.Module`):
                Module.
            tensors (`dict[int or str, torch.Tensor]`):
                Tensors to cache.
            cache (`TensorsCache`):
                Cache.
        """
        for key, entry in cache.tensors.items():
            tensor = tensors[key]
            entry.num_total += tensor.shape[0]
            entry.orig_device = tensor.device
""" if isinstance(module, (nn.Linear,)): return IOTensorsCache( inputs=TensorCache(channels_dim=-1, reshape=LinearReshapeFn()), outputs=TensorCache(channels_dim=-1, reshape=LinearReshapeFn()), ) elif isinstance(module, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): assert module.padding_mode == "zeros", f"Padding mode {module.padding_mode} is not supported" if isinstance(module.padding, str): if module.padding == "valid": padding = (0,) * len(module.kernel_size) elif module.padding == "same": padding = tuple(reversed(tuple(t for t in module._reversed_padding_repeated_twice[::2]))) else: padding = tuple(module.padding) return IOTensorsCache( inputs=TensorCache( channels_dim=1, reshape=ConvInputReshapeFn(module.kernel_size, padding, module.stride, module.dilation), ), outputs=TensorCache(channels_dim=1, reshape=ConvOutputReshapedFn()), ) else: raise NotImplementedError(f"Module {module.__class__.__name__} is not supported") def _convert_layer_inputs( self, m: nn.Module, args: tuple[tp.Any, ...], kwargs: dict[str, tp.Any], save_all: bool = False ) -> ModuleForwardInput: """Convert layer inputs to module forward input. Args: m (`nn.Module`): Layer. args (`tuple[Any, ...]`): Layer input arguments. kwargs (`dict[str, Any]`): Layer input keyword arguments. save_all (`bool`, *optional*, defaults to `False`): Whether to save all inputs. Returns: `ModuleForwardInput`: Module forward input. """ x = args[0].detach().cpu() if save_all else MISSING return ModuleForwardInput(args=[x, *args[1:]], kwargs=kwargs) def _convert_layer_outputs(self, m: nn.Module, outputs: tp.Any) -> dict[str | int, tp.Any]: """Convert layer outputs to dictionary for updating the next layer inputs. Args: m (`nn.Module`): Layer. outputs (`Any`): Layer outputs. Returns: `dict[str | int, Any]`: Dictionary for updating the next layer inputs. 
""" if not isinstance(outputs, torch.Tensor): outputs = outputs[0] assert isinstance(outputs, torch.Tensor), f"Invalid outputs type: {type(outputs)}" return {0: outputs.detach().cpu()} def _layer_forward_pre_hook( self, m: nn.Module, args: tuple[torch.Tensor, ...], kwargs: dict[str, tp.Any], cache: list[ModuleForwardInput], save_all: bool = False, ) -> None: inputs = self._convert_layer_inputs(m, args, kwargs, save_all=save_all) if len(cache) > 0: inputs.args = tree_copy_with_ref(inputs.args, cache[0].args) inputs.kwargs = tree_copy_with_ref(inputs.kwargs, cache[0].kwargs) else: inputs.args = tree_map(lambda x: x, inputs.args) inputs.kwargs = tree_map(lambda x: x, inputs.kwargs) cache.append(inputs) @torch.inference_mode() def _iter_layer_activations( # noqa: C901 self, model: nn.Module, *args, action: CacheAction, layers: tp.Sequence[nn.Module] | None = None, needs_inputs_fn: tp.Callable[[str, nn.Module], bool] | bool | None = True, needs_outputs_fn: tp.Callable[[str, nn.Module], bool] | bool | None = None, recomputes: list[bool] | None = None, use_prev_layer_outputs: list[bool] | None = None, early_stop_module: nn.Module | None = None, clear_after_yield: bool = True, **kwargs, ) -> tp.Generator[ tuple[ str, tuple[ nn.Module, dict[str, IOTensorsCache], list[ModuleForwardInput], ], ], None, None, ]: """Iterate over model activations in layers. Args: model (`nn.Module`): Model. action (`CacheAction`): Action for caching activations. layers (`Sequence[nn.Module]` or `None`, *optional*, defaults to `None`): Layers to cache activations. If `None`, cache all layers. needs_inputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `True`): Function for determining whether to cache inputs for a module given its name and itself. needs_outputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `None`): Function for determining whether to cache outputs for a module given its name and itself. 
recomputes (`list[bool]` or `bool` or `None`, *optional*, defaults to `None`): Whether to recompute the activations for each layer. use_prev_layer_outputs (`list[bool]` or `bool` or `None`, *optional*, defaults to `None`): Whether to use the previous layer outputs as inputs for the current layer. early_stop_module (`nn.Module` or `None`, *optional*, defaults to `None`): Module for early stopping. clear_after_yield (`bool`, *optional*, defaults to `True`): Whether to clear the cache after yielding the activations. *args: Arguments for ``iter_samples``. **kwargs: Keyword arguments for ``iter_samples``. Yields: Generator[ tuple[str, tuple[nn.Module, dict[str, IOTensorsCache], list[ModuleForwardInput]]], None, None ]: Generator of tuple of - layer name - a tuple of - layer itself - inputs and outputs cache of each module in the layer - layer input arguments """ if needs_outputs_fn is None: needs_outputs_fn = lambda name, module: False # noqa: E731 elif isinstance(needs_outputs_fn, bool): if needs_outputs_fn: needs_outputs_fn = lambda name, module: True # noqa: E731 else: needs_outputs_fn = lambda name, module: False # noqa: E731 if needs_inputs_fn is None: needs_inputs_fn = lambda name, module: False # noqa: E731 elif isinstance(needs_inputs_fn, bool): if needs_inputs_fn: needs_inputs_fn = lambda name, module: True # noqa: E731 else: needs_inputs_fn = lambda name, module: False # noqa: E731 if layers is None: recomputes = [True] use_prev_layer_outputs = [False] else: assert isinstance(layers, (nn.Sequential, nn.ModuleList, list, tuple)) if recomputes is None: recomputes = [False] * len(layers) elif isinstance(recomputes, bool): recomputes = [recomputes] * len(layers) if use_prev_layer_outputs is None: use_prev_layer_outputs = [True] * len(layers) elif isinstance(use_prev_layer_outputs, bool): use_prev_layer_outputs = [use_prev_layer_outputs] * len(layers) use_prev_layer_outputs[0] = False assert len(recomputes) == len(use_prev_layer_outputs) == len(layers) cache: 
dict[str, dict[str, IOTensorsCache]] = {} module_names: dict[str, list[str]] = {"": []} named_layers: OrderedDict[str, nn.Module] = {"": model} # region we first collect infomations for yield modules forward_cache: dict[str, list[ModuleForwardInput]] = {} info_hooks: list[Hook] = [] forward_hooks: list[torch.utils.hooks.RemovableHandle] = [] hook_args: dict[str, list[tuple[str, nn.Module, bool, bool]]] = {} layer_name = "" for module_name, module in model.named_modules(): if layers is not None and module_name and module in layers: layer_name = module_name assert layer_name not in module_names named_layers[layer_name] = module module_names[layer_name] = [] forward_cache[layer_name] = [] if layers is None or (layer_name and module_name.startswith(layer_name)): # we only cache modules in the layer needs_inputs = needs_inputs_fn(module_name, module) needs_outputs = needs_outputs_fn(module_name, module) if needs_inputs or needs_outputs: module_names[layer_name].append(module_name) cache.setdefault(layer_name, {})[module_name] = self._init_cache(module_name, module) hook_args.setdefault(layer_name, []).append((module_name, module, needs_inputs, needs_outputs)) info_hooks.extend( action.register( name=module_name, module=module, cache=cache[layer_name][module_name], info_mode=True, needs_inputs=needs_inputs, needs_outputs=needs_outputs, ) ) if len(cache) == 0: return if layers is not None: module_names.pop("") named_layers.pop("") assert layer_name, "No layer in the given layers is found in the model" assert "" not in cache, "The model should not have empty layer name" ordered_named_layers: OrderedDict[str, nn.Module] = OrderedDict() for layer in layers: for name, module in named_layers.items(): if module is layer: ordered_named_layers[name] = module break assert len(ordered_named_layers) == len(named_layers) assert len(ordered_named_layers) == len(layers) named_layers = ordered_named_layers del ordered_named_layers for layer_idx, (layer_name, layer) in 
enumerate(named_layers.items()): forward_hooks.append( layer.register_forward_pre_hook( functools.partial( self._layer_forward_pre_hook, cache=forward_cache[layer_name], save_all=not recomputes[layer_idx] and not use_prev_layer_outputs[layer_idx], ), with_kwargs=True, ) ) else: assert len(named_layers) == 1 and "" in named_layers assert len(module_names) == 1 and "" in module_names assert len(cache) == 1 and "" in cache # endregion with tools.logging.redirect_tqdm(): # region we then collect cache information by running the model with all samples if early_stop_module is not None: forward_hooks.append(early_stop_module.register_forward_hook(EarlyStopHook())) with torch.inference_mode(): device = "cuda" if torch.cuda.is_available() else "cpu" tbar = tqdm( desc="collecting acts info", leave=False, total=self.num_samples, unit="samples", dynamic_ncols=True, ) num_samples = 0 for sample in self.iter_samples(*args, **kwargs): num_samples += self.batch_size sample = sample.to(device=device) try: model(*sample.args, **sample.kwargs) except EarlyStopException: pass tbar.update(self.batch_size) tbar.set_postfix({"ram usage": psutil.virtual_memory().percent}) if psutil.virtual_memory().percent > 90: raise RuntimeError("memory usage > 90%%, aborting") for layer_cache in cache.values(): for module_cache in layer_cache.values(): module_cache.set_num_samples(num_samples) for hook in forward_hooks: hook.remove() for hook in info_hooks: hook.remove() del info_hooks, forward_hooks # endregion for layer_idx, (layer_name, layer) in enumerate(named_layers.items()): # region we first register hooks for caching activations layer_hooks: list[Hook] = [] for module_name, module, needs_inputs, needs_outputs in hook_args[layer_name]: layer_hooks.extend( action.register( name=module_name, module=module, cache=cache[layer_name][module_name], info_mode=False, needs_inputs=needs_inputs, needs_outputs=needs_outputs, ) ) hook_args.pop(layer_name) # endregion if recomputes[layer_idx]: if layers is 
None: if early_stop_module is not None: layer_hooks.append(EarlyStopHook().register(early_stop_module)) else: layer_hooks.append(EarlyStopHook().register(layer)) tbar = tqdm( desc=f"collecting acts in {layer_name}", leave=False, total=self.num_samples, unit="samples", dynamic_ncols=True, ) for sample in self.iter_samples(*args, **kwargs): sample = sample.to(device=device) try: model(*sample.args, **sample.kwargs) except EarlyStopException: pass tbar.update(self.batch_size) tbar.set_postfix({"ram usage": psutil.virtual_memory().percent}) if psutil.virtual_memory().percent > 90: raise RuntimeError("memory usage > 90%%, aborting") gc.collect() else: # region we then forward the layer to collect activations device = next(layer.parameters()).device layer_outputs: list[tp.Any] = [] tbar = tqdm( forward_cache[layer_name], desc=f"collecting acts in {layer_name}", leave=False, unit="batches", dynamic_ncols=True, ) if not use_prev_layer_outputs[layer_idx]: prev_layer_outputs: list[dict[str | int, tp.Any]] = [None] * len(tbar) for i, inputs in enumerate(tbar): inputs = inputs.update(prev_layer_outputs[i]).to(device=device) outputs = layer(*inputs.args, **inputs.kwargs) layer_outputs.append(self._convert_layer_outputs(layer, outputs)) tbar.set_postfix({"ram usage": psutil.virtual_memory().percent}) if psutil.virtual_memory().percent > 90: raise RuntimeError("memory usage > 90%%, aborting") prev_layer_outputs = layer_outputs del inputs, outputs, layer_outputs if (layer_idx == len(named_layers) - 1) or not use_prev_layer_outputs[layer_idx + 1]: del prev_layer_outputs # endregion for hook in layer_hooks: hook.remove() del layer_hooks layer_inputs = forward_cache.pop(layer_name, []) if not recomputes[layer_idx] and not use_prev_layer_outputs[layer_idx]: layer_inputs = [ self._convert_layer_inputs(layer, inputs.args, inputs.kwargs) for inputs in layer_inputs ] gc.collect() torch.cuda.empty_cache() yield layer_name, (layer, cache[layer_name], layer_inputs) # region clear layer cache 
if clear_after_yield: for module_cache in cache[layer_name].values(): module_cache.clear() cache.pop(layer_name) del layer_inputs gc.collect() torch.cuda.empty_cache() # endregion @abstractmethod def iter_layer_activations( # noqa: C901 self, model: nn.Module, *args, action: CacheAction, needs_inputs_fn: tp.Callable[[str, nn.Module], bool] | bool | None = True, needs_outputs_fn: tp.Callable[[str, nn.Module], bool] | bool | None = None, **kwargs, ) -> tp.Generator[ tuple[ str, tuple[ nn.Module, dict[str, IOTensorsCache], list[ModuleForwardInput], ], ], None, None, ]: """Iterate over model activations in layers. Args: model (`nn.Module`): Model. action (`CacheAction`): Action for caching activations. needs_inputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `True`): Function for determining whether to cache inputs for a module given its name and itself. needs_outputs_fn (`Callable[[str, nn.Module], bool]` or `bool` or `None`, *optional*, defaults to `None`): Function for determining whether to cache outputs for a module given its name and itself. *args: Arguments for ``iter_samples``. **kwargs: Keyword arguments for ``iter_samples``. Yields: Generator[ tuple[str, tuple[nn.Module, dict[str, IOTensorsCache], list[ModuleForwardInput]]], None, None ]: Generator of tuple of - layer name - a tuple of - layer itself - inputs and outputs cache of each module in the layer - layer input arguments """ ... 
@configclass
@dataclass(kw_only=True)
class BaseDataLoaderConfig(ABC):
    """Configuration for dataset loader.

    Args:
        data (`str`):
            Dataset name.
        num_samples (`int`):
            Number of dataset samples.
        batch_size (`int`):
            Batch size when loading dataset.
    """

    data: str
    num_samples: int
    batch_size: int

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Get the directory names derived from the configuration.

        Args:
            prefix (`str`, *optional*):
                Prefix for the names. When non-empty it is joined to the name
                with a dot.

        Returns:
            `list[str]`:
                A single-element list holding ``"<data>.<num_samples>"``,
                optionally prefixed.
        """
        dirname = f"{self.data}.{self.num_samples}"
        if prefix:
            dirname = f"{prefix}.{dirname}"
        return [dirname]

    @abstractmethod
    def build_dataset(self, *args, **kwargs) -> Dataset:
        """Build dataset."""
        ...

    @abstractmethod
    def build_loader(self, *args, **kwargs) -> DataLoader | BaseCalibCacheLoader:
        """Build data loader."""
        ...
================================================ FILE: deepcompressor/nn/__init__.py ================================================ # -*- coding: utf-8 -*- ================================================ FILE: deepcompressor/nn/patch/__init__.py ================================================ # -*- coding: utf-8 -*- from .conv import * from .linear import * from .lowrank import * from .sdpa import * ================================================ FILE: deepcompressor/nn/patch/conv.py ================================================ # -*- coding: utf-8 -*- """Concat Convolution 2d Module.""" import typing as tp import torch import torch.nn as nn import torch.nn.functional as F from torch.nn.common_types import _size_2_t __all__ = ["ConcatConv2d", "ShiftedConv2d"] class ConcatConv2d(nn.Module): def __init__( self, in_channels_list: list[int], out_channels: int, kernel_size: _size_2_t, stride: _size_2_t = 1, padding: tp.Union[str, _size_2_t] = 0, dilation: _size_2_t = 1, groups: int = 1, bias: bool = True, padding_mode: str = "zeros", # TODO: refine this type device=None, dtype=None, ) -> None: super().__init__() assert len(in_channels_list) > 1, "ConcatConv2d requires at least 2 input channels" self.in_channels_list = in_channels_list self.in_channels = sum(in_channels_list) num_convs = len(in_channels_list) self.convs = nn.ModuleList( [ nn.Conv2d( in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias if idx == num_convs - 1 else False, padding_mode, device, dtype, ) for idx, in_channels in enumerate(in_channels_list) ] ) def forward(self, x: torch.Tensor) -> torch.Tensor: # slice x based on in_channels_list x_splits: list[torch.Tensor] = x.split(self.in_channels_list, dim=1) # apply each conv to each slice (we have to make contiguous input for quantization) out_splits = [conv(x_split.contiguous()) for conv, x_split in zip(self.convs, x_splits, strict=True)] # sum the results return sum(out_splits) @staticmethod def from_conv2d(conv: 
nn.Conv2d, splits: list[int]) -> "ConcatConv2d": splits.append(conv.in_channels - sum(splits)) splits = [s for s in splits if s > 0] assert len(splits) > 1, "ConcatConv2d requires at least 2 input channels" concat_conv = ConcatConv2d( in_channels_list=splits, out_channels=conv.out_channels, kernel_size=conv.kernel_size, stride=conv.stride, padding=conv.padding, dilation=conv.dilation, groups=conv.groups, bias=conv.bias is not None, padding_mode=conv.padding_mode, device=conv.weight.device, dtype=conv.weight.dtype, ) used_in_channels = 0 for sub_conv in concat_conv.convs: assert isinstance(sub_conv, nn.Conv2d) in_channels = sub_conv.in_channels sub_conv.weight.data.copy_(conv.weight[:, used_in_channels : used_in_channels + in_channels]) used_in_channels += in_channels if conv.bias is not None: assert sub_conv.bias is not None sub_conv.bias.data.copy_(conv.bias) return concat_conv class ShiftedConv2d(nn.Module): shift: torch.Tensor def __init__( self, in_channels: int, out_channels: int, kernel_size: _size_2_t, shift: float | torch.Tensor, stride: _size_2_t = 1, padding: tp.Union[str, _size_2_t] = 0, dilation: _size_2_t = 1, groups: int = 1, bias: bool = True, padding_mode: str = "zeros", # TODO: refine this type device=None, dtype=None, ) -> None: super().__init__() self.conv = nn.Conv2d( in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode, device, dtype, ) self.conv.shifted = True if not isinstance(shift, torch.Tensor): shift = torch.tensor(shift, device=device, dtype=dtype) shift = shift.flatten().to(device=device, dtype=dtype) shift_channels = shift.numel() if shift_channels > 1: assert padding == 0, "Padding is not supported for multi-channel shift" assert in_channels >= shift_channels and in_channels % shift_channels == 0 shift = shift.view(shift_channels, 1).expand(shift_channels, in_channels // shift_channels) shift = shift.reshape(1, in_channels, 1, 1) self.register_buffer("shift", shift.view(1, -1, 1, 1)) # region 
update padding-related attributes self.padding_size = self.conv._reversed_padding_repeated_twice self.padding_mode, self.padding_value = self.conv.padding_mode, None if all(p == 0 for p in self.padding_size): self.padding_mode = "" elif self.padding_mode == "zeros": self.padding_mode = "constant" assert shift.numel() == 1, "Zero padding is not supported for multi-channel shift" self.padding_value = shift.item() self.conv.padding = "valid" self.conv.padding_mode = "zeros" self.conv._reversed_padding_repeated_twice = [0, 0] * len(self.conv.kernel_size) # endregion def forward(self, input: torch.Tensor) -> torch.Tensor: input = input + self.shift if self.padding_mode: input = F.pad(input, self.padding_size, mode=self.padding_mode, value=self.padding_value) return self.conv(input) @staticmethod def from_conv2d(conv: nn.Conv2d, shift: float | torch.Tensor) -> "ShiftedConv2d": device, dtype = conv.weight.device, conv.weight.dtype shifted = ShiftedConv2d( in_channels=conv.in_channels, out_channels=conv.out_channels, kernel_size=conv.kernel_size, shift=shift, stride=conv.stride, padding=conv.padding, dilation=conv.dilation, groups=conv.groups, bias=True, padding_mode=conv.padding_mode, device=device, dtype=dtype, ) shifted.conv.weight.data.copy_(conv.weight) shift = shifted.shift if shift.numel() == 1: shifted_bias = conv.weight.double().sum(dim=[1, 2, 3]) * shift.double() else: shifted_bias = torch.matmul(conv.weight.double().sum(dim=[2, 3]), shift.view(-1).double()) shifted_bias = shifted_bias.view(shifted.conv.bias.size()) if conv.bias is not None: shifted.conv.bias.data.copy_((conv.bias.data.double() - shifted_bias).to(dtype)) else: shifted.conv.bias.data.copy_(-shifted_bias.to(dtype)) return shifted ================================================ FILE: deepcompressor/nn/patch/linear.py ================================================ # -*- coding: utf-8 -*- """Concat Linear Module.""" import torch import torch.nn as nn __all__ = ["ConcatLinear", "ShiftedLinear"] 
class ConcatLinear(nn.Module): def __init__( self, in_features_list: list[int], out_features: int, bias: bool = True, device=None, dtype=None, ) -> None: super().__init__() assert len(in_features_list) > 1, "ConcatLinear requires at least 2 input features" self.in_features_list = in_features_list self.in_features = sum(in_features_list) self.out_features = out_features num_linears = len(in_features_list) self.linears = nn.ModuleList( [ nn.Linear( in_features, out_features, bias if idx == num_linears - 1 else False, device, dtype, ) for idx, in_features in enumerate(in_features_list) ] ) def forward(self, x: torch.Tensor) -> torch.Tensor: # slice x based on in_features_list x_splits: list[torch.Tensor] = x.split(self.in_features_list, dim=-1) # apply each linear to each slice (we have to make contiguous input for quantization) out_splits = [linear(x_split.contiguous()) for linear, x_split in zip(self.linears, x_splits, strict=True)] # sum the results return sum(out_splits) @staticmethod def from_linear(linear: nn.Linear, splits: list[int]) -> "ConcatLinear": splits.append(linear.in_features - sum(splits)) splits = [s for s in splits if s > 0] assert len(splits) > 1, "ConcatLinear requires at least 2 input features" concat_linear = ConcatLinear( in_features_list=splits, out_features=linear.out_features, bias=linear.bias is not None, device=linear.weight.device, dtype=linear.weight.dtype, ) used_in_features = 0 for sub_linear in concat_linear.linears: assert isinstance(sub_linear, nn.Linear) in_features = sub_linear.in_features sub_linear.weight.data.copy_(linear.weight[:, used_in_features : used_in_features + in_features]) used_in_features += in_features if linear.bias is not None: assert sub_linear.bias is not None sub_linear.bias.data.copy_(linear.bias) return concat_linear class ShiftedLinear(nn.Module): shift: torch.Tensor def __init__( self, in_features: int, out_features: int, shift: float | torch.Tensor, bias: bool = True, device=None, dtype=None, ) -> None: 
super().__init__() self.linear = nn.Linear(in_features, out_features, bias, device, dtype) self.linear.shifted = True device, dtype = self.linear.weight.device, self.linear.weight.dtype if not isinstance(shift, torch.Tensor): shift = torch.tensor(shift, device=device, dtype=dtype) shift = shift.flatten().to(device=device, dtype=dtype) shift_features = shift.numel() if shift_features > 1: assert in_features >= shift_features and in_features % shift_features == 0 shift = shift.view(-1, 1).expand(-1, in_features // shift_features).flatten() self.register_buffer("shift", shift) @property def in_features(self) -> int: return self.linear.in_features @property def out_features(self) -> int: return self.linear.out_features def forward(self, input: torch.Tensor) -> torch.Tensor: return self.linear(input + self.shift.view([1] * (input.dim() - 1) + [-1])) @staticmethod def from_linear(linear: nn.Linear, shift: float | torch.Tensor) -> "ShiftedLinear": device, dtype = linear.weight.device, linear.weight.dtype shifted = ShiftedLinear( in_features=linear.in_features, out_features=linear.out_features, shift=shift, bias=True, device=device, dtype=dtype, ) shifted.linear.weight.data.copy_(linear.weight) shift = shifted.shift if shift.numel() == 1: shifted_bias = linear.weight.double().sum(dim=1) * shift.double() else: shifted_bias = torch.matmul(linear.weight.double(), shift.view(1, -1).double()) shifted_bias = shifted_bias.view(shifted.linear.bias.size()) if linear.bias is not None: shifted.linear.bias.data.copy_((linear.bias.data.double() - shifted_bias).to(dtype)) else: shifted.linear.bias.data.copy_(shifted_bias.to(dtype).neg_()) return shifted ================================================ FILE: deepcompressor/nn/patch/lowrank.py ================================================ # -*- coding: utf-8 -*- import torch import torch.linalg import torch.nn as nn from ...utils.hooks import AccumBranchHook, BaseInputPackager, BaseOutputPackager __all__ = ["LowRankBranch"] class 
LowRankBranch(nn.Module): def __init__( self, in_features: int, out_features: int, rank: int, alpha: float = 1.0, weight: torch.Tensor | None = None ): super().__init__() self.in_features = in_features self.out_features = out_features self.rank = rank self.alpha = alpha if rank == 0: self.a, self.b = None, None elif rank < 0: self.a, self.b = nn.Linear(in_features, out_features, bias=False), nn.Identity() else: self.a, self.b = nn.Linear(in_features, rank, bias=False), nn.Linear(rank, out_features, bias=False) self.reset_parameters(weight) @torch.no_grad() def reset_parameters(self, weight: torch.Tensor | None = None) -> None: if weight is None: if self.rank < 0: nn.init.zeros_(self.a.weight) elif self.rank > 0: nn.init.kaiming_uniform_(self.a.weight) nn.init.zeros_(self.b.weight) return if weight.ndim >= 2: assert weight.shape[2:].numel() == 1, "LinearLoRAHook only supports 2D input tensor" weight = weight.view(weight.shape[0], -1) device, dtype = weight.device, weight.dtype self.to(device=device, dtype=dtype) out_features, in_features = weight.shape assert self.in_features == in_features, "Input features size mismatch" assert self.out_features == out_features, "Output features size mismatch" if self.rank < 0: self.a.weight.data.copy_(weight) elif self.rank > 0: u, s, vh = torch.linalg.svd(weight.double()) # tensor: [oc, ic], u: [oc, oc], s: [oc], vh: [ic, ic] # us: [oc, rank], vh: [rank, ic] us = u[:, : self.rank] * s[: self.rank] vh = vh[: self.rank] assert not us.isnan().any(), "NaN in U * S" assert not vh.isnan().any(), "NaN in V^T" assert not us.isinf().any(), "Inf in U * S" assert not vh.isinf().any(), "Inf in V^T" self.a.weight.data.copy_(vh.to(dtype)) self.b.weight.data.copy_(us.to(dtype)) def get_effective_weight(self) -> torch.Tensor | None: if self.rank == 0: return None elif self.rank < 0: return self.a.weight else: return self.b.weight @ self.a.weight def forward(self, input: torch.Tensor) -> torch.Tensor | None: if self.a is None: return None else: 
if input.ndim <= 3: return self.alpha * self.b(self.a(input)) else: assert input.ndim == 4 assert input.shape[-1] != self.in_features assert input.shape[1] == self.in_features # [B, C, H, W] -> [B, H, W, C] -> [B, H * W, C] B, C, H, W = input.shape input = input.permute(0, 2, 3, 1).reshape(B, H * W, C) output = self.alpha * self.b(self.a(input)) # [B, H * W, C] -> [B, H, W, C] -> [B, C, H, W] output = output.reshape(B, H, W, -1).permute(0, 3, 1, 2) return output def as_hook( self, input_packager: BaseInputPackager | None = None, output_packager: BaseOutputPackager | None = None, ) -> AccumBranchHook: """Wrap the module as a branch hook. Args: input_packager (`BaseInputPackager` or `None`, *optional*, defaults to `None`): Input packager. output_packager (`BaseOutputPackager` or `None`, *optional*, defaults to `None`): Output packager. Returns: `AccumBranchHook`: The branch hook. """ return AccumBranchHook(self, input_packager=input_packager, output_packager=output_packager) ================================================ FILE: deepcompressor/nn/patch/sdpa.py ================================================ # -*- coding: utf-8 -*- """Sparse attention module.""" import typing as tp import torch import torch.nn as nn import torch.nn.functional as F __all__ = ["ScaleDotProductAttention"] class ScaleDotProductAttention(nn.Module): def forward( self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, attn_mask: tp.Optional[torch.Tensor] = None, dropout_p: float = 0.0, is_causal: bool = False, scale: tp.Optional[float] = None, ) -> torch.Tensor: return F.scaled_dot_product_attention( query, key, value, attn_mask=attn_mask, dropout_p=dropout_p, is_causal=is_causal, scale=scale ) ================================================ FILE: deepcompressor/nn/struct/__init__.py ================================================ # -*- coding: utf-8 -*- from .attn import * from .base import * ================================================ FILE: 
deepcompressor/nn/struct/attn.py ================================================ # -*- coding: utf-8 -*- """Transformer and attention module struct.""" import typing as tp from abc import abstractmethod from dataclasses import dataclass, field import torch.nn as nn from ...utils.common import join_name from .base import BaseModuleStruct __all__ = [ "AttentionStruct", "SelfAttentionStruct", "CrossAttentionStruct", "JointAttentionStruct", "FeedForwardStruct", "FeedForwardStruct", "TransformerBlockStruct", "BaseTransformerStruct", "AttentionConfigStruct", "FeedForwardConfigStruct", ] @dataclass(kw_only=True) class AttentionConfigStruct: """Attention module configuration. Args: hidden_size (`int`): The size (i.e., #channels) of the input/output activations. add_hidden_size (`int`): The size (i.e., #channels) of the additional activations. inner_size (`int`): The size of the inner activations, i.e., the number of **query** channels. num_query_heads (`int`): Number of query heads. num_key_value_heads (`int`): Number of key and value heads. with_qk_norm (`bool`, *optional*, defaults to `False`): Whether to apply normalization to queries and keys. with_rope (`bool`, *optional*, defaults to `True`): Whether to use Rotary Positional Encoding (RoPE). linear_attn (`bool`, *optional*, defaults to `False`): Whether to use linear attention. 
""" hidden_size: int add_hidden_size: int = 0 inner_size: int num_query_heads: int num_key_value_heads: int with_qk_norm: bool = False with_rope: bool = True linear_attn: bool = False @property def head_size(self) -> int: """Get the head size.""" return self.num_query_channels // self.num_query_heads @property def num_key_value_groups(self) -> int: """Get the number of key-value groups.""" return self.num_query_heads // self.num_key_value_heads @property def num_channels(self) -> int: """Get the number of channels for the input and output.""" return self.hidden_size @property def num_add_channels(self) -> int: """Get the number of channels for additional inputs.""" return self.add_hidden_size @property def num_query_channels(self) -> int: """Get the number of query channels.""" return self.inner_size @property def num_key_value_channels(self) -> int: """Get the number of key-value channels.""" return self.num_head_channels * self.num_key_value_heads @property def num_head_channels(self) -> int: """Get the head dimension.""" return self.head_size @property def num_head_repeats(self) -> int: """Get the number of head repeats.""" return self.num_key_value_groups @dataclass(kw_only=True) class FeedForwardConfigStruct: """Feed-forward module configuration. Args: hidden_size (`int`): The size of the input/output activations, i.e., the number of **input** channels. intermediate_size (`int`): The number of intermediate channels in the feedforward network. intermediate_act_type (`str`): The activation function for the intermediate activations in the feedforward network. num_experts (`int`, *optional*, defaults to `1`): Number of experts. Attributes: intermediate_lowerbound (`float` or `None`): The lowerbound of the intermediate activations. 
""" hidden_size: int intermediate_size: int intermediate_act_type: str num_experts: int = 1 @property def num_channels(self) -> int: """Get the model size.""" return self.hidden_size @property def num_intermediate_channels(self) -> int: """Get the intermediate size.""" return self.intermediate_size @property def intermediate_lowerbound(self) -> float | None: """The lowerbound of the intermediate activations.""" return self.infer_lowerbound(self.intermediate_act_type) @staticmethod def infer_lowerbound(act_type: str) -> float | None: if act_type.endswith("_glu"): return None elif act_type.endswith("_shifted"): return 0 elif act_type.startswith("relu"): return 0 elif act_type == "gelu": return -0.171875 # -0.17 elif act_type == "silu" or act_type == "swish": return -0.2734375 # -0.27 elif act_type == "mish": return -0.31640625 # -0.31 else: raise NotImplementedError(f"Unsupported activation type: {act_type}") @dataclass(kw_only=True) class AttentionStruct(BaseModuleStruct): """Attention module struct.""" # region relative keys qkv_proj_rkey: tp.ClassVar[str] = "qkv_proj" add_qkv_proj_rkey: tp.ClassVar[str] = "add_qkv_proj" out_proj_rkey: tp.ClassVar[str] = "out_proj" add_out_proj_rkey: tp.ClassVar[str] = "add_out_proj" q_rkey: tp.ClassVar[str] = "q" k_rkey: tp.ClassVar[str] = "k" v_rkey: tp.ClassVar[str] = "v" # endregion config: AttentionConfigStruct # region child modules q_proj: nn.Linear """Query projection.""" k_proj: nn.Linear | None """Key projection layer for self or joint attention.""" v_proj: nn.Linear | None """Value projection layer for self or joint attention.""" o_proj: nn.Linear """Output projection.""" add_q_proj: nn.Linear | None """Additional query projection layer for joint attention.""" add_k_proj: nn.Linear | None """Additional key projection layer for cross or joint attention.""" add_v_proj: nn.Linear | None """Additional value projection layer for cross or joint attention.""" add_o_proj: nn.Linear | None """Additional output projection.""" q: 
nn.Module """Module that generates queries for the attention mechanism.""" k: nn.Module """Module that generates keys for the attention mechanism.""" v: nn.Module """Module that generates values for the attention mechanism.""" # endregion # region relative names q_proj_rname: str k_proj_rname: str v_proj_rname: str o_proj_rname: str add_q_proj_rname: str add_k_proj_rname: str add_v_proj_rname: str add_o_proj_rname: str q_rname: str k_rname: str v_rname: str # endregion # region absolute names q_proj_name: str = field(init=False, repr=False) k_proj_name: str = field(init=False, repr=False) v_proj_name: str = field(init=False, repr=False) o_proj_name: str = field(init=False, repr=False) add_q_proj_name: str = field(init=False, repr=False) add_k_proj_name: str = field(init=False, repr=False) add_v_proj_name: str = field(init=False, repr=False) add_o_proj_name: str = field(init=False, repr=False) q_name: str = field(init=False, repr=False) k_name: str = field(init=False, repr=False) v_name: str = field(init=False, repr=False) # endregion # region absolute keys qkv_proj_key: str = field(init=False, repr=False) add_qkv_proj_key: str = field(init=False, repr=False) out_proj_key: str = field(init=False, repr=False) add_out_proj_key: str = field(init=False, repr=False) q_key: str = field(init=False, repr=False) k_key: str = field(init=False, repr=False) v_key: str = field(init=False, repr=False) # endregion # region aliases @property def qkv_proj(self) -> list[nn.Linear]: return [self.q_proj] if self.is_cross_attn() else [self.q_proj, self.k_proj, self.v_proj] @property def add_qkv_proj(self) -> list[nn.Linear]: if self.is_self_attn(): return [] elif self.is_cross_attn(): return [self.add_k_proj, self.add_v_proj] else: return [self.add_q_proj, self.add_k_proj, self.add_v_proj] @property def out_proj(self) -> nn.Linear: return self.o_proj @property def add_out_proj(self) -> nn.Linear: return self.add_o_proj @property def qkv_proj_rnames(self) -> list[str]: return ( 
[self.q_proj_rname] if self.is_cross_attn() else [self.q_proj_rname, self.k_proj_rname, self.v_proj_rname] ) @property def add_qkv_proj_rnames(self) -> list[str]: if self.is_self_attn(): return [] elif self.is_cross_attn(): return [self.add_k_proj_rname, self.add_v_proj_rname] else: return [self.add_q_proj_rname, self.add_k_proj_rname, self.add_v_proj_rname] @property def out_proj_rname(self) -> str: return self.o_proj_rname @property def add_out_proj_rname(self) -> str: return self.add_o_proj_rname @property def qkv_proj_names(self) -> list[str]: return [self.q_proj_name] if self.is_cross_attn() else [self.q_proj_name, self.k_proj_name, self.v_proj_name] @property def add_qkv_proj_names(self) -> list[str]: if self.is_self_attn(): return [] elif self.is_cross_attn(): return [self.add_k_proj_name, self.add_v_proj_name] else: return [self.add_q_proj_name, self.add_k_proj_name, self.add_v_proj_name] @property def out_proj_name(self) -> str: return self.o_proj_name @property def add_out_proj_name(self) -> str: return self.add_o_proj_name # endregion def __post_init__(self) -> None: super().__post_init__() assert self.o_proj is not None if self.add_k_proj is None: # self attention assert self.q_proj is not None and self.k_proj is not None and self.v_proj is not None assert self.add_q_proj is None and self.add_v_proj is None assert self.add_o_proj is None elif self.k_proj is None: # cross attention assert self.q_proj is not None and self.add_v_proj is not None assert self.add_q_proj is None and self.v_proj is None assert self.add_o_proj is None else: # joint attention assert self.q_proj is not None and self.add_q_proj is not None assert self.v_proj is not None and self.add_v_proj is not None # self.add_o_proj can be None or not for field_name in ( "q_proj", "k_proj", "v_proj", "o_proj", "add_q_proj", "add_k_proj", "add_v_proj", "add_o_proj", "q", "k", "v", ): rname = getattr(self, f"{field_name}_rname") if getattr(self, field_name) is not None or rname: assert rname, 
f"`{field_name}_rname` must not be empty if `{field_name}` is not None" setattr(self, f"{field_name}_name", join_name(self.name, rname)) else: setattr(self, f"{field_name}_name", "") self.qkv_proj_key = join_name(self.key, self.qkv_proj_rkey, sep="_") self.add_qkv_proj_key = join_name(self.key, self.add_qkv_proj_rkey, sep="_") self.out_proj_key = join_name(self.key, self.out_proj_rkey, sep="_") self.add_out_proj_key = join_name(self.key, self.add_out_proj_rkey, sep="_") self.q_key = join_name(self.key, self.q_rkey, sep="_") self.k_key = join_name(self.key, self.k_rkey, sep="_") self.v_key = join_name(self.key, self.v_rkey, sep="_") # region assertions if self.q_proj is not None: assert self.q_proj.weight.shape[1] == self.config.num_channels assert self.q_proj.weight.shape[0] == self.config.num_query_channels if self.add_q_proj is not None: assert self.add_q_proj.weight.shape[1] == self.config.num_add_channels assert self.add_q_proj.weight.shape[0] == self.config.num_query_channels if self.k_proj is not None: assert self.k_proj.weight.shape[1] == self.config.num_channels assert self.k_proj.weight.shape[0] == self.config.num_key_value_channels if self.add_k_proj is not None: assert self.add_k_proj.weight.shape[0] == self.config.num_key_value_channels assert self.add_k_proj.weight.shape[1] == self.config.num_add_channels if self.v_proj is not None: assert self.v_proj.weight.shape[1] == self.config.num_channels assert self.v_proj.weight.shape[0] == self.config.num_key_value_channels if self.add_v_proj is not None: assert self.add_v_proj.weight.shape[0] == self.config.num_key_value_channels assert self.add_v_proj.weight.shape[1] == self.config.num_add_channels if self.o_proj is not None: assert self.o_proj.weight.shape[1] == self.config.num_query_channels assert self.o_proj.weight.shape[0] == self.config.num_channels if self.add_o_proj is not None: assert self.add_o_proj.weight.shape[1] == self.config.num_query_channels assert self.add_o_proj.weight.shape[0] == 
self.config.num_add_channels # endregion def is_self_attn(self) -> bool: return self.add_k_proj is None def is_cross_attn(self) -> bool: return self.k_proj is None def is_joint_attn(self) -> bool: return self.add_k_proj is not None and self.k_proj is not None def filter_kwargs(self, kwargs: dict) -> dict: """Extract the keyword arguments that are relevant to the attention module.""" return kwargs def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]: yield self.qkv_proj_key, self.q_proj_name, self.q_proj, self, "q_proj" if self.k_proj is not None: yield self.qkv_proj_key, self.k_proj_name, self.k_proj, self, "k_proj" if self.v_proj is not None: yield self.qkv_proj_key, self.v_proj_name, self.v_proj, self, "v_proj" if self.add_q_proj is not None: yield self.add_qkv_proj_key, self.add_q_proj_name, self.add_q_proj, self, "add_q_proj" if self.add_k_proj is not None: yield self.add_qkv_proj_key, self.add_k_proj_name, self.add_k_proj, self, "add_k_proj" if self.add_v_proj is not None: yield self.add_qkv_proj_key, self.add_v_proj_name, self.add_v_proj, self, "add_v_proj" yield self.out_proj_key, self.o_proj_name, self.o_proj, self, "o_proj" if self.add_o_proj is not None: yield self.add_out_proj_key, self.add_o_proj_name, self.add_o_proj, self, "add_o_proj" def iter_attention_structs(self) -> tp.Generator[tp.Self, None, None]: yield self @classmethod def get_default_keys(cls) -> list[str]: """Get the default keys.""" return [cls.qkv_proj_rkey, cls.add_qkv_proj_rkey, cls.out_proj_rkey, cls.add_out_proj_rkey] @dataclass(kw_only=True) class SelfAttentionStruct(AttentionStruct): """Self-attention module struct.""" # region child modules k_proj: nn.Linear """Key projection.""" v_proj: nn.Linear """Value projection.""" add_q_proj: None = field(init=False, repr=False, default=None) add_k_proj: None = field(init=False, repr=False, default=None) add_v_proj: None = field(init=False, repr=False, default=None) add_o_proj: None = 
field(init=False, repr=False, default=None) # endregion # region relative names add_q_proj_rname: str = field(init=False, repr=False, default="") add_k_proj_rname: str = field(init=False, repr=False, default="") add_v_proj_rname: str = field(init=False, repr=False, default="") add_o_proj_rname: str = field(init=False, repr=False, default="") # endregion @classmethod def get_default_keys(cls) -> list[str]: """Get the default keys.""" return [cls.qkv_proj_rkey, cls.out_proj_rkey] @dataclass(kw_only=True) class CrossAttentionStruct(AttentionStruct): """Cross-attention module struct.""" # region child modules k_proj: None = field(init=False, repr=False, default=None) v_proj: None = field(init=False, repr=False, default=None) add_q_proj: None = field(init=False, repr=False, default=None) add_k_proj: nn.Linear """Additional key projection.""" add_v_proj: nn.Linear """Additional value projection.""" add_o_proj: None = field(init=False, repr=False, default=None) # endregion # region relative names k_proj_rname: str = field(init=False, repr=False, default="") v_proj_rname: str = field(init=False, repr=False, default="") add_q_proj_rname: str = field(init=False, repr=False, default="") add_o_proj_rname: str = field(init=False, repr=False, default="") # endregion @classmethod def get_default_keys(cls) -> list[str]: """Get the default keys.""" return [cls.qkv_proj_rkey, cls.add_qkv_proj_rkey, cls.out_proj_rkey] @dataclass(kw_only=True) class JointAttentionStruct(AttentionStruct): """Joint-attention module struct.""" # region child modules k_proj: nn.Linear """Key projection.""" v_proj: nn.Linear """Value projection.""" add_q_proj: nn.Linear """Additional query projection.""" add_k_proj: nn.Linear """Additional key projection.""" add_v_proj: nn.Linear """Additional value projection.""" # endregion @dataclass(kw_only=True) class FeedForwardStruct(BaseModuleStruct): """Feed-forward module struct.""" # region relative keys up_proj_rkey: tp.ClassVar[str] = "up_proj" down_proj_rkey: 
@dataclass(kw_only=True)
class FeedForwardStruct(BaseModuleStruct):
    """Struct describing a feed-forward module, optionally with several experts.

    `up_projs` and `down_projs` are flat lists ordered by relative name first and
    expert second, matching the order of `up_proj_names` / `down_proj_names`.
    """

    # region relative keys
    up_proj_rkey: tp.ClassVar[str] = "up_proj"
    down_proj_rkey: tp.ClassVar[str] = "down_proj"
    moe_gate_rkey: tp.ClassVar[str] = "moe_gate"
    # endregion

    config: FeedForwardConfigStruct

    # region child modules
    up_projs: list[nn.Linear]
    """Up projections."""
    down_projs: list[nn.Linear]
    """Down projections."""
    moe_gate: nn.Linear | None
    """Mixture of experts gate."""
    experts: list[nn.Module]
    """Expert modules."""
    # endregion
    # region relative names
    up_proj_rnames: list[str]
    down_proj_rnames: list[str]
    moe_gate_rname: str
    experts_rname: str
    # endregion
    # region absolute names
    up_proj_names: list[str] = field(init=False, repr=False)
    down_proj_names: list[str] = field(init=False, repr=False)
    moe_gate_name: str = field(init=False, repr=False)
    experts_name: str = field(init=False, repr=False)
    expert_names: list[str] = field(init=False, repr=False)
    # endregion
    # region absolute keys
    up_proj_key: str = field(init=False, repr=False)
    down_proj_key: str = field(init=False, repr=False)
    moe_gate_key: str = field(init=False, repr=False)
    # endregion

    def __post_init__(self) -> None:
        """Validate the expert layout, then derive absolute names and keys."""
        super().__post_init__()
        num_experts = len(self.experts)
        assert len(self.up_projs) == num_experts * len(self.up_proj_rnames)
        assert len(self.down_projs) == num_experts * len(self.down_proj_rnames)
        is_moe = num_experts > 1
        if not is_moe:
            # without MoE the projections are named relative to this struct itself
            assert self.moe_gate is None, "moe gate must be empty for non-MoE"
            self.experts_rname = self.experts_name = self.moe_gate_rname = self.moe_gate_name = ""
            self.expert_names = [self.name]
        else:
            assert self.experts_rname, "experts name must be provided for MoE"
            assert self.moe_gate_rname, "moe gate name must be provided for MoE"
            assert self.moe_gate is not None, "moe gate must be provided for MoE"
            self.moe_gate_name = join_name(self.name, self.moe_gate_rname)
            self.experts_name = join_name(self.name, self.experts_rname)
            self.expert_names = [join_name(self.experts_name, str(e)) for e in range(num_experts)]
        # relative name is the outer loop and expert the inner loop
        up_names: list[str] = []
        for rname in self.up_proj_rnames:
            up_names.extend(join_name(expert_name, rname) for expert_name in self.expert_names)
        self.up_proj_names = up_names
        down_names: list[str] = []
        for rname in self.down_proj_rnames:
            down_names.extend(join_name(expert_name, rname) for expert_name in self.expert_names)
        self.down_proj_names = down_names
        self.up_proj_key = join_name(self.key, self.up_proj_rkey, sep="_")
        self.down_proj_key = join_name(self.key, self.down_proj_rkey, sep="_")
        self.moe_gate_key = join_name(self.key, self.moe_gate_rkey, sep="_")
        # region assertions
        assert num_experts == self.config.num_experts
        if self.moe_gate is not None:
            assert self.moe_gate.weight.shape[1] == self.config.num_channels
        for up_proj in self.up_projs:
            assert up_proj.weight.shape[1] == self.config.num_channels
            allowed_out_channels = (
                self.config.num_intermediate_channels,
                self.config.num_intermediate_channels * 2,  # for fused GLU
            )
            assert up_proj.weight.shape[0] in allowed_out_channels
        for down_proj in self.down_projs:
            assert down_proj.weight.shape[1] == self.config.num_intermediate_channels
            assert down_proj.weight.shape[0] == self.config.num_channels
        # endregion

    def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]:
        """Yield `(key, name, module, struct, field_name)` for the gate and all projections.

        The gate (if any) comes first; then, expert by expert, the up projections
        followed by the down projections of that expert.
        """
        if self.moe_gate is not None:
            yield self.moe_gate_key, self.moe_gate_name, self.moe_gate, self, "moe_gate"
        num_experts = self.config.num_experts
        for expert_idx in range(num_experts):
            # entries of one expert are `num_experts` apart in the flat lists
            of_expert = slice(expert_idx, None, num_experts)
            up_pairs = zip(self.up_proj_names[of_expert], self.up_projs[of_expert], strict=True)
            for name, module in up_pairs:
                yield self.up_proj_key, name, module, self, "up_proj"
            down_pairs = zip(self.down_proj_names[of_expert], self.down_projs[of_expert], strict=True)
            for name, module in down_pairs:
                yield self.down_proj_key, name, module, self, "down_proj"
@dataclass(kw_only=True)
class TransformerBlockStruct(BaseModuleStruct):
    """Struct describing a transformer block: attention modules, norms and feed-forward modules.

    Child structs for the attention and feed-forward modules are built in
    `__post_init__` through `attn_struct_cls.construct` / `ffn_struct_cls.construct`.
    """

    # region relative keys
    attn_rkey: tp.ClassVar[str] = "attn"
    ffn_rkey: tp.ClassVar[str] = "ffn"
    add_ffn_rkey: tp.ClassVar[str] = "ffn_add"
    attn_struct_cls: tp.ClassVar[type[AttentionStruct]] = AttentionStruct
    ffn_struct_cls: tp.ClassVar[type[FeedForwardStruct]] = FeedForwardStruct
    # endregion

    parallel: bool
    """Whether the feed-forward modules are parallel to the attention modules."""

    # region child modules
    pre_attn_norms: list[nn.Module] = field(repr=False)
    """Pre-attention normalization layers."""
    pre_attn_add_norms: list[nn.Module] = field(repr=False)
    """Pre-attention additional normalization layers."""
    attns: list[nn.Module] = field(repr=False)
    """Attention modules."""
    post_attn_norms: list[nn.Module] = field(repr=False)
    """Post-attention normalization layers."""
    post_attn_add_norms: list[nn.Module] = field(repr=False)
    """Post-attention additional normalization layers."""
    pre_ffn_norm: nn.Module | None = field(repr=False)
    """Pre-feed-forward normalization."""
    ffn: nn.Module | None = field(repr=False)
    """Feed-forward module."""
    post_ffn_norm: nn.Module | None = field(repr=False)
    """Post-feed-forward normalization."""
    pre_add_ffn_norm: nn.Module | None = field(repr=False)
    """Pre-additional-feed-forward normalization."""
    add_ffn: nn.Module | None = field(repr=False)
    """Additional feed-forward module."""
    post_add_ffn_norm: nn.Module | None = field(repr=False)
    """Post-additional-feed-forward normalization."""
    # endregion
    # region relative names
    pre_attn_norm_rnames: list[str]
    pre_attn_add_norm_rnames: list[str]
    attn_rnames: list[str]
    post_attn_norm_rnames: list[str]
    post_attn_add_norm_rnames: list[str]
    pre_ffn_norm_rname: str
    ffn_rname: str
    post_ffn_norm_rname: str
    pre_add_ffn_norm_rname: str
    add_ffn_rname: str
    post_add_ffn_norm_rname: str
    # endregion
    # region absolute names
    pre_attn_norm_names: list[str] = field(init=False, repr=False)
    pre_attn_add_norm_names: list[str] = field(init=False, repr=False)
    attn_names: list[str] = field(init=False, repr=False)
    # declared like the other derived names; both are assigned in `__post_init__`
    post_attn_norm_names: list[str] = field(init=False, repr=False)
    post_attn_add_norm_names: list[str] = field(init=False, repr=False)
    pre_ffn_norm_name: str = field(init=False, repr=False)
    ffn_name: str = field(init=False, repr=False)
    post_ffn_norm_name: str = field(init=False, repr=False)
    pre_add_ffn_norm_name: str = field(init=False, repr=False)
    add_ffn_name: str = field(init=False, repr=False)
    post_add_ffn_norm_name: str = field(init=False, repr=False)
    # endregion
    # region child structs
    attn_structs: list[AttentionStruct] = field(init=False, repr=False)
    ffn_struct: FeedForwardStruct | None = field(init=False, repr=False)
    add_ffn_struct: FeedForwardStruct | None = field(init=False, repr=False)
    # endregion

    def __post_init__(self) -> None:
        """Validate list lengths, derive absolute names and build the child structs.

        Raises:
            AssertionError: If module and name lists disagree in length, or the
                additional norms are inconsistent with the attention types.
        """
        super().__post_init__()
        assert issubclass(self.attn_struct_cls, AttentionStruct)
        assert issubclass(self.ffn_struct_cls, FeedForwardStruct)
        # region assertions
        assert len(self.attns) == len(self.attn_rnames)
        assert len(self.pre_attn_norms) == len(self.pre_attn_norm_rnames)
        assert len(self.pre_attn_add_norms) == len(self.pre_attn_add_norm_rnames)
        assert len(self.post_attn_norms) == len(self.post_attn_norm_rnames)
        assert len(self.post_attn_add_norms) == len(self.post_attn_add_norm_rnames)
        # when norms are given, there must be exactly one per attention module
        if self.pre_attn_norms:
            assert len(self.pre_attn_norms) == len(self.attns)
        if self.post_attn_norms:
            assert len(self.post_attn_norms) == len(self.attns)
        # endregion
        self.pre_attn_norm_names = [join_name(self.name, rname) for rname in self.pre_attn_norm_rnames]
        self.pre_attn_add_norm_names = [join_name(self.name, rname) for rname in self.pre_attn_add_norm_rnames]
        self.attn_names = [join_name(self.name, rname) for rname in self.attn_rnames]
        self.post_attn_norm_names = [join_name(self.name, rname) for rname in self.post_attn_norm_rnames]
        self.post_attn_add_norm_names = [join_name(self.name, rname) for rname in self.post_attn_add_norm_rnames]
        self.pre_ffn_norm_name = join_name(self.name, self.pre_ffn_norm_rname)
        self.ffn_name = join_name(self.name, self.ffn_rname)
        self.post_ffn_norm_name = join_name(self.name, self.post_ffn_norm_rname)
        self.pre_add_ffn_norm_name = join_name(self.name, self.pre_add_ffn_norm_rname)
        self.add_ffn_name = join_name(self.name, self.add_ffn_rname)
        self.post_add_ffn_norm_name = join_name(self.name, self.post_add_ffn_norm_rname)
        self.attn_structs = [
            self.attn_struct_cls.construct(
                attn, parent=self, fname="attn", rname=self.attn_rnames[idx], rkey=self.attn_rkey, idx=idx
            )
            for idx, attn in enumerate(self.attns)
        ]
        if self.ffn is not None:
            self.ffn_struct = self.ffn_struct_cls.construct(
                self.ffn, parent=self, fname="ffn", rname=self.ffn_rname, rkey=self.ffn_rkey
            )
            # take the module back from the struct in case construction replaced it
            self.ffn = self.ffn_struct.module
        else:
            self.ffn_struct = None
        if self.add_ffn is not None:
            self.add_ffn_struct = self.ffn_struct_cls.construct(
                self.add_ffn, parent=self, fname="add_ffn", rname=self.add_ffn_rname, rkey=self.add_ffn_rkey
            )
            self.add_ffn = self.add_ffn_struct.module
        else:
            self.add_ffn_struct = None
        if self.pre_attn_add_norms or self.post_attn_add_norms:
            assert len(self.attns) >= len(self.pre_attn_add_norms) and len(self.attns) >= len(self.post_attn_add_norms)
            for i, attn in enumerate(self.attn_structs):
                if i < len(self.pre_attn_add_norms):
                    if attn.is_self_attn():
                        assert self.pre_attn_add_norms[i] is None, "self attention cannot have additional norm"
                elif i < len(self.post_attn_add_norms):
                    if attn.is_self_attn():
                        assert self.post_attn_add_norms[i] is None, "self attention cannot have additional norm"
                else:
                    # no additional norm slot at this index
                    assert attn.is_self_attn(), "cross or joint attention must have additional norm"
        else:
            assert all(attn.is_self_attn() for attn in self.attn_structs)

    def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]:
        """Yield the key modules of the attention structs, then of the feed-forward structs."""
        for attn_struct in self.attn_structs:
            yield from attn_struct.named_key_modules()
        if self.ffn_struct is not None:
            yield from self.ffn_struct.named_key_modules()
        if self.add_ffn_struct is not None:
            yield from self.add_ffn_struct.named_key_modules()

    def iter_attention_structs(self) -> tp.Generator[AttentionStruct, None, None]:
        """Yield every attention struct of this block."""
        for attn_struct in self.attn_structs:
            yield from attn_struct.iter_attention_structs()

    def iter_transformer_block_structs(self) -> tp.Generator[tp.Self, None, None]:
        """Yield this block only."""
        yield self
@dataclass(kw_only=True)
class BaseTransformerStruct(BaseModuleStruct):
    """Abstract struct of a transformer: optional input/output norms and projections around blocks.

    Subclasses provide the blocks through `num_blocks`, `block_structs` and `block_names`.
    """

    # region relative keys
    proj_in_rkey: tp.ClassVar[str] = "proj_in"
    proj_out_rkey: tp.ClassVar[str] = "proj_out"
    # endregion

    # region child modules
    norm_in: nn.Module | None
    """Input normalization."""
    proj_in: nn.Linear | None
    """Input projection."""
    norm_out: nn.Module | None
    """Output normalization."""
    proj_out: nn.Linear | None
    """Output projection."""
    # endregion
    # region relative names
    norm_in_rname: str
    proj_in_rname: str
    norm_out_rname: str
    proj_out_rname: str
    # endregion
    # region absolute names
    norm_in_name: str = field(init=False, repr=False)
    proj_in_name: str = field(init=False, repr=False)
    norm_out_name: str = field(init=False, repr=False)
    proj_out_name: str = field(init=False, repr=False)
    # endregion
    # region absolute keys
    proj_in_key: str = field(init=False, repr=False)
    proj_out_key: str = field(init=False, repr=False)
    # endregion

    @property
    @abstractmethod
    def num_blocks(self) -> int:
        """Get the number of transformer blocks."""
        ...

    @property
    @abstractmethod
    def block_structs(self) -> list[TransformerBlockStruct]:
        """Get the list of transformer block structs."""
        ...

    @property
    @abstractmethod
    def block_names(self) -> list[str]:
        """Get the list of transformer block names."""
        ...

    def __post_init__(self) -> None:
        """Derive the absolute names and keys of the input/output modules."""
        super().__post_init__()
        for field_name in ("norm_in", "proj_in", "norm_out", "proj_out"):
            rname = getattr(self, f"{field_name}_rname")
            # absent module without a relative name -> empty absolute name
            if getattr(self, field_name) is None and not rname:
                setattr(self, f"{field_name}_name", "")
                continue
            assert rname, f"{field_name} relative name must not be empty"
            setattr(self, f"{field_name}_name", join_name(self.name, rname))
        for proj_field in ("proj_in", "proj_out"):
            rkey = getattr(self, f"{proj_field}_rkey")
            setattr(self, f"{proj_field}_key", join_name(self.key, rkey, sep="_"))

    def named_key_modules(self) -> tp.Generator[tp.Tuple[str, str, nn.Module, BaseModuleStruct, str], None, None]:
        """Yield the input projection, the key modules of every block, then the output projection."""
        proj_in = self.proj_in
        if proj_in is not None:
            yield self.proj_in_key, self.proj_in_name, proj_in, self, "proj_in"
        for block_struct in self.block_structs:
            yield from block_struct.named_key_modules()
        proj_out = self.proj_out
        if proj_out is not None:
            yield self.proj_out_key, self.proj_out_name, proj_out, self, "proj_out"

    def iter_attention_structs(self) -> tp.Generator[AttentionStruct, None, None]:
        """Yield the attention structs of every block, in block order."""
        for block_struct in self.block_structs:
            yield from block_struct.iter_attention_structs()

    def iter_transformer_block_structs(self) -> tp.Generator[TransformerBlockStruct, None, None]:
        """Yield the transformer block structs of every block, in block order."""
        for block_struct in self.block_structs:
            yield from block_struct.iter_transformer_block_structs()
def __post_init__(self) -> None:
    """Resolve the absolute name/key and validate them against the parent struct.

    A root struct (no parent) must carry an empty field name, relative name,
    relative key and index 0. A child struct joins its relative name/key onto
    the parent's and must match the name the parent recorded for that field.
    """
    parent = self.parent
    if parent is None:
        assert self.idx == 0, f"idx must be 0 if parent is None, got {self.idx}"
        assert not self.fname, f"field name must be empty if parent is None, got {self.fname}"
        assert not self.rname, f"relative name must be empty if parent is None, got {self.rname}"
        assert not self.rkey, f"relative key must be empty if parent is None, got {self.rkey}"
        self.name = self.rname
        self.key = self.rkey
        return
    assert self.fname, f"field name must not be empty if parent is not None, got {self.fname}"
    self.name = join_name(parent.name, self.rname)
    self.key = join_name(parent.key, self.rkey, sep="_")
    if hasattr(parent, f"{self.fname}_names"):
        # list-valued field on the parent: compare against the entry at our index
        assert self.name == getattr(parent, f"{self.fname}_names")[self.idx]
    else:
        assert self.idx == 0, f"idx must be 0 if parent is not None and {self.fname}_names not found"
        assert self.name == getattr(parent, f"{self.fname}_name")
@classmethod def get_default_keys(cls) -> list[str]: """Get the default keys.""" return [] @classmethod def register_factory( cls, module_types: type[nn.Module] | tuple[type[nn.Module], ...], /, factory: tp.Callable[[nn.Module, tp.Optional["BaseModuleStruct"], str, str, str, int], tp.Self], *, overwrite: bool = False, ) -> None: """Register a factory that constructs a module struct from a module. Args: module_types (`type[nn.Module]` or `tuple[type[nn.Module], ...]`): The module type(s). factory (`Callable[[nn.Module, BaseModuleStruct, str, str, str, int], BaseModuleStruct]`): The factory function. overwrite (`bool`, *optional*, defaults to `False`): Whether to overwrite the existing factory for the module type(s). """ def unpack(module_types): if isinstance(module_types, tp._UnionGenericAlias) or isinstance(module_types, types.UnionType): args = [] for arg in module_types.__args__: args.extend(unpack(arg)) return args elif isinstance(module_types, tuple): args = [] for arg in module_types: args.extend(unpack(arg)) return args return [module_types] module_types = unpack(module_types) for module_type in module_types: # assert issubclass(module_type, nn.Module), f"{module_type} is not a subclass of nn.Module" if not hasattr(cls, "_factories"): cls._factories = {} if not overwrite: assert module_type not in cls._factories, f"factory for {module_type} already exists" cls._factories[module_type] = factory @classmethod def construct( cls, module: nn.Module, /, parent: tp.Optional["BaseModuleStruct"] = None, fname: str = "", rname: str = "", rkey: str = "", idx: int = 0, **kwargs, ) -> tp.Self: """Construct a module struct from a module. Args: module (`nn.Module`): The module instance. parent (`BaseModuleStruct` or `None`, *optional*, defaults to `None): The parent module struct that contains this module struct. rname (`str`, *optional*, defaults to `""`): The relative name of this module from the parent module. 
rkey (`str`, *optional*, defaults to `""`): The relative key of this module from the parent module. idx (`int`, *optional*, defaults to `0`): The index of this module struct if it is in a list of the parent module struct. Returns: `Self`: The module struct. """ factory = cls._factories[type(module)] return factory(module, parent=parent, fname=fname, rname=rname, rkey=rkey, idx=idx, **kwargs) ================================================ FILE: deepcompressor/quantizer/__init__.py ================================================ # -*- coding: utf-8 -*- from .processor import Quantizer ================================================ FILE: deepcompressor/quantizer/config/__init__.py ================================================ # -*- coding: utf-8 -*- from .base import BaseQuantizerConfig, DecomposedQuantizerConfig, ProgressiveQuantizerConfig, QuantizerConfig from .kernel import BaseKeyEnableQuantKernelConfig, BaseQuantKernelConfig from .lowrank import QuantLowRankConfig ================================================ FILE: deepcompressor/quantizer/config/base.py ================================================ # -*- coding: utf-8 -*- """Quantization kernel config.""" import typing as tp from abc import abstractmethod from dataclasses import dataclass, field import omniconfig import torch from omniconfig import configclass from ...data.dtype import QuantDataType from ...data.utils import DtypeUtils, ScaleUtils, ShapeUtils from ...data.zero import ZeroPointDomain from ...utils.config import EnableConfig __all__ = [ "BaseQuantizerConfig", "DecomposedQuantizerConfig", "QuantizerConfig", "ProgressiveQuantizerConfig", ] class BaseQuantizerConfig(EnableConfig): """Base Quantizer configuration.""" @property @abstractmethod def quant_dtype(self) -> QuantDataType | None: """The quantization data type.""" ... @property @abstractmethod def zero_domain(self) -> ZeroPointDomain | None: """The zero-point domain.""" ... 
@dataclass(frozen=True)
class DecomposedQuantizerConfig(BaseQuantizerConfig):
    """Quantizer configuration flattened into an ordered tuple of simple steps.

    The final data type and zero-point domain are read from the last step.
    An empty ``steps`` tuple means quantization is disabled.
    """

    steps: tuple["QuantizerConfig", ...]
    needs_dequant_saturation: bool = False

    @property
    def quant_dtype(self) -> QuantDataType | None:
        """Data type of the last step, or `None` when there are no steps."""
        if not self.steps:
            return None
        return self.steps[-1].dtype

    @property
    def zero_domain(self) -> ZeroPointDomain | None:
        """Zero-point domain of the last step, or `None` when there are no steps."""
        if not self.steps:
            return None
        return self.steps[-1].zero_point

    @property
    def largest_group_shape(self) -> tp.Sequence[int]:
        """Largest group shape of the first step, `(-1, -1, -1)` when there are no steps."""
        if not self.steps:
            return (-1, -1, -1)
        return self.steps[0].largest_group_shape

    @property
    def smallest_group_shape(self) -> tp.Sequence[int]:
        """Smallest group shape of the last step, `(-1, -1, -1)` when there are no steps."""
        if not self.steps:
            return (-1, -1, -1)
        return self.steps[-1].smallest_group_shape

    @property
    def num_steps(self) -> int:
        """Number of quantization steps."""
        return len(self.steps)

    def decompose(self) -> "DecomposedQuantizerConfig":
        """Return ``self``; the configuration is already decomposed."""
        return self

    def __eq__(self, value: object) -> bool:
        """Compare step-wise on dtype, group shapes and scale dtypes only.

        ``needs_dequant_saturation`` takes part in the comparison only when
        there is more than one step.
        """
        if not isinstance(value, DecomposedQuantizerConfig):
            return False
        if self.num_steps != value.num_steps:
            return False
        for mine, theirs in zip(self.steps, value.steps, strict=True):
            # ! we only compare the dtype, group_shapes, and scale_dtypes
            if (
                mine.dtype != theirs.dtype
                or mine.group_shapes != theirs.group_shapes
                or mine.scale_dtypes != theirs.scale_dtypes
            ):
                return False
        if self.num_steps > 1 and self.needs_dequant_saturation != value.needs_dequant_saturation:
            return False
        return True

    def _get_effective_bits(
        self, *, shape: torch.Size | tuple[int, ...] = (4096, 4096), default_dtype: torch.dtype = torch.float16
    ) -> float:
        """Get the effective bits of the quantization.

        Args:
            shape (`torch.Size` or `tuple[int, ...]`, *optional*, defaults to `(4096, 4096)`):
                The shape of the tensor to be quantized.
            default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
                The dtype of the tensor to be quantized.

        Returns:
            `float`:
                The effective bits: the bits of the final data type plus the
                per-element share of every scale and of the zero point.
        """
        shape = torch.Size(shape)
        final_dtype = self.quant_dtype
        if final_dtype is None:
            return DtypeUtils.infer_dtype_bits(default_dtype)
        bits = final_dtype.total_bits
        for step_config in self.steps:
            group_shapes = ShapeUtils.infer_group_shapes(step_config.group_shapes, shape=shape)
            scale_dtypes = ScaleUtils.infer_scale_dtypes(step_config.scale_dtypes, default_dtype=default_dtype)
            for group_shape, scale_dtype in zip(group_shapes, scale_dtypes, strict=True):
                bits += DtypeUtils.infer_dtype_bits(scale_dtype) / group_shape.numel()
        # the zero point is charged against the innermost group of the last step;
        # ``group_shapes`` / ``group_shape`` / ``scale_dtype`` deliberately keep their last loop values
        zero_domain = self.zero_domain
        if zero_domain == ZeroPointDomain.PreScale:
            bits += final_dtype.total_bits / group_shapes[-1].numel()
        elif zero_domain == ZeroPointDomain.PostScale:
            bits += DtypeUtils.infer_dtype_bits(scale_dtype) / group_shape.numel()
        return bits

    def _get_dtype_name(self, default_dtype: torch.dtype = torch.float16) -> str:
        """Get the name of the quantization data type.

        Args:
            default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
                The dtype of the input tensor, used when quantization is disabled.

        Returns:
            `str`:
                The data type name, suffixed with ``.z`` (pre-scale zero point)
                or ``.zp`` (post-scale zero point) when a zero point is used.
        """
        final_dtype = self.quant_dtype
        if final_dtype is None:
            return DtypeUtils.infer_dtype_name(default_dtype)
        zero_domain = self.zero_domain
        suffix = ""
        if zero_domain == ZeroPointDomain.PreScale:
            suffix = ".z"
        elif zero_domain == ZeroPointDomain.PostScale:
            suffix = ".zp"
        return DtypeUtils.infer_dtype_name(final_dtype) + suffix

    def _get_group_shapes_name(self, default_dtype: torch.dtype = torch.float16) -> str:
        """Get the name of the group shapes.

        Args:
            default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
                The dtype of the input tensor.

        Returns:
            `str`:
                The name of the group shapes, last step first.
        """
        if self.quant_dtype is None:
            return f"tnsr.{DtypeUtils.infer_dtype_name(default_dtype)}"
        num_steps = len(self.steps)
        step_labels = []
        # scales without an explicit dtype fall back to the previous step's data type
        fallback_dtype = default_dtype
        for step, step_config in enumerate(self.steps):
            level_labels = [
                f"{ShapeUtils.infer_group_shape_name(group_shape)}"
                f".{DtypeUtils.infer_dtype_name(sdtype or fallback_dtype)}"
                for group_shape, sdtype in zip(step_config.group_shapes, step_config.scale_dtypes, strict=True)
            ]
            label = ".".join(reversed(level_labels))
            # NOTE(review): with ``num_steps - 2`` the last two steps are never bracketed — confirm intended
            if step < num_steps - 2:
                label = f"[{label}]"
            step_labels.append(label)
            fallback_dtype = step_config.dtype
            assert fallback_dtype is not None, "step_default_dtype must not be None"
        return ".".join(reversed(step_labels))

    def generate_dirnames(
        self,
        *,
        prefix: str = "",
        shape: torch.Size | tuple[int, ...] = (4096, 4096),
        default_dtype: torch.dtype = torch.float16,
        **kwargs,
    ) -> list[str]:
        """Generate the directory names of the quantization configuration.

        Args:
            prefix (`str`, *optional*, defaults to `""`):
                The prefix for the directory names.
            shape (`torch.Size` or `tuple[int, ...]`, *optional*, defaults to `(4096, 4096)`):
                The shape of the tensor to be quantized.
            default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
                The dtype of the tensor to be quantized.
            **kwargs:
                Accepted for signature compatibility and ignored.

        Returns:
            `list[str]`:
                The names of the quantization configuration.
                    - The number of effective bits.
                    - The name of the quantization data type.
                    - The name of the group shapes.
        """
        shape = torch.Size(shape)
        effective_bits = int(self._get_effective_bits(shape=shape, default_dtype=default_dtype))
        parts = [
            str(effective_bits),
            self._get_dtype_name(default_dtype=default_dtype),
            self._get_group_shapes_name(default_dtype=default_dtype),
        ]
        if not prefix:
            return parts
        return [f"{prefix}.{part}" for part in parts]
@configclass
@dataclass
class ProgressiveQuantizerConfig(QuantizerConfig):
    """Progressive Quantizer configuration.

    Args:
        dtype (`QuantDataType` or `None`, *optional*, defaults to `None`):
            The quantization data type.
        zero_point (`ZeroPointDomain` or `None`, *optional*, defaults to `None`):
            The zero-point domain.
        group_shapes (`Sequence[Sequence[int]]`, *optional*, defaults to `((-1, -1, -1),)`):
            The shapes for per-group quantization.
        scale_dtypes (`Sequence[torch.dtype | QuantDataType | None]`, *optional*, defaults to `(None,)`):
            The quantization scale data type for per-group quantization.
        intermediate_dtypes (`Sequence[QuantDataType]`, *optional*, defaults to `()`):
            The intermediate quantization data types.
        intermediate_levels (Sequence[int], *optional*, defaults to `()`):
            The intermediate quantization levels.
        needs_dequant_saturation (`bool`, *optional*, defaults to `False`):
            Whether the dequantization needs saturation.
    """

    intermediate_dtypes: tp.Sequence[QuantDataType] = field(
        default_factory=tuple, metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": QuantDataType.from_str}}
    )
    intermediate_levels: tp.Sequence[int] = field(
        default_factory=tuple, metadata={omniconfig.ARGPARSE_KWARGS: {"nargs": "+", "type": int}}
    )
    needs_dequant_saturation: bool = False

    def __post_init__(self) -> None:
        """Normalize the intermediate dtypes/levels to tuples and validate them."""
        super().__post_init__()
        if self.dtype is None:
            # quantization disabled: drop every progressive setting
            self.intermediate_dtypes = ()
            self.intermediate_levels = ()
            self.needs_dequant_saturation = False
            return
        num_levels = len(self.group_shapes)
        dtypes, levels = self.intermediate_dtypes, self.intermediate_levels
        # accept a single dtype / level in place of a sequence
        if isinstance(dtypes, QuantDataType):
            dtypes = (dtypes,)
        if isinstance(levels, int):
            levels = (levels,)
        dtypes = tuple(dtypes)
        # the modulo maps negative levels onto their non-negative index
        levels = tuple(level % num_levels for level in levels)
        if not dtypes:
            levels = ()
            self.needs_dequant_saturation = False
        self.intermediate_dtypes = dtypes
        self.intermediate_levels = levels
        assert len(self.intermediate_dtypes) == len(self.intermediate_levels)
        assert len(self.intermediate_levels) < num_levels
        assert all(isinstance(dtype, QuantDataType) for dtype in self.intermediate_dtypes)
        assert all(level < num_levels - 1 for level in self.intermediate_levels)

    def decompose(self) -> DecomposedQuantizerConfig:
        """Decompose the configuration to a list of simple configurations.

        Each intermediate level closes one step that owns the group shapes and
        scale dtypes up to and including that level; the remaining levels form
        the final step, which carries this configuration's dtype and zero point.
        """
        if self.dtype is None:
            return DecomposedQuantizerConfig(steps=())
        if len(self.intermediate_dtypes) == 0:
            return DecomposedQuantizerConfig(steps=(self,))
        steps = []
        start = 0
        for level, dtype in zip(self.intermediate_levels, self.intermediate_dtypes, strict=True):
            stop = level + 1
            steps.append(
                QuantizerConfig(
                    dtype=dtype,
                    zero_point=None,
                    group_shapes=self.group_shapes[start:stop],
                    scale_dtypes=self.scale_dtypes[start:stop],
                )
            )
            start = stop
        steps.append(
            QuantizerConfig(
                dtype=self.dtype,
                zero_point=self.zero_point,
                group_shapes=self.group_shapes[start:],
                scale_dtypes=self.scale_dtypes[start:],
            )
        )
        return DecomposedQuantizerConfig(steps=tuple(steps), needs_dequant_saturation=self.needs_dequant_saturation)
@configclass
@dataclass
class BaseKeyEnableQuantKernelConfig(KeyEnableConfig, EnableConfig):
    """Configuration for quantization kernel."""

    # names of the public (non-underscore) dataclass fields, filled by ``organize``
    _names: list[str] = field(init=False, repr=False, compare=False, default_factory=list)
    # module key -> kernel configuration that includes that key, filled by ``organize``
    _kernels: dict[str, BaseQuantKernelConfig | None] = field(
        init=False, repr=False, compare=False, default_factory=dict
    )

    def __post_init__(self) -> None:
        self.organize()

    def is_enabled(self) -> bool:
        """Whether at least one key is mapped to a kernel configuration."""
        return bool(self._kernels)

    def is_enabled_for(self, key: str) -> bool:
        """Whether a kernel configuration is registered for ``key``."""
        return key in self._kernels

    def specialize_for(self, key: str) -> BaseQuantKernelConfig | None:
        """Get the kernel configuration for the module key.

        Args:
            key (`str`):
                The key.

        Returns:
            `QuantKernelConfig` or `None`:
                The kernel configuration for the key, `None` if the key is not registered.
        """
        return self._kernels.get(key)

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        Args:
            prefix (`str`, *optional*, defaults to `""`):
                The prefix for the directory names.
            **kwargs:
                Forwarded to each kernel configuration's ``generate_dirnames``.

        Returns:
            `list[str]`:
                The directory names of every enabled kernel configuration, in
                field order; empty when no kernel is enabled.
        """
        if not self.is_enabled():
            return []
        names = []
        for name in self._names:
            config: IncludeBasedConfig = getattr(self, name)
            if config is not None and config.is_enabled():
                names.extend(config.generate_dirnames(prefix=prefix, **kwargs))
        return names

    def organize(self) -> None:
        """Organize the configuration.

        Rebuilds ``_names`` and ``_kernels`` from the public dataclass fields.
        Disabled kernel configurations are replaced by `None` on the instance.
        """
        # reset both caches so calling organize() again does not duplicate field names
        self._names.clear()
        self._kernels.clear()
        for _field in fields(self):
            name = _field.name
            if name.startswith("_"):
                continue
            self._names.append(name)
            config = getattr(self, name)
            if config is None:
                continue
            assert isinstance(
                config, IncludeBasedConfig
            ), f"Field '{name}' must be an instance of IncludeBasedConfig."
            assert isinstance(
                config, BaseQuantKernelConfig
            ), f"Field '{name}' must be an instance of BaseQuantKernelConfig."
            if not config.is_enabled():
                setattr(self, name, None)
                continue
            for key in config.includes:
                assert (
                    key not in self._kernels
                ), f"Key '{key}' is already included in other kernel configurations."
                self._kernels[key] = config
def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
    """Generate the directory names of the configuration.

    Args:
        prefix (`str`, *optional*, defaults to `""`):
            The prefix for the directory name.
        **kwargs:
            Accepted for signature compatibility and ignored.

    Returns:
        `list[str]`:
            An empty list when the low-rank branch is disabled, otherwise a
            single name built from the rank and the enabled flags.
    """
    if not self.is_enabled():
        return []
    parts = [f"r{num2str(self.rank)}"]
    if self.exclusive:
        parts.append("exclusive")
    if self.compensate:
        parts.append("compensate")
    name = ".".join(parts)
    if prefix:
        return [f"{prefix}.{name}"]
    return [name]
""" config: BaseQuantizerConfig | None key: str = "" info: QuantInfo | None = field(init=False, default=None) def is_enabled(self) -> bool: """Whether the quantizer is enabled.""" if self.config is None: return False if isinstance(self.config, KeyEnableConfig): return self.config.is_enabled_for(self.key) return self.config.is_enabled() def quantize( self, tensor: torch.Tensor, *, kernel: BaseQuantKernel | BaseQuantKernelConfig | None = None, channels_dim: int | None = None, # scale-based quantization arguments scale: torch.Tensor | tp.Sequence[torch.Tensor] | None = None, zero: torch.Tensor | None = None, # range-based quantization arguments dynamic_range: DynamicRange | tp.Sequence[DynamicRange] | None = None, # other arguments range_bound: RangeBound | None = None, quant_range: QuantRange | None = None, return_with_dequant: bool = True, return_with_quant: bool = False, default_dtype: torch.dtype | None = torch.float16, develop_dtype: torch.dtype = torch.float32, **kwargs, ) -> QuantTensor: """Quantize a floating point tensor. Args: tensor (`torch.Tensor`): The floating-point tensor to be quantized. kernel (`QuantKernel` or `QuantKernelConfig` or `None`, *optional*, defaults to `None`): The quantization kernel or its configuration. channels_dim (`int` or `None`, *optional*, defaults to `None`): The dimension of (input) channels. scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`): The scale tensor. zero (`torch.Tensor` or `None`, *optional*, defaults to `None`): The zero point tensor. dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`): The dynamic range. range_bound (`RangeBound` or `None`, *optional*, defaults to `None`): The dynamic range bound. quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The quantization range. return_with_dequant (`bool`, *optional*, defaults to `True`): Whether to return with dequantized tensor. 
return_with_quant (`bool`, *optional*, defaults to `False`): Whether to return with quantized tensor. default_dtype (`torch.dtype` or `None`, *optional*, defaults to `torch.float16`): The default dtype for scale. develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The develop dtype. **kwargs: Other keyword arguments for the quantization kernel. For example, ``inputs`` for the input tensors in GPTQ kernel, ``round_delta`` for the rounding delta in the RTN kernel. Returns: `QuantTensor`: The quantized tensor. """ shape = tensor.shape if channels_dim is not None: tensor = tensor.reshape(-1, *shape[channels_dim:]) round_delta = kwargs.pop("round_delta", None) if round_delta is not None: round_delta = round_delta.view(-1, *shape[channels_dim:]) result = self._quantize( tensor, kernel=kernel, scale=scale, zero=zero, dynamic_range=dynamic_range, range_bound=range_bound, quant_range=quant_range, round_delta=round_delta, return_with_dequant=return_with_dequant, return_with_quant=return_with_quant, default_dtype=default_dtype or tensor.dtype, develop_dtype=develop_dtype, **kwargs, ) if result.data is not None: result._dequantized = result.data.view(shape) if result.qdata is not None: result._quantized = result.qdata.view(shape) return result def _quantize( # noqa: C901 self, tensor: torch.Tensor, *, kernel: BaseQuantKernel | BaseQuantKernelConfig | None = None, # scale-based quantization arguments scale: torch.Tensor | tp.Sequence[torch.Tensor | None] | None = None, zero: torch.Tensor | None = None, # range-based quantization arguments dynamic_range: DynamicRange | tp.Sequence[DynamicRange | None] | None = None, # other arguments range_bound: RangeBound | None = None, quant_range: QuantRange | None = None, round_delta: torch.Tensor | None = None, return_with_dequant: bool = True, return_with_quant: bool = False, default_dtype: torch.dtype = torch.float16, develop_dtype: torch.dtype = torch.float32, **kwargs, ) -> QuantTensor: """Quantize a floating point 
tensor. Args: tensor (`torch.Tensor`): The floating-point tensor to be quantized. kernel (`QuantKernel` or `QuantKernelConfig` or `None`, *optional*, defaults to `None`): The quantization kernel or its configuration. scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`): The scale tensor. zero (`torch.Tensor` or `None`, *optional*, defaults to `None`): The zero point tensor. dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None`, *optional*, defaults to `None`): The dynamic range. range_bound (`RangeBound` or `None`, *optional*, defaults to `None`): The dynamic range bound. quant_range (`QuantRange` or `None`, *optional*, defaults to `None`): The quantization range. return_with_dequant (`bool`, *optional*, defaults to `True`): Whether to return with dequantized tensor. return_with_quant (`bool`, *optional*, defaults to `False`): Whether to return with quantized tensor. default_dtype (`torch.dtype`, *optional*, defaults to `torch.float16`): The default dtype for scale. develop_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The develop dtype. **kwargs: Other keyword arguments for the quantization kernel. For example, ``inputs`` for the input tensors in GPTQ kernel, ``round_delta`` for the rounding delta in the RTN kernel. Returns: `QuantTensor`: The quantized tensor. 
""" shape, dtype = tensor.shape, tensor.dtype self.update(shape, default_dtype, quant_range, range_bound) if self.info is None or self.info.num_steps == 0: return QuantTensor(dequantized=tensor, quantized=tensor, view_shape=shape) # region check scale and dynamic_range arguments num_steps = self.info.num_steps if scale is None: scale = (None,) * num_steps elif not isinstance(scale, tp.Sequence): scale = (scale,) if dynamic_range is None: dynamic_range = (None,) * num_steps elif isinstance(dynamic_range, DynamicRange): if not dynamic_range.is_set(): dynamic_range = (None,) * num_steps else: dynamic_range = (dynamic_range,) assert isinstance(scale, (tuple, list)), "scale must be a tuple or list." assert len(scale) == num_steps, "scale must have the same length as infos." assert isinstance(dynamic_range, (tuple, list)), "dynamic_range must be a tuple or list." assert len(dynamic_range) == num_steps, "dynamic_range must have the same length as infos." # endregion # region compute and quantize the scales and zero point for quantization quant_scale = QuantScale() develop_tensor = tensor.to(dtype=develop_dtype) if dtype != develop_dtype else tensor.clone() for step, (step_info, step_scale, step_dynamic_range) in enumerate( zip(self.info.steps, scale, dynamic_range, strict=True) ): step_scale, step_zero = step_info.scale.quantize( scale=step_scale, zero=None if step < num_steps - 1 else zero, tensor=develop_tensor, dynamic_range=step_dynamic_range, ) quant_scale.append(step_scale) if step < num_steps - 1: step_quant_range = step_info.tensor_quant_range develop_tensor = develop_tensor.view(step_info.tensor_view_shape).div_(step_scale.data).view(shape) develop_tensor = develop_tensor.clamp_(min=step_quant_range.min, max=step_quant_range.max) quant_zero = step_zero # endregion # region quantize the tensor assert isinstance(step_scale, QuantScale), "The last scale must be a QuantScale." assert isinstance(step_zero, torch.Tensor), "The last zero point must be a tensor." 
if round_delta is not None: if round_delta.shape[0] == 1: round_delta = round_delta.view(1, 1, *step_info.tensor_view_shape[2:]) else: round_delta = round_delta.view(step_info.tensor_view_shape) if isinstance(kernel, BaseQuantKernelConfig): kernel = kernel.build() kernel = kernel or QuantRtnKernel() develop_tensor = kernel.quantize( tensor=develop_tensor, view_shape=step_info.tensor_view_shape, quant_dtype=step_info.quant_dtype, zero_domain=step_info.zero_domain, scale=step_scale.data, zero=step_zero, quant_range=step_info.quant_range, range_bound=step_info.range_bound, round_delta=round_delta, **kwargs, ) assert not develop_tensor.isnan().any(), "Quantized tensor contains NaN." assert not develop_tensor.isinf().any(), "Quantized tensor contains Inf." # endregion # region update the quantized tensor quantized = None if return_with_quant: quantized = develop_tensor.detach() if return_with_dequant: quantized = develop_tensor.clone() quantized = develop_tensor.view(shape) # endregion # region update the dequantized tensor dequantized = None if return_with_dequant: dequantized = develop_tensor if self.config.zero_domain == ZeroPointDomain.PreScale: dequantized = dequantized.sub_(step_zero) dequantized = dequantized.mul_(step_scale.data) if self.config.zero_domain == ZeroPointDomain.PostScale: dequantized = dequantized.sub_(step_zero) for step in range(num_steps - 2, -1, -1): step_info, step_scale = self.info.get_child(step), quant_scale.get_child(step) step_min, step_max = step_info.quant_dtype.min_value, step_info.quant_dtype.max_value if self.info.needs_dequant_saturation or step < num_steps - 2: dequantized = dequantized.clamp_(min=step_min, max=step_max) else: assert dequantized.max() <= step_max, "Quantized tensor exceeds maximum value." assert dequantized.min() >= step_min, "Quantized tensor exceeds minimum value." 
def update(
    self,
    tensor_shape: torch.Size,
    default_dtype: torch.dtype | None,
    quant_range: QuantRange | None,
    range_bound: RangeBound | None,
) -> QuantInfo | None:
    """Refresh the cached quantization information of this quantizer.

    Args:
        tensor_shape (`torch.Size`):
            The shape of the tensor.
        default_dtype (`torch.dtype` or `None`):
            The default data type of the scale. Must not be `None` when the quantizer is enabled.
        quant_range (`QuantRange` or `None`):
            The quantization range.
        range_bound (`RangeBound` or `None`):
            The range bound.

    Returns:
        `QuantInfo` or `None`:
            The up-to-date quantization information, or `None` if the quantizer is disabled.
    """
    # A disabled quantizer keeps no quantization information at all.
    if not self.is_enabled():
        self.info = None
        return self.info

    decomposed = self.config.decompose()
    assert default_dtype is not None, "default_dtype must be set."

    # Rebuild only when nothing is cached yet or the cached information no longer matches the request.
    cached = self.info
    if cached is None:
        stale = True
    else:
        stale = cached.is_outdated(decomposed, tensor_shape, default_dtype, quant_range, range_bound)
    if stale:
        self.info = QuantInfo.construct(
            decomposed, tensor_shape, default_dtype, quant_range=quant_range, range_bound=range_bound
        )
    return self.info
@dataclass
class QuantStepInfo:
    """Description of one quantization step for a tensor of a fixed shape.

    The ``scale`` attribute is not passed to the constructor; it is built in
    ``__post_init__`` from the other fields.
    """

    # region config
    quant_dtype: QuantDataType
    zero_domain: ZeroPointDomain | None
    group_shapes: tuple[tuple[int, ...], ...]
    scale_dtypes: tuple[torch.dtype | QuantDataType | None, ...]
    quant_range: QuantRange | None
    range_bound: RangeBound | None
    default_dtype: torch.dtype
    # endregion
    # region information
    # the shape is a torch.Size (s0, s1, ...)
    tensor_shape: torch.Size
    # each group shape is a torch.Size (gs0, gs1, ...)
    tensor_group_shapes: list[torch.Size]
    # the view shape is a torch.Size (#g0, gs0, #g1, gs1, ...)
    tensor_view_shape: torch.Size
    # endregion
    scale: QuantScaleInfo = field(init=False)

    def __post_init__(self):
        # The scale view shapes are derived from the per-level group shapes and the tensor shape.
        scale_view_shapes = ShapeUtils.infer_scale_view_shapes(self.tensor_group_shapes, shape=self.tensor_shape)
        self.scale = QuantScaleInfo(
            tensor_view_shape=self.tensor_view_shape,
            tensor_quant_dtype=self.quant_dtype,
            tensor_zero_domain=self.zero_domain,
            tensor_quant_range=self.quant_range,
            tensor_range_bound=self.range_bound,
            default_quant_dtype=self.default_dtype,
            scale_view_shapes=scale_view_shapes,
            scale_quant_dtypes=self.scale_dtypes,
        )

    @property
    def tensor_zero_domain(self) -> ZeroPointDomain | None:
        """The zero-point domain as stored on the scale information."""
        scale_info = self.scale
        return scale_info.tensor_zero_domain

    @property
    def tensor_quant_range(self) -> QuantRange:
        """The intersection of the quant_range and quant_dtype."""
        scale_info = self.scale
        return scale_info.tensor_quant_range

    @property
    def tensor_range_bound(self) -> RangeBound | None:
        """The range bound as stored on the scale information."""
        scale_info = self.scale
        return scale_info.tensor_range_bound

    def to_config(self) -> QuantizerConfig:
        """Build a `QuantizerConfig` from the resolved information of this step."""
        config_kwargs = {
            "dtype": self.quant_dtype,
            "zero_point": self.zero_domain,
            "group_shapes": self.tensor_group_shapes,
            "scale_dtypes": self.scale.scale_quant_dtypes,
        }
        return QuantizerConfig(**config_kwargs)

    @staticmethod
    def construct(
        config: QuantizerConfig,
        tensor_shape: torch.Size,
        default_dtype: torch.dtype,
        quant_range: QuantRange | None = None,
        range_bound: RangeBound | None = None,
    ) -> "QuantStepInfo":
        """Create the step information of ``config`` for a tensor of shape ``tensor_shape``.

        Args:
            config (`QuantizerConfig`):
                The configuration of this quantization step.
            tensor_shape (`torch.Size`):
                The shape of the tensor.
            default_dtype (`torch.dtype`):
                The default data type of the scale.
            quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
                The quantization range.
            range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
                The range bound.

        Returns:
            `QuantStepInfo`:
                The step information.
        """
        resolved_group_shapes = ShapeUtils.infer_group_shapes(config.group_shapes, shape=tensor_shape)
        # The tensor view is derived from the last inferred group shape.
        last_group_shape = resolved_group_shapes[-1]
        resolved_view_shape = ShapeUtils.infer_view_shape(tensor_shape, group_shape=last_group_shape)
        return QuantStepInfo(
            quant_dtype=config.dtype,
            zero_domain=config.zero_point,
            group_shapes=config.group_shapes,
            scale_dtypes=config.scale_dtypes,
            quant_range=quant_range,
            range_bound=range_bound,
            default_dtype=default_dtype,
            tensor_shape=tensor_shape,
            tensor_group_shapes=resolved_group_shapes,
            tensor_view_shape=resolved_view_shape,
        )
@dataclass
class QuantInfo:
    """Quantization information of a (possibly multi-step) quantizer for one tensor shape.

    Attributes:
        steps (`tuple[QuantStepInfo, ...]`):
            The information of every quantization step, in the order of the decomposed configuration.
        needs_dequant_saturation (`bool`):
            Copied from the decomposed configuration this information was built from.
    """

    steps: tuple[QuantStepInfo, ...]
    needs_dequant_saturation: bool = False

    @property
    def num_steps(self) -> int:
        """The number of quantization steps."""
        return len(self.steps)

    def get_child(self, idx: int) -> QuantStepInfo:
        """Return the information of the ``idx``-th quantization step."""
        return self.steps[idx]

    def is_outdated(
        self,
        config: DecomposedQuantizerConfig,
        tensor_shape: torch.Size,
        default_dtype: torch.dtype,
        quant_range: QuantRange | None = None,
        range_bound: RangeBound | None = None,
    ) -> bool:
        """Check if the current quantization information is outdated.

        Args:
            config (`DecomposedQuantizerConfig`):
                The decomposed quantizer configuration to compare against.
            tensor_shape (`torch.Size`):
                The shape of the tensor.
            default_dtype (`torch.dtype`):
                The default data type of the scale.
            quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
                The quantization range.
            range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
                The range bound.

        Returns:
            `bool`:
                `True` if the information has to be re-constructed, `False` if it can be reused.
        """
        if self.num_steps != config.num_steps:
            return True
        for step_info, step_config in zip(self.steps, config.steps, strict=True):
            if step_info.quant_dtype != step_config.quant_dtype:
                return True
            # The zero-point domain is baked into every step (scale information, zero dtype and the
            # protective quantization range), so a change of it must invalidate the cache as well.
            if step_info.zero_domain != step_config.zero_domain:
                return True
            if step_info.group_shapes != step_config.group_shapes:
                return True
            if step_info.scale_dtypes != step_config.scale_dtypes:
                return True
        if self.num_steps > 0:
            # Only the first step receives the caller's default dtype and range bound,
            # and only the last step receives the caller's quantization range (see `construct`).
            first_step = self.steps[0]
            if first_step.tensor_shape != tensor_shape:
                return True
            if first_step.default_dtype != default_dtype:
                return True
            if first_step.range_bound != range_bound:
                return True
            if self.steps[-1].quant_range != quant_range:
                return True
        if self.num_steps > 1 and self.needs_dequant_saturation != config.needs_dequant_saturation:
            return True
        return False

    @staticmethod
    def construct(
        config: DecomposedQuantizerConfig,
        tensor_shape: torch.Size,
        default_dtype: torch.dtype,
        quant_range: QuantRange | None = None,
        range_bound: RangeBound | None = None,
    ) -> "QuantInfo":
        """Construct the quantization information of every step of ``config``.

        Args:
            config (`DecomposedQuantizerConfig`):
                The decomposed quantizer configuration.
            tensor_shape (`torch.Size`):
                The shape of the tensor.
            default_dtype (`torch.dtype`):
                The default data type of the scale of the first step.
            quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
                The quantization range of the last step.
            range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
                The range bound of the first step.

        Returns:
            `QuantInfo`:
                The quantization information.
        """
        steps: list[QuantStepInfo] = []
        num_steps = config.num_steps
        step_default_dtype = default_dtype
        step_range_bound = range_bound
        for step, step_config in enumerate(config.steps):
            assert step_config.quant_dtype is not None, f"quant_dtype is required for step {step}"
            if step == num_steps - 1:
                step_quant_range = quant_range
            elif step < num_steps - 2 or config.needs_dequant_saturation:
                step_quant_range = None
            else:
                # ! only second last step quantization can be protected without saturation in the computation
                step_quant_range = ProtectiveQuantRange.construct(
                    outer_dtype=step_config.quant_dtype,
                    inner_dtype=config.steps[-1].quant_dtype,
                    zero_domain=config.steps[-1].zero_domain,
                    inner_quant_range=quant_range,
                )
            steps.append(
                QuantStepInfo.construct(
                    step_config,
                    tensor_shape=tensor_shape,
                    default_dtype=step_default_dtype,
                    quant_range=step_quant_range,
                    range_bound=step_range_bound,
                )
            )
            # The next step uses this step's quantization dtype as its default dtype
            # and this step's quantization range as its range bound.
            step_default_dtype = step_config.quant_dtype
            step_range_bound = steps[-1].scale.tensor_quant_range
        return QuantInfo(steps=tuple(steps), needs_dequant_saturation=config.needs_dequant_saturation)
def quantize_scale(
    s: torch.Tensor,
    /,
    *,
    quant_dtypes: tp.Sequence[QuantDataType],
    quant_spans: tp.Sequence[float],
    view_shapes: tp.Sequence[torch.Size],
) -> QuantScale:
    """Quantize a scale tensor level by level.

    Args:
        s (`torch.Tensor`):
            The scale tensor. Its absolute value is quantized.
        quant_dtypes (`Sequence[QuantDataType]`):
            The quantization dtypes of the scale tensor, one per level.
        quant_spans (`Sequence[float]`):
            The quantization spans of the scale tensor, one per level.
        view_shapes (`Sequence[torch.Size]`):
            The view shapes of the scale tensor, one per level.

    Returns:
        `QuantScale`:
            The quantized scale, with one entry appended per level.
    """

    def _in_group_dims(shape: torch.Size) -> list[int]:
        # a view shape is (#g0, rs0, #g1, rs1, #g2, rs2, ...): the odd dims are reduced over
        return list(range(1, len(shape), 2))

    result = QuantScale()
    residual = s.abs()

    # All levels but the last: quantize the per-group maximum and divide it out of the residual.
    leading_levels = zip(view_shapes[:-1], quant_dtypes[:-1], quant_spans[:-1], strict=True)
    for level_shape, level_dtype, level_span in leading_levels:
        residual = residual.view(level_shape)
        group_max = residual.amax(dim=_in_group_dims(level_shape), keepdim=True)  # i.e., s_dynamic_span
        # i.e., s_scale = s_dynamic_span / s_quant_span
        level_scale = simple_quantize(group_max / level_span, has_zero_point=False, quant_dtype=level_dtype)
        residual = residual / level_scale
        result.append(level_scale)

    # Last level: reduce only when some odd dim of the view is not 1; otherwise quantize the residual directly.
    last_shape = view_shapes[-1]
    residual = residual.view(last_shape)
    every_group_is_singleton = all(extent == 1 for extent in last_shape[1::2])
    if every_group_is_singleton:
        assert quant_spans[-1] == 1, "The last quant span must be 1."
        last_scale = simple_quantize(residual, has_zero_point=False, quant_dtype=quant_dtypes[-1])
    else:
        group_max = residual.amax(dim=_in_group_dims(last_shape), keepdim=True)
        last_scale = simple_quantize(group_max / quant_spans[-1], has_zero_point=False, quant_dtype=quant_dtypes[-1])
    result.append(last_scale)
    result.remove_zero()
    return result
@dataclass
class QuantScaleInfo:
    """Information needed to compute the scale and zero point of one quantization step.

    The scale dtypes are split at ``exponent_scale_level`` into a linear part (levels before it)
    and an exponent part (levels from it onwards). When the level is negative or not smaller than
    the number of scale dtypes, every level is treated as linear and the exponent part is empty.
    All ``field(init=False)`` attributes are filled in by ``__post_init__``.
    """

    # region tensor information
    tensor_view_shape: torch.Size
    tensor_quant_dtype: torch.dtype | QuantDataType
    tensor_zero_domain: ZeroPointDomain | None
    # replaced in __post_init__ by the result of QuantRange.construct(...)
    tensor_quant_range: QuantRange
    tensor_range_bound: RangeBound | None
    # endregion
    default_quant_dtype: torch.dtype | QuantDataType
    scale_view_shapes: list[torch.Size]
    # replaced in __post_init__ by the result of ScaleUtils.infer_scale_dtypes(...)
    scale_quant_dtypes: list[torch.dtype | QuantDataType]
    exponent_scale_level: int = field(init=False)
    # set to None in __post_init__ when there is no zero point
    zero_quant_dtype: torch.dtype | QuantDataType = field(init=False)
    # region linear scale information
    linear_tensor_quant_span: float = field(init=False)
    linear_scale_quant_dtypes: list[torch.dtype | QuantDataType] = field(init=False)
    linear_scale_view_shapes: list[torch.Size] = field(init=False)
    linear_scale_quant_spans: list[float] = field(init=False)
    # endregion
    # region exponent scale information
    exponent_tensor_quant_span: float = field(init=False)
    exponent_scale_quant_dtypes: list[torch.dtype | QuantDataType] = field(init=False)
    exponent_scale_view_shapes: list[torch.Size] = field(init=False)
    exponent_scale_quant_spans: list[float] = field(init=False)
    # endregion

    @property
    def has_zero_point(self) -> bool:
        """Whether a zero point is used, i.e. whether ``tensor_zero_domain`` is set."""
        return self.tensor_zero_domain is not None

    def __post_init__(self):
        """Resolve the quantization range, the scale dtypes, the zero dtype and the linear/exponent split.

        Raises:
            NotImplementedError: If ``tensor_quant_dtype`` is a `torch.dtype`.
            ValueError: If a zero point is used and its domain is neither ``PreScale`` nor ``PostScale``.
        """
        if isinstance(self.tensor_quant_dtype, torch.dtype):
            raise NotImplementedError("torch.dtype is not supported yet.")
        self.tensor_quant_range = QuantRange.construct(
            self.tensor_quant_dtype, has_zero_point=self.has_zero_point, quant_range=self.tensor_quant_range
        )
        self.scale_quant_dtypes = ScaleUtils.infer_scale_dtypes(self.scale_quant_dtypes, self.default_quant_dtype)
        self.exponent_scale_level = ScaleUtils.infer_exponent_scale_level(self.scale_quant_dtypes)
        if self.has_zero_point:
            if self.tensor_zero_domain == ZeroPointDomain.PreScale:
                # a pre-scale zero point uses the same dtype as the quantized tensor
                self.zero_quant_dtype = self.tensor_quant_dtype
            elif self.tensor_zero_domain == ZeroPointDomain.PostScale:
                # TODO: fix zero quant dtype (signed or unsigned)
                # a post-scale zero point uses the dtype of the last scale level,
                # falling back to the default dtype when that level is an exponent dtype
                self.zero_quant_dtype = self.scale_quant_dtypes[-1]
                if isinstance(self.zero_quant_dtype, QuantDataType) and self.zero_quant_dtype.is_exponent:
                    self.zero_quant_dtype = self.default_quant_dtype
            else:
                raise ValueError(f"Unsupported zero point domain: {self.tensor_zero_domain}")
            # with a zero point the linear span is the full width (max - min) of the quantization range
            self.linear_tensor_quant_span = self.tensor_quant_range.max - self.tensor_quant_range.min
            # power of two: 2 ** truncated(log2(max) + 1 if the dtype is signed else log2(max))
            self.exponent_tensor_quant_span = 2 ** int(
                math.log2(self.tensor_quant_range.max) + int(self.tensor_quant_dtype.signed)
            )
        else:
            self.zero_quant_dtype = None
            # without a zero point only the maximum of the quantization range is used as the span
            self.linear_tensor_quant_span = self.tensor_quant_range.max
            self.exponent_tensor_quant_span = 2 ** int(math.log2(self.tensor_quant_range.max))
        if self.exponent_scale_level >= 0 and self.exponent_scale_level < len(self.scale_quant_dtypes):
            # split dtypes and view shapes at the exponent level
            lin_s_dtypes = self.scale_quant_dtypes[: self.exponent_scale_level]
            exp_s_dtypes = self.scale_quant_dtypes[self.exponent_scale_level :]
            lin_s_view_shapes = self.scale_view_shapes[: self.exponent_scale_level]
            exp_s_view_shapes = self.scale_view_shapes[self.exponent_scale_level :]
            exp_s_spans = ScaleUtils.infer_scale_quant_spans(exp_s_dtypes)
            # the linear spans are inferred with the last exponent span passed as ``base``
            lin_s_spans = ScaleUtils.infer_scale_quant_spans(lin_s_dtypes, base=exp_s_spans[-1]) if lin_s_dtypes else []
        else:
            # no exponent part: every scale level is linear
            lin_s_dtypes, exp_s_dtypes = self.scale_quant_dtypes, []
            lin_s_view_shapes, exp_s_view_shapes = self.scale_view_shapes, []
            lin_s_spans, exp_s_spans = ScaleUtils.infer_scale_quant_spans(lin_s_dtypes), []
        self.linear_scale_quant_dtypes = lin_s_dtypes
        self.linear_scale_view_shapes = lin_s_view_shapes
        self.linear_scale_quant_spans = lin_s_spans
        self.exponent_scale_quant_dtypes = exp_s_dtypes
        self.exponent_scale_view_shapes = exp_s_view_shapes
        self.exponent_scale_quant_spans = exp_s_spans

    def quantize(
        self,
        *,
        # scale-based quantization related arguments
        scale: torch.Tensor | None = None,
        zero: torch.Tensor | None = None,
        # range-based quantization related arguments
        tensor: torch.Tensor | None = None,
        dynamic_range: DynamicRange | None = None,
    ) -> tuple[QuantScale, torch.Tensor]:
        """Get the quantization scale and zero point of the tensor to be quantized.

        When ``scale`` is `None` the scale is derived from the dynamic range of ``tensor``
        (range-based); otherwise the given ``scale`` (and ``zero``) are quantized (scale-based).

        Args:
            scale (`torch.Tensor` or `None`, *optional*, defaults to `None`):
                The scale tensor.
            zero (`torch.Tensor` or `None`, *optional*, defaults to `None`):
                The zero point tensor. Only used for scale-based quantization with a zero point.
            tensor (`torch.Tensor` or `None`, *optional*, defaults to `None`):
                The tensor to be quantized. This is only used for range-based quantization.
            dynamic_range (`DynamicRange` or `None`, *optional*, defaults to `None`):
                The dynamic range of the tensor to be quantized.

        Returns:
            `tuple[QuantScale, torch.Tensor]`:
                The scale and the zero point. Without a zero point, the second item is a scalar zero tensor.
        """
        # region step 1: get the dynamic span for range-based scale or the scale tensor
        if scale is None:
            range_based = True
            assert isinstance(tensor, torch.Tensor), "View tensor must be a tensor."
            dynamic_range = dynamic_range or DynamicRange()
            dynamic_range = dynamic_range.measure(
                tensor.view(self.tensor_view_shape),
                zero_domain=self.tensor_zero_domain,
                is_float_point=self.tensor_quant_dtype.is_float_point,
            )
            dynamic_range = dynamic_range.intersect(self.tensor_range_bound)
            dynamic_span = (dynamic_range.max - dynamic_range.min) if self.has_zero_point else dynamic_range.max
        else:
            range_based = False
            scale = scale.view(self.scale_view_shapes[-1])
            assert isinstance(scale, torch.Tensor), "Scale must be a tensor."
        # endregion
        # region step 2: get the scale
        if self.linear_scale_quant_dtypes:
            if range_based:
                linear_scale = dynamic_span / self.linear_tensor_quant_span
            elif self.exponent_scale_quant_dtypes:
                # a given scale is rescaled from the exponent span to the linear span
                linear_scale = scale.mul(self.exponent_tensor_quant_span).div(self.linear_tensor_quant_span)
            else:
                linear_scale = scale
            lin_s = quantize_scale(
                linear_scale,
                quant_dtypes=self.linear_scale_quant_dtypes,
                quant_spans=self.linear_scale_quant_spans,
                view_shapes=self.linear_scale_view_shapes,
            )
            assert lin_s.data is not None, "Linear scale tensor is None."
            assert not lin_s.data.isnan().any(), "Linear scale tensor contains NaN."
            assert not lin_s.data.isinf().any(), "Linear scale tensor contains Inf."
        else:
            lin_s = QuantScale()
        if self.exponent_scale_quant_dtypes:
            if range_based:
                exp_scale = dynamic_span / self.exponent_tensor_quant_span
            else:
                exp_scale = scale
            if lin_s.data is not None:
                # divide out the already-quantized linear part, expanded to the finest scale view shape
                lin_s.data = lin_s.data.expand(self.linear_scale_view_shapes[-1]).reshape(self.scale_view_shapes[-1])
                exp_scale = exp_scale / lin_s.data
            exp_s = quantize_scale(
                exp_scale,
                quant_dtypes=self.exponent_scale_quant_dtypes,
                quant_spans=self.exponent_scale_quant_spans,
                view_shapes=self.exponent_scale_view_shapes,
            )
            assert exp_s.data is not None, "Exponential scale tensor is None."
            assert not exp_s.data.isnan().any(), "Exponential scale tensor contains NaN."
            assert not exp_s.data.isinf().any(), "Exponential scale tensor contains Inf."
            s = exp_s if lin_s.data is None else lin_s.extend(exp_s)
        else:
            s = lin_s
        assert s.data is not None, "Scale tensor is None."
        assert not s.data.isnan().any(), "Scale tensor contains NaN."
        assert not s.data.isinf().any(), "Scale tensor contains Inf."
        # endregion
        # region step 3: get the zero point
        if self.has_zero_point:
            if range_based:
                if self.tensor_zero_domain == ZeroPointDomain.PreScale:
                    zero = self.tensor_quant_range.min - dynamic_range.min / s.data
                else:
                    zero = self.tensor_quant_range.min * s.data - dynamic_range.min
            # in the scale-based case the caller-provided ``zero`` is used as is
            assert isinstance(zero, torch.Tensor), "Zero point must be a tensor."
            z = simple_quantize(zero, has_zero_point=True, quant_dtype=self.zero_quant_dtype)
        else:
            z = torch.tensor(0, dtype=s.data.dtype, device=s.data.device)
        assert not z.isnan().any(), "Zero point tensor contains NaN."
        assert not z.isinf().any(), "Zero point tensor contains Inf."
        # endregion
        return s, z
def simple_quantize(
    tensor: torch.Tensor,
    *,
    quant_dtype: torch.dtype | QuantDataType,
    has_zero_point: bool,
    quant_range: QuantRange | None = None,
    round_delta: torch.Tensor | None = None,
) -> torch.Tensor:
    """Round (and clamp) a tensor onto the grid of a quantization data type.

    When ``tensor`` does not require gradient, several branches operate in place on ``tensor``.
    When it does, rounding goes through the straight-through estimator ``ste``.

    Args:
        tensor (`torch.Tensor`):
            The tensor to quantize.
        quant_dtype (`torch.dtype` or `QuantDataType`):
            The quantization data type.
        has_zero_point (`bool`):
            Whether a zero point is used. Only used for integer `QuantDataType` quantization.
        quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
            The quantization range.
        round_delta (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The rounding delta. Not supported for exponent and float `QuantDataType`.

    Returns:
        `torch.Tensor`:
            The quantized tensor.

    Raises:
        TypeError: If ``quant_dtype`` is neither a `torch.dtype` nor a `QuantDataType`.
    """
    needs_ste = tensor.requires_grad

    # torch dtype: emulate the quantization by a cast round trip
    if isinstance(quant_dtype, torch.dtype):
        source_dtype = tensor.dtype
        tensor = tensor.to(dtype=quant_dtype).to(dtype=source_dtype)
        if round_delta is not None:
            tensor = tensor.add_(round_delta)
        if quant_range is not None and quant_range.is_set():
            tensor = torch.clamp(tensor, min=quant_range.min, max=quant_range.max)
        return tensor

    if not isinstance(quant_dtype, QuantDataType):
        raise TypeError(
            f"quant_dtype must be either torch.dtype or QuantDataType, got {quant_dtype} ({type(quant_dtype)})"
        )

    # exponent dtype: floor in the log2 domain, clamp the exponent, then map back with exp2
    if quant_dtype.is_exponent:
        assert round_delta is None, "round_delta is not supported for exponential quantization"
        log_range = LogQuantRange.construct(quant_dtype, quant_range)
        if needs_ste:
            tensor = ste(tensor.log2(), torch.floor)
        else:
            tensor = tensor.log2_().floor_()
        return tensor.clamp_(min=log_range.min, max=log_range.max).exp2_()

    # float dtype: clamp to the dtype limits, round with the dtype's own rounding, then apply the range
    if quant_dtype.is_float_point:
        assert round_delta is None, "round_delta is not supported for float quantization"
        tensor = torch.clamp(tensor, min=quant_dtype.min_value, max=quant_dtype.max_value)
        tensor = ste(tensor, quant_dtype.round)
        if quant_range is not None and quant_range.is_set():
            tensor = tensor.clamp_(min=quant_range.min, max=quant_range.max)
        return tensor

    # integer dtype: round to nearest, or floor and add the given rounding delta
    int_range = QuantRange.construct(quant_dtype, has_zero_point=has_zero_point, quant_range=quant_range)
    if round_delta is not None:
        if needs_ste:
            tensor = ste(tensor, torch.floor)
        else:
            tensor = tensor.floor_()
        tensor = tensor.add_(round_delta)
    elif needs_ste:
        tensor = ste(tensor, torch.round)
    else:
        tensor = tensor.round_()
    return tensor.clamp_(min=int_range.min, max=int_range.max)
class STEFunction(torch.autograd.Function):
    """Straight-through estimator: apply a function forward, pass the gradient through unchanged."""

    @staticmethod
    def forward(ctx: tp.Any, tensor: torch.Tensor, fn: tp.Callable[[torch.Tensor], torch.Tensor]) -> torch.Tensor:
        """Forward pass: return ``fn(tensor)``."""
        output = fn(tensor)
        return output

    @staticmethod
    def backward(ctx: tp.Any, grad_output: torch.Tensor) -> tp.Tuple[torch.Tensor, None]:
        """Backward pass: the incoming gradient is returned as is for ``tensor``; ``fn`` gets no gradient."""
        grad_for_tensor, grad_for_fn = grad_output, None
        return grad_for_tensor, grad_for_fn


def ste(tensor: torch.Tensor, fn: tp.Callable[[torch.Tensor], torch.Tensor]) -> torch.Tensor:
    """Apply ``fn`` to ``tensor`` with a straight-through gradient.

    Args:
        tensor (`torch.Tensor`):
            The input tensor.
        fn (`Callable[[torch.Tensor], torch.Tensor]`):
            The function applied in the forward pass.

    Returns:
        `torch.Tensor`:
            The result of ``fn(tensor)``.
    """
    result = STEFunction.apply(tensor, fn)  # type: ignore
    return result
@configclass
@dataclass
class QuantGptqConfig(BaseQuantKernelConfig):
    """Configuration for GPTQ quantization.

    Args:
        damp_percentage (`float`, *optional*, defaults to `0.01`):
            The percentage of damping.
        block_size (`int`, *optional*, defaults to `128`):
            The block size of the GPTQ quantization.
        num_inv_tries (`int`, *optional*, defaults to `200`):
            The number of tries for the inverse.
        hessian_block_size (`int`, *optional*, defaults to `-1`):
            The block size when calculing the Hessian.
    """

    damp_percentage: float = 0.01
    block_size: int = 128
    num_inv_tries: int = 200
    hessian_block_size: int = -1

    @property
    def name(self) -> str:
        """The name of the kernel, always ``"GPTQ"``."""
        return "GPTQ"

    def build(self) -> "QuantGptqKernel":
        """Create the GPTQ kernel that uses this configuration."""
        kernel = QuantGptqKernel(self)
        return kernel

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        Only the damping percentage and the block size are encoded in the name.

        Args:
            prefix (`str`, *optional*, defaults to `""`):
                The prefix for the directory names.

        Returns:
            `list[str]`:
                A single-element list with the directory name, prefixed by ``prefix`` and a dot if it is non-empty.
        """
        name = f"gptq.d{num2str(self.damp_percentage)}.b{num2str(self.block_size)}"
        if prefix:
            return [f"{prefix}.{name}"]
        return [name]
class QuantGptqKernel(BaseQuantKernel):
    """GPTQ quantization kernel bound to a `QuantGptqConfig`."""

    def __init__(self, config: "QuantGptqConfig"):
        """Store the GPTQ configuration used by `quantize`."""
        self.config = config

    def quantize(
        self,
        tensor: torch.Tensor,
        *,
        view_shape: torch.Size,
        quant_dtype: QuantDataType,
        zero_domain: ZeroPointDomain | None,
        scale: torch.Tensor,
        zero: torch.Tensor,
        inputs: TensorCache,
        quant_range: QuantRange | None = None,
        range_bound: RangeBound | None = None,
        **kwargs,
    ) -> torch.Tensor:
        """Quantize the tensor.

        Args:
            tensor (`torch.Tensor`):
                The tensor to quantize.
            view_shape (`torch.Size`):
                The view shape when quantizing the tensor.
            quant_dtype (`QuantDataType`):
                The quantization data type.
            zero_domain (`ZeroPointDomain` or `None`):
                The zero point domain.
            scale (`torch.Tensor`):
                The scale tensor.
            zero (`torch.Tensor`):
                The zero point tensor.
            inputs (`TensorCache`):
                The input activations.
            quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
                The quantization range.
            range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
                The range bound.
            **kwargs:
                Other keyword arguments. They are accepted and ignored.

        Returns:
            `torch.Tensor`:
                The quantized tensor in the shape of ``view_shape``.

        Raises:
            AssertionError: If ``tensor``, ``scale`` or ``zero`` requires gradient.
        """
        assert not tensor.requires_grad, "tensor must not require gradient."
        # ``Tensor.data`` is always detached, so ``x.data.requires_grad`` is False for every tensor;
        # the flag has to be read from the tensor itself for the check to mean anything.
        assert not scale.requires_grad, "scale must not require gradient."
        # ``gptq_quantize`` also accepts a plain number as zero point, which carries no gradient.
        assert not (isinstance(zero, torch.Tensor) and zero.requires_grad), "zero must not require gradient."
        return gptq_quantize(
            tensor,
            view_shape=view_shape,
            quant_dtype=quant_dtype,
            zero_domain=zero_domain,
            scale=scale,
            zero=zero,
            gptq_config=self.config,
            inputs=inputs,
            quant_range=quant_range,
            range_bound=range_bound,
        )
@torch.no_grad()
def gptq_quantize(  # noqa: C901
    tensor: torch.Tensor,
    *,
    view_shape: torch.Size,
    quant_dtype: QuantDataType,
    zero_domain: ZeroPointDomain | None,
    scale: torch.Tensor,
    zero: torch.Tensor,
    gptq_config: QuantGptqConfig,
    inputs: TensorCache,
    quant_range: QuantRange | None = None,
    range_bound: RangeBound | None = None,
) -> torch.Tensor:
    """Quantize the tensor using the GPTQ quantization kernel.

    Columns are quantized one at a time, in blocks of ``gptq_config.block_size``, and the
    quantization error of each column is propagated to the not-yet-quantized columns using
    the upper Cholesky factor of the inverse of the (damped) Hessian built from ``inputs``.

    Args:
        tensor (`torch.Tensor`):
            The tensor to quantize.
        view_shape (`torch.Size`):
            The view shape when quantizing the tensor.
        quant_dtype (`QuantDataType`):
            The quantization data type.
        zero_domain (`ZeroPointDomain` or `None`):
            The zero point domain.
        scale (`torch.Tensor`):
            The scale tensor.
        zero (`torch.Tensor`):
            The zero point tensor. A Python number or a single-element tensor is used as is.
        gptq_config (`QuantGptqConfig`):
            The GPTQ configuration (damping, block size, inverse tries, Hessian block size).
        inputs (`TensorCache`):
            The input activations.
        quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
            The quantization range.
        range_bound (`RangeBound` or `None`, *optional*, defaults to `None`):
            The range bound.

    Returns:
        `torch.Tensor`:
            The quantized tensor in the shape of ``view_shape``.
    """
    view_tensor = tensor.view(view_shape)
    view_shape = view_tensor.shape  # remove any -1 in the view_shape
    # region step 1: reshape the tensor to (#g0 * gs0, #g1 * #g2 * ... * gs1 * gs2, ...)
    len_view_shape = len(view_shape)
    # view_tensor: (#g0, gs0, #g1, gs1, #g2, gs2, ...) -> (#g0, gs0, #g1, #g2, ..., gs1, gs2, ...)
    reshaped_tensor = view_tensor.permute(0, 1, *range(2, len_view_shape, 2), *range(3, len_view_shape, 2))
    # reshaped_tensor: (#g0 * gs0, #g1 * #g2 * ... * gs1 * gs2 * ...)
    reshaped_tensor = reshaped_tensor.reshape(view_shape[0] * view_shape[1], -1)
    num_row_groups, num_column_groups = view_shape[0], view_shape[2::2].numel()
    row_group_size, column_group_size = view_shape[1], view_shape[3::2].numel()
    num_rows, num_columns = reshaped_tensor.shape
    reshaped_scale = scale.view(num_row_groups, 1, num_column_groups)
    # a scalar zero point (Python number or single-element tensor) is broadcast instead of indexed per group
    zero_is_number = isinstance(zero, (int, float)) or zero.numel() == 1
    reshaped_zero = zero if zero_is_number else zero.view(num_row_groups, 1, num_column_groups)
    # endregion
    # region step 2: get Hessian matrix
    # accumulates (2 / num_samples) * x^T x over all cached inputs
    hessian = torch.zeros((num_columns, num_columns), device=view_tensor.device, dtype=view_tensor.dtype)
    for x in inputs.data:
        x: torch.Tensor = inputs.reshape(x.view(-1, *x.shape[inputs.channels_dim :]))
        if gptq_config.hessian_block_size > 0 and x.shape[0] > gptq_config.hessian_block_size:
            # accumulate in row chunks so only one chunk at a time is moved to the target device/dtype
            for b in range(0, x.shape[0], gptq_config.hessian_block_size):
                _x = x[b : min(b + gptq_config.hessian_block_size, x.shape[0])]
                _x = math.sqrt(2 / inputs.num_samples) * _x.to(device=view_tensor.device, dtype=view_tensor.dtype)
                hessian += torch.matmul(_x.t(), _x)
        else:
            x = math.sqrt(2 / inputs.num_samples) * x.to(device=view_tensor.device, dtype=view_tensor.dtype)
            hessian += torch.matmul(x.t(), x)
    # columns with a zero Hessian diagonal: set the diagonal to 1 and zero the corresponding weights
    dead = hessian.diagonal() == 0
    hessian[dead, dead] = 1
    reshaped_tensor[:, dead] = 0
    # NOTE(review): `x` is only bound by the loop above, so this `del` raises if `inputs.data` is empty
    del x, inputs, dead
    gc.collect()
    torch.cuda.empty_cache()
    # endregion
    # region step 3: permute the Hessian matrix
    # columns are processed in descending order of their Hessian diagonal
    importance = torch.diag(hessian)  # (#g1 * #g2 * ... * gs1 * gs2 * ..., )
    permute = torch.argsort(importance, descending=True)
    hessian = hessian[permute][:, permute]
    reshaped_tensor = reshaped_tensor[:, permute]
    inverse_permute = torch.argsort(permute)
    del importance
    # endregion
    # region step 4: apply dampening to avoid numerical instability
    # the damping is added in place to the diagonal; presumably `diagonal()` is a view, so it lands in `hessian`
    hessian_diag = hessian.diagonal()
    hessian_diag_mean = hessian_diag.mean()
    hessian_diag += gptq_config.damp_percentage * hessian_diag_mean
    # endregion
    # region step 5: get the inverse of the Hessian matrix
    # hessian_inv ends up as the upper Cholesky factor of the inverse Hessian;
    # on a RuntimeError the damping is increased and the factorization is retried
    stable_inv, num_inv_tries = False, 0
    while (not stable_inv) and num_inv_tries < gptq_config.num_inv_tries:
        num_inv_tries += 1
        try:
            hessian_inv = torch.linalg.cholesky(hessian)
            hessian_inv = torch.cholesky_inverse(hessian_inv)
            hessian_inv = torch.linalg.cholesky(hessian_inv, upper=True)
        except RuntimeError:
            hessian_diag += (gptq_config.damp_percentage * 0.1) * hessian_diag_mean
            continue
        stable_inv = True
    if num_inv_tries > 1:
        logger = tools.logging.getLogger(f"{__name__}.GPTQ")
        logger.debug(" - Hessian is not stable %s %d tries.", "until" if stable_inv else "after", num_inv_tries)
    # NOTE(review): `hessian_inv` may be unbound or hold an intermediate result if no attempt succeeded
    assert not hessian_inv.isinf().any(), "Inverse of Hessian matrix contains Inf."
    assert not hessian_inv.isnan().any(), "Inverse of Hessian matrix contains NaN."
    del hessian, hessian_diag, hessian_diag_mean, num_inv_tries
    # endregion
    # region step 6: quantize the tensor
    qtensor = torch.zeros_like(reshaped_tensor)
    for c_start in range(0, num_columns, gptq_config.block_size):
        c_end = min(c_start + gptq_config.block_size, num_columns)
        # work on a copy of the block; `block_qtensor` is a slice of `qtensor` and is written through
        block_tensor = reshaped_tensor[:, c_start:c_end].clone()
        block_qtensor = qtensor[:, c_start:c_end]
        block_hessian_inv = hessian_inv[c_start:c_end, c_start:c_end]
        block_error = torch.zeros_like(block_tensor)
        for _c in range(c_end - c_start):
            c = c_start + _c
            column = block_tensor[:, _c]  # (#g0 * gs0, )
            pos_diag = block_hessian_inv[_c, _c]
            # map the permuted column back to its original index to find its scale/zero group
            column_group_index = permute[c] // column_group_size
            column_scale = reshaped_scale[:, :, column_group_index]  # (#g0, 1)
            column_zero = reshaped_zero if zero_is_number else reshaped_zero[:, :, column_group_index]
            qcolumn = column.view(num_row_groups, row_group_size).clone()  # (#g0, gs0)
            if range_bound is not None and range_bound.is_set():
                qcolumn = qcolumn.clamp_(min=range_bound.min, max=range_bound.max)
            # quantize: the zero point is added before (PostScale) or after (PreScale) dividing by the scale
            if zero_domain == ZeroPointDomain.PostScale:
                qcolumn = qcolumn.add_(column_zero)
            qcolumn = qcolumn.div_(column_scale)
            if zero_domain == ZeroPointDomain.PreScale:
                qcolumn = qcolumn.add_(column_zero)
            qcolumn = simple_quantize(
                qcolumn, quant_dtype=quant_dtype, has_zero_point=zero_domain is not None, quant_range=quant_range
            )
            block_qtensor[:, _c] = qcolumn.view(-1)  # ! copy the quantized column
            # dequantize the column (inverse order of the steps above) to measure the quantization error
            if zero_domain == ZeroPointDomain.PreScale:
                qcolumn = qcolumn.sub_(column_zero)
            qcolumn = qcolumn.mul_(column_scale)
            if zero_domain == ZeroPointDomain.PostScale:
                qcolumn = qcolumn.sub_(column_zero)
            # in-place on `column`, i.e. on block_tensor[:, _c]
            column_error = column.sub_(qcolumn.view(column.shape)).div_(pos_diag)
            block_error[:, _c] = column_error.view(-1)
            # propagate the error to the remaining columns of this block
            block_tensor[:, _c:] -= column_error.view(-1, 1).matmul(block_hessian_inv[_c, _c:].view(1, -1))
        # propagate the accumulated block error to all columns after this block
        reshaped_tensor[:, c_end:] -= block_error.matmul(hessian_inv[c_start:c_end, c_end:])
    # undo the column permutation of step 3
    qtensor = qtensor[:, inverse_permute]
    # endregion
    # region step 7: reshape the tensor back to (#g0, gs0, #g1, gs1, #g2, gs2, ...)
    _view_shape = view_shape[:2] + view_shape[2::2] + view_shape[3::2]
    # qtensor: (#g0 * gs0, #g1 * #g2 * ... * gs1 * gs2, ...) -> (#g0, gs0, #g1, #g2, ..., gs1, gs2, ...)
    qtensor = qtensor.reshape(_view_shape)
    # qtensor: (#g0, gs0, #g1, #g2, ..., gs1, gs2, ...) -> (#g0, gs0, #g1, gs1, #g2, gs2, ...)
    permute_dims = [0, 1]
    for i in range(1, len_view_shape // 2):
        # interleave the i-th group-count dim with the i-th group-size dim
        permute_dims.append(1 + i)
        permute_dims.append(len_view_shape // 2 + i)
    qtensor = qtensor.permute(*permute_dims).reshape(view_shape).contiguous()
    # endregion
    assert not qtensor.isnan().any(), "GPTQ Quantized tensor contains NaN."
    assert not qtensor.isinf().any(), "GPTQ Quantized tensor contains Inf."
    return qtensor
class QuantRtnKernel(BaseQuantKernel):
    """Round-to-nearest (RTN) Quantization kernel."""

    def quantize(
        self,
        tensor: torch.Tensor,
        *,
        view_shape: torch.Size,
        quant_dtype: QuantDataType,
        zero_domain: ZeroPointDomain | None,
        scale: torch.Tensor,
        zero: torch.Tensor,
        quant_range: QuantRange | None = None,
        round_delta: torch.Tensor | None = None,
        **kwargs,
    ) -> torch.Tensor:
        """Quantize the tensor by delegating to `rtn_quantize`.

        Args:
            tensor (`torch.Tensor`):
                The tensor to quantize.
            view_shape (`torch.Size`):
                The view shape when quantizing the tensor.
            quant_dtype (`QuantDataType`):
                The quantization data type.
            zero_domain (`ZeroPointDomain` or `None`):
                The zero point domain.
            scale (`torch.Tensor`):
                The scale tensor.
            zero (`torch.Tensor`):
                The zero point tensor.
            quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
                The quantization range.
            round_delta (`torch.Tensor` or `None`, *optional*, defaults to `None`):
                The rounding delta.
            **kwargs:
                Other keyword arguments. They are accepted and ignored.

        Returns:
            `torch.Tensor`:
                The quantized tensor in the shape of ``view_shape``.
        """
        forwarded = {
            "view_shape": view_shape,
            "quant_dtype": quant_dtype,
            "zero_domain": zero_domain,
            "scale": scale,
            "zero": zero,
            "quant_range": quant_range,
            "round_delta": round_delta,
        }
        return rtn_quantize(tensor, **forwarded)


def rtn_quantize(
    tensor: torch.Tensor,
    *,
    view_shape: torch.Size,
    quant_dtype: QuantDataType,
    zero_domain: ZeroPointDomain | None,
    scale: torch.Tensor,
    zero: torch.Tensor,
    quant_range: QuantRange | None = None,
    round_delta: torch.Tensor | None = None,
) -> torch.Tensor:
    """Quantize the tensor using the RTN quantization kernel.

    With a ``PostScale`` zero point, the zero point is added in place to the view of ``tensor``
    before the division by ``scale``; the division itself is out of place.

    Args:
        tensor (`torch.Tensor`):
            The tensor to quantize.
        view_shape (`torch.Size`):
            The view shape when quantizing the tensor.
        quant_dtype (`QuantDataType`):
            The quantization data type.
        zero_domain (`ZeroPointDomain` or `None`):
            The zero point domain.
        scale (`torch.Tensor`):
            The scale tensor.
        zero (`torch.Tensor`):
            The zero point tensor.
        quant_range (`QuantRange` or `None`, *optional*, defaults to `None`):
            The quantization range.
        round_delta (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The rounding delta.

    Returns:
        `torch.Tensor`:
            The quantized tensor in the shape of ``view_shape``.
    """
    viewed = tensor.view(view_shape)
    if round_delta is None:
        viewed_delta = None
    else:
        viewed_delta = round_delta.view(view_shape)

    zero_before_scaling = zero_domain == ZeroPointDomain.PostScale
    zero_after_scaling = zero_domain == ZeroPointDomain.PreScale

    if zero_before_scaling:
        viewed = viewed.add_(zero)
    viewed = viewed.div(scale)
    if zero_after_scaling:
        viewed = viewed.add_(zero)

    return simple_quantize(
        viewed,
        quant_dtype=quant_dtype,
        has_zero_point=zero_domain is not None,
        quant_range=quant_range,
        round_delta=viewed_delta,
    )
""" qtensor = tensor.view(view_shape) round_delta = round_delta.view(view_shape) if round_delta is not None else None if zero_domain == ZeroPointDomain.PostScale: qtensor = qtensor.add_(zero) qtensor = qtensor.div(scale) if zero_domain == ZeroPointDomain.PreScale: qtensor = qtensor.add_(zero) qtensor = simple_quantize( qtensor, quant_dtype=quant_dtype, has_zero_point=zero_domain is not None, quant_range=quant_range, round_delta=round_delta, ) return qtensor ================================================ FILE: deepcompressor/quantizer/processor.py ================================================ # -*- coding: utf-8 -*- """Quantizer.""" import typing as tp from dataclasses import _MISSING_TYPE, MISSING, dataclass import torch from ..data.range import DynamicRange, QuantRange, RangeBound from ..data.tensor import QuantTensor from ..nn.patch.lowrank import LowRankBranch from ..utils.common import tree_map from ..utils.config import KeyEnableConfig from ..utils.hooks import BaseInputPackager, BaseOutputPackager, BaseTensorProcessor from .config.kernel import BaseKeyEnableQuantKernelConfig, BaseQuantKernel, BaseQuantKernelConfig from .config.lowrank import QuantLowRankConfig from .impl.base import QuantizerImpl from .impl.info import QuantInfo __all__ = ["Quantizer"] @dataclass class Quantizer(QuantizerImpl, BaseTensorProcessor): """Quantizer class. Args: config (`BasicQuantizerConfig` or `None`): The quantizer configuration. key (`str`, *optional*, defaults to `""`): The key of the quantizer. kernel (`BaseKeyEnableQuantKernelConfig` or `BaseQuantKernelConfig` or `BaseQuantKernel` or `None`, *optional*, defaults to `None`): The quantizer kernel configuration. channels_dim (`int` or `None`, *optional*, defaults to `None`): The dimension of channels. scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None`, *optional*, defaults to `None`): The scale tensor. zero (`torch.Tensor` or `None`, *optional*, defaults to `None`): The zero point tensor. 
def is_enabled_low_rank(self) -> bool:
    """Tell whether the low-rank branch is enabled for this quantizer.

    Returns:
        `bool`:
            ``False`` when no low-rank configuration is set. Otherwise the result of
            ``is_enabled_for(self.key)`` if the configuration is a `KeyEnableConfig`,
            else the result of ``is_enabled()``.
    """
    if self.low_rank is None:
        return False
    if not isinstance(self.low_rank, KeyEnableConfig):
        return self.low_rank.is_enabled()
    return self.low_rank.is_enabled_for(self.key)
def quantize(
    self,
    tensor: torch.Tensor,
    /,
    *,
    return_with_dequant: bool = True,
    return_with_quant: bool = False,
    kernel: (
        BaseKeyEnableQuantKernelConfig | BaseQuantKernelConfig | BaseQuantKernel | None | _MISSING_TYPE
    ) = MISSING,
    channels_dim: int | None | _MISSING_TYPE = MISSING,
    # scale-based quantization arguments
    scale: torch.Tensor | tp.Sequence[torch.Tensor] | None | _MISSING_TYPE = MISSING,
    zero: torch.Tensor | None | _MISSING_TYPE = MISSING,
    # range-based quantization arguments
    dynamic_range: DynamicRange | tp.Sequence[DynamicRange] | None | _MISSING_TYPE = MISSING,
    range_bound: RangeBound | None | _MISSING_TYPE = MISSING,
    # other arguments
    quant_range: QuantRange | None | _MISSING_TYPE = MISSING,
    default_dtype: torch.dtype | None | _MISSING_TYPE = MISSING,
    develop_dtype: torch.dtype | _MISSING_TYPE = MISSING,
    **kwargs,
) -> QuantTensor:
    """Quantize a tensor, falling back to the quantizer's own attributes for omitted arguments.

    Every argument left as `MISSING` is replaced by the attribute of the same name on
    this quantizer. The kernel is then specialized for ``self.key`` (or dropped when it
    is not enabled for that key) before delegating to the parent ``quantize``.

    Args:
        tensor (`torch.Tensor`):
            The tensor to quantize.
        return_with_dequant (`bool`, *optional*, defaults to `True`):
            Whether to return the dequantized tensor.
        return_with_quant (`bool`, *optional*, defaults to `False`):
            Whether to return the quantized tensor.
        kernel (`BaseKeyEnableQuantKernelConfig` or `BaseQuantKernelConfig` or `BaseQuantKernel`
                or `None` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The quantization kernel configuration.
        channels_dim (`int` or `None` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The dimension of channels.
        scale (`torch.Tensor` or `Sequence[torch.Tensor]` or `None` or `_MISSING_TYPE`,
               *optional*, defaults to `MISSING`):
            The scale tensor.
        zero (`torch.Tensor` or `None` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The zero point tensor.
        dynamic_range (`DynamicRange` or `Sequence[DynamicRange]` or `None` or `_MISSING_TYPE`,
                       *optional*, defaults to `MISSING`):
            The dynamic range.
        range_bound (`RangeBound` or `None` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The dynamic range bound.
        quant_range (`QuantRange` or `None` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The quantization range.
        default_dtype (`torch.dtype` or `None` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The default scale dtype.
        develop_dtype (`torch.dtype` or `_MISSING_TYPE`, *optional*, defaults to `MISSING`):
            The quantization development dtype.
        **kwargs:
            Other keyword arguments for the quantization kernel.
            For example, ``inputs`` for the input tensors in GPTQ kernel,
            ``round_delta`` for the rounding delta in the RTN kernel.

    Returns:
        QuantTensor:
            The quantized tensor.
    """
    # Fill in omitted arguments from the instance; attributes are only read when needed.
    if channels_dim is MISSING:
        channels_dim = self.channels_dim
    if scale is MISSING:
        scale = self.scale
    if zero is MISSING:
        zero = self.zero
    if dynamic_range is MISSING:
        dynamic_range = self.dynamic_range
    if range_bound is MISSING:
        range_bound = self.range_bound
    if quant_range is MISSING:
        quant_range = self.quant_range
    if default_dtype is MISSING:
        default_dtype = self.default_dtype
    if develop_dtype is MISSING:
        develop_dtype = self.develop_dtype
    if kernel is MISSING:
        kernel = self.kernel

    # Key-aware kernel configs are narrowed to this quantizer's key.
    if isinstance(kernel, BaseKeyEnableQuantKernelConfig):
        kernel = kernel.specialize_for(self.key)
    elif isinstance(kernel, KeyEnableConfig) and not kernel.is_enabled_for(self.key):
        kernel = None
    assert isinstance(kernel, (BaseQuantKernel, BaseQuantKernelConfig, type(None)))

    resolved = dict(
        kernel=kernel,
        channels_dim=channels_dim,
        scale=scale,
        zero=zero,
        dynamic_range=dynamic_range,
        range_bound=range_bound,
        quant_range=quant_range,
        return_with_dequant=return_with_dequant,
        return_with_quant=return_with_quant,
        default_dtype=default_dtype,
        develop_dtype=develop_dtype,
    )
    return super().quantize(tensor, **resolved, **kwargs)
def quantize_with_low_rank(
    self,
    tensors: torch.Tensor | tp.Sequence[torch.Tensor],
    /,
    *,
    return_with_dequant: bool = True,
    return_with_quant: bool = False,
    kernel: (
        BaseKeyEnableQuantKernelConfig | BaseQuantKernelConfig | BaseQuantKernel | None | _MISSING_TYPE
    ) = MISSING,
    channels_dim: int | None | _MISSING_TYPE = MISSING,
    # scale-based quantization arguments
    scale: torch.Tensor | tp.Sequence[torch.Tensor] | None | _MISSING_TYPE = MISSING,
    zero: torch.Tensor | None | _MISSING_TYPE = MISSING,
    # range-based quantization arguments
    dynamic_range: DynamicRange | tp.Sequence[DynamicRange] | None | _MISSING_TYPE = MISSING,
    range_bound: RangeBound | None | _MISSING_TYPE = MISSING,
    # other arguments
    quant_range: QuantRange | None | _MISSING_TYPE = MISSING,
    default_dtype: torch.dtype | None | _MISSING_TYPE = MISSING,
    develop_dtype: torch.dtype | _MISSING_TYPE = MISSING,
    **kwargs,
) -> tuple[list[QuantTensor], list[LowRankBranch] | None]:
    """Quantize one or more tensors together with their low-rank branches.

    All quantization arguments are forwarded unchanged to ``self.quantize``. When the
    low-rank branch is disabled, each tensor is simply quantized and no branch is built.
    Otherwise one `LowRankBranch` per tensor is returned next to the quantized tensors:

    - *exclusive* (a single tensor, or ``self.low_rank.exclusive``): every tensor gets
      an independent branch.
    - *shared* (several tensors, not exclusive): the tensors are concatenated along
      dim 0, a single branch is built on the concatenation, and each per-tensor branch
      reuses its ``a`` and copies the matching rows of its ``b.weight``.

    In both modes ``self.low_rank.compensate`` selects the order: when set, the tensor
    is quantized first and the branch is built from ``t.data - qt.data``; otherwise the
    branch is built from the tensor and ``t.data - lb.get_effective_weight()`` is what
    gets quantized.

    Args:
        tensors (`torch.Tensor` or `Sequence[torch.Tensor]`):
            The tensor(s) to quantize. A single tensor is wrapped into a list.
        return_with_dequant (`bool`, *optional*, defaults to `True`):
            Whether to return the dequantized tensor. Forced to `True` when
            ``self.low_rank.compensate`` is set.
        return_with_quant (`bool`, *optional*, defaults to `False`):
            Whether to return the quantized tensor.
        kernel, channels_dim, scale, zero, dynamic_range, range_bound, quant_range,
        default_dtype, develop_dtype, **kwargs:
            Forwarded as-is to ``self.quantize``.

    Returns:
        `tuple[list[QuantTensor], list[LowRankBranch] | None]`:
            The quantized tensors and the low-rank branches (`None` if the low-rank
            branch is disabled).
    """
    if isinstance(tensors, torch.Tensor):
        tensors = [tensors]
    # Keyword arguments shared by every ``self.quantize`` call below.
    qkwargs = dict(
        return_with_dequant=return_with_dequant,
        return_with_quant=return_with_quant,
        kernel=kernel,
        channels_dim=channels_dim,
        scale=scale,
        zero=zero,
        dynamic_range=dynamic_range,
        range_bound=range_bound,
        quant_range=quant_range,
        default_dtype=default_dtype,
        develop_dtype=develop_dtype,
        **kwargs,
    )
    if self.is_enabled_low_rank():
        qtensors: list[QuantTensor] = []
        branches: list[LowRankBranch] = []
        if len(tensors) == 1 or self.low_rank.exclusive:
            # Exclusive mode: one independent branch per tensor.
            if self.low_rank.compensate:
                # The residual below needs ``qt.data``, so the caller's setting is overridden.
                # NOTE(review): presumably ``qt.data`` is the dequantized tensor -- confirm in QuantTensor.
                qkwargs["return_with_dequant"] = True
                for t in tensors:
                    qt = self.quantize(t.data, **qkwargs)
                    # Branch is built from the residual between the tensor and its quantized version.
                    # NOTE(review): the positional args look like (in_features, out_features) -- confirm.
                    lb = LowRankBranch(t.shape[1], t.shape[0], rank=self.low_rank.rank, weight=t.data - qt.data)
                    qtensors.append(qt)
                    branches.append(lb)
            else:
                for t in tensors:
                    # Branch is built from the tensor itself; only the remainder is quantized.
                    lb = LowRankBranch(t.shape[1], t.shape[0], rank=self.low_rank.rank, weight=t.data)
                    qt = self.quantize(t.data - lb.get_effective_weight().view(t.data.shape), **qkwargs)
                    qtensors.append(qt)
                    branches.append(lb)
            return qtensors, branches
        else:
            # Shared mode: a single branch is built on all tensors stacked along dim 0.
            st = torch.cat([t.data for t in tensors], dim=0)
            if self.low_rank.compensate:
                qkwargs["return_with_dequant"] = True
                for t in tensors:
                    qt = self.quantize(t.data, **qkwargs)
                    qtensors.append(qt)
                # Shared branch built from the stacked residual.
                sl = LowRankBranch(
                    st.shape[1],
                    st.shape[0],
                    rank=self.low_rank.rank,
                    weight=st - torch.cat([q.data for q in qtensors], dim=0),
                )
                del st
                i = 0  # row offset of the current tensor inside the stacked tensor
                for t in tensors:
                    lb = LowRankBranch(t.shape[1], t.shape[0], rank=self.low_rank.rank)
                    # Every per-tensor branch references the same ``a`` object.
                    lb.a = sl.a
                    # The result of ``.to`` is discarded, so this relies on it acting in place.
                    # NOTE(review): that holds for ``nn.Module`` -- confirm ``lb.b`` is a module.
                    lb.b.to(dtype=t.dtype, device=t.device)
                    # Copy this tensor's rows out of the shared ``b.weight``.
                    lb.b.weight.copy_(sl.b.weight[i : i + t.shape[0]])
                    branches.append(lb)
                    i += t.shape[0]
                return qtensors, branches
            else:
                # Shared branch built from the stacked tensors themselves.
                sl = LowRankBranch(st.shape[1], st.shape[0], rank=self.low_rank.rank, weight=st)
                del st
                i = 0  # row offset of the current tensor inside the stacked tensor
                for t in tensors:
                    lb = LowRankBranch(t.shape[1], t.shape[0], rank=self.low_rank.rank)
                    lb.a = sl.a
                    lb.b.to(dtype=t.dtype, device=t.device)
                    lb.b.weight.copy_(sl.b.weight[i : i + t.shape[0]])
                    # Unlike the exclusive path, the effective weight is not re-viewed to ``t.data.shape`` here.
                    qt = self.quantize(t.data - lb.get_effective_weight(), **qkwargs)
                    qtensors.append(qt)
                    branches.append(lb)
                    i += t.shape[0]
                return qtensors, branches
    else:
        # Low-rank branch disabled: plain quantization, no branches.
        return [self.quantize(t.data, **qkwargs) for t in tensors], None
""" def _move_to(x): return x.to(device) self.channels_dim = state_dict["channels_dim"] self.scale = tree_map(_move_to, state_dict["scale"]) self.zero = _move_to(state_dict["zero"]) if state_dict["zero"] is not None else None if state_dict["dynamic_range"] is None: self.dynamic_range = None elif isinstance(state_dict["dynamic_range"], dict): self.dynamic_range = DynamicRange.from_dict(tree_map(_move_to, state_dict["dynamic_range"])) else: self.dynamic_range = tuple( DynamicRange.from_dict(tree_map(_move_to, d)) for d in state_dict["dynamic_range"] ) self.range_bound = RangeBound.from_dict(state_dict["range_bound"]) self.quant_range = QuantRange.from_dict(state_dict["quant_range"]) ================================================ FILE: deepcompressor/utils/__init__.py ================================================ # -*- coding: utf-8 -*- from .common import * from .patch import * ================================================ FILE: deepcompressor/utils/common.py ================================================ # -*- coding: utf-8 -*- """Common utilities.""" import typing as tp import numpy as np import torch __all__ = [ "join_name", "join_names", "num2str", "split_sequence", "tree_map", "tree_copy_with_ref", "tree_split", "tree_collate", "hash_str_to_int", ] def join_name(prefix: str, name: str, sep: str = ".", relative: bool = True) -> str: """Join a prefix and a name with a separator. Args: prefix (`str`): Prefix. name (`str`): Name. sep (`str`, *optional*, defaults to `.`): Separator. relative (`bool`, *optional*, defaults to `True`): Whether to resolve relative name. Returns: `str`: Joined name. 
""" if prefix: assert not prefix.endswith(sep), f"prefix={prefix} ends with sep={sep}" if name: if name.startswith(sep) and relative: # Remove leading separator prefix_names = prefix.split(sep) unsep_name = name.lstrip(sep) num_leading_seps = len(name) - len(unsep_name) if num_leading_seps > len(prefix_names): prefix = sep * (num_leading_seps - len(prefix_names) - 1) else: prefix = sep.join(prefix_names[:-num_leading_seps]) return f"{prefix}{sep}{unsep_name}" else: return f"{prefix}{sep}{name}" else: return prefix else: return name def join_names(*names: str, sep: str = ".", relative: bool = True) -> str: """Join multiple names with a separator. Args: names (`str`): Names. sep (`str`, *optional*, defaults to `.`): Separator. relative (`bool`, *optional*, defaults to `True`): Whether to resolve relative name. Returns: `str`: Joined name. """ if not names: return "" prefix = "" for name in names: prefix = join_name(prefix, name, sep=sep, relative=relative) return prefix def num2str(num: int | float) -> str: """Convert a number to a string. Args: num (`int` or `float`): Number to convert. Returns: str: Converted string. """ s = str(num).replace("-", "n") us = s.split(".") if len(us) == 1 or int(us[1]) == 0: return us[0] else: return us[0] + "p" + us[1] def split_sequence(lst: tp.Sequence[tp.Any], splits: tp.Sequence[int]) -> list[list[tp.Any]]: """Split a sequence into multiple sequences. Args: lst (`Sequence`): Sequence to split. splits (`Sequence`): Split indices. Returns: `list[list]`: Splitted sequences. 
""" ret = [] start = 0 for end in splits: ret.append(lst[start:end]) start = end ret.append(lst[start:]) return ret def tree_map(func: tp.Callable[[tp.Any], tp.Any], tree: tp.Any) -> tp.Any: """Apply a function to tree-structured data.""" if isinstance(tree, dict): return {k: tree_map(func, v) for k, v in tree.items()} elif isinstance(tree, (list, tuple)): return type(tree)(tree_map(func, v) for v in tree) elif isinstance(tree, (torch.Tensor, np.ndarray)): return func(tree) else: return tree def tree_copy_with_ref( tree: tp.Any, /, ref: tp.Any, copy_func: tp.Callable[[tp.Any, tp.Any], tp.Any] | None = None ) -> tp.Any: """Copy tree-structured data with reference.""" if isinstance(tree, dict): return {k: tree_copy_with_ref(v, ref[k]) for k, v in tree.items()} elif isinstance(tree, (list, tuple)): return type(tree)(tree_copy_with_ref(v, ref[i]) for i, v in enumerate(tree)) elif isinstance(tree, torch.Tensor): assert isinstance(ref, torch.Tensor), f"source is a tensor but reference is not: {type(ref)}" assert tree.shape == ref.shape, f"source.shape={tree.shape} != reference.shape={ref.shape}" if tree.data_ptr() == ref.data_ptr() or tree.allclose(ref): return ref else: return tree elif copy_func is not None: return copy_func(tree, ref) else: return tree def tree_split(tree: tp.Any) -> list[tp.Any]: """Split tree-structured data into a list of data samples.""" def get_batch_size(tree: tp.Any) -> int | None: if isinstance(tree, dict): for v in tree.values(): b = get_batch_size(v) if b is not None: return b elif isinstance(tree, (list, tuple)): for samples in tree: b = get_batch_size(samples) if b is not None: return b elif isinstance(tree, torch.Tensor) and tree.ndim > 0: return tree.shape[0] return None def get_batch(tree: tp.Any, batch_id: int, batch_size: int) -> tp.Any: if isinstance(tree, dict): return {k: get_batch(v, batch_id, batch_size=batch_size) for k, v in tree.items()} elif isinstance(tree, (list, tuple)): return [get_batch(samples, batch_id, 
batch_size=batch_size) for samples in tree] elif isinstance(tree, torch.Tensor) and tree.ndim > 0: if tree.shape[0] == batch_size: return tree[batch_id : batch_id + 1] else: return tree else: return tree ret = [] batch_size = get_batch_size(tree) assert batch_size is not None, "Cannot determine batch size" for i in range(batch_size): ret.append(get_batch(tree, i, batch_size=batch_size)) return ret def tree_collate(batch: list[tp.Any] | tuple[tp.Any, ...]) -> tp.Any: """Collate function for tree-structured data.""" if isinstance(batch[0], dict): return {k: tree_collate([d[k] for d in batch]) for k in batch[0]} elif isinstance(batch[0], (list, tuple)): return [tree_collate(samples) for samples in zip(*batch, strict=True)] elif isinstance(batch[0], torch.Tensor): # if all tensors in batch are exactly the same, return the tensor itself if all(torch.equal(batch[0], b) for b in batch): return batch[0] else: return torch.cat(batch) else: return batch[0] def hash_str_to_int(s: str) -> int: """Hash a string to an integer.""" modulus = 10**9 + 7 # Large prime modulus hash_int = 0 for char in s: hash_int = (hash_int * 31 + ord(char)) % modulus return hash_int ================================================ FILE: deepcompressor/utils/config/__init__.py ================================================ from .base import EnableConfig, IncludeBasedConfig, KeyEnableConfig, SkipBasedConfig ================================================ FILE: deepcompressor/utils/config/base.py ================================================ # -*- coding: utf-8 -*- import typing as tp from abc import ABC, abstractmethod from dataclasses import dataclass, field import omniconfig from omniconfig import configclass __all__ = ["EnableConfig", "KeyEnableConfig", "SkipBasedConfig", "IncludeBasedConfig"] class EnableConfig(ABC): @abstractmethod def is_enabled(self) -> bool: """Whether the configuration is enabled.""" return True class KeyEnableConfig(ABC): @abstractmethod def is_enabled_for(self, key: 
@configclass
@dataclass
class SkipBasedConfig(KeyEnableConfig, EnableConfig):
    """Skip-based configuration: enabled for every key that is not listed in ``skips``.

    Args:
        skips (`list[str]`, *optional*, defaults to `[]`):
            The keys of the modules to skip.
    """

    skips: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Run the next ``__post_init__`` in the MRO (if any), then sort and deduplicate ``skips``."""
        parent_hook = getattr(super(), "__post_init__", None)
        if parent_hook:
            parent_hook()
        unique_keys = set(self.skips or [])
        self.skips = sorted(unique_keys)

    def is_enabled(self) -> bool:
        """Whether the configuration is enabled (defers to the parent class)."""
        return super().is_enabled()

    def is_enabled_for(self, key: str) -> bool:
        """Whether the configuration is enabled for the given key.

        Args:
            key (`str`):
                The key.

        Returns:
            `bool`:
                Whether the configuration is enabled and ``key`` is not skipped.
        """
        return self.is_enabled() and key not in self.skips

    def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]:
        """Generate the directory names of the configuration.

        The parent's directory names come first, followed by one entry naming the
        skipped keys.

        Args:
            prefix (`str`, *optional*, defaults to `""`):
                The prefix of the directory names.

        Returns:
            `list[str]`:
                The directory names of the configuration.
        """
        dirnames = list(super().generate_dirnames(**kwargs))  # type: ignore
        dirnames.append("skip.[{}]".format("+".join(self.skips)))
        if not prefix:
            return dirnames
        return [f"{prefix}.{dirname}" for dirname in dirnames]

    @classmethod
    def update_get_arguments(
        cls: type["SkipBasedConfig"],
        *,
        overwrites: dict[str, tp.Callable[[omniconfig.Arguments], None] | None] | None = None,
        defaults: dict[str, tp.Any] | None = None,
    ) -> tuple[dict[str, tp.Callable[[omniconfig.Arguments], None] | None], dict[str, tp.Any]]:
        """Get the arguments for the quantization configuration.

        Chains to the parent implementation when there is one, then registers (without
        replacing an existing entry) the argument builder for ``skips``.
        """
        parent_update = getattr(super(), "update_get_arguments", None)
        if parent_update:
            overwrites, defaults = parent_update(overwrites=overwrites, defaults=defaults)
        if not overwrites:
            overwrites = {}
        if not defaults:
            defaults = {}

        collect_fn = omniconfig.ADD_PREFIX_BOOL_FIELDS("skip", **defaults)

        def add_skips_argument(parser):
            collect_fn(parser)
            parser.add_argument("--skips", nargs="+", default=[], help="The keys of the modules to skip.")

        overwrites.setdefault("skips", add_skips_argument)
        return overwrites, defaults

    @classmethod
    def update_from_dict(
        cls: type["SkipBasedConfig"], *, parsed_args: dict[str, tp.Any], overwrites: dict[str, tp.Any]
    ) -> tuple[dict[str, tp.Any], dict[str, tp.Any]]:
        """Update the arguments settings for the quantization configuration.

        Chains to the parent implementation when there is one, then appends the keys
        collected from the ``skip``-prefixed boolean fields to ``parsed_args["skips"]``.
        """
        parent_update = getattr(super(), "update_from_dict", None)
        if parent_update:
            parsed_args, overwrites = parent_update(parsed_args=parsed_args, overwrites=overwrites)
        skip_keys = parsed_args.setdefault("skips", [])
        skip_keys.extend(omniconfig.COLLECT_PREFIX_BOOL_FIELDS(parsed_args, "skip"))
        return parsed_args, overwrites
""" includes: list[str] = field(default_factory=list) def __post_init__(self) -> None: __post_init__ = getattr(super(), "__post_init__", None) if __post_init__: __post_init__() self.includes = sorted(set(self.includes or [])) def is_enabled(self) -> bool: """Whether the kernel is enabled.""" return super().is_enabled() and bool(self.includes) def is_enabled_for(self, key: str) -> bool: """Whether the config is enabled for the module key. Args: key (`str`): The key. Returns: `bool`: Whether the config is needed. """ return self.is_enabled() and key in self.includes def generate_dirnames(self, *, prefix: str = "", **kwargs) -> list[str]: """Generate the directory names of the configuration. Args: prefix (`str`, *optional*, defaults to `""`): The prefix of the directory names. Returns: `list[str]`: The directory names. The last directory name is the modules to include. """ names = [] if self.includes: names = super().generate_dirnames(**kwargs) # type: ignore names.append("include.[{}]".format("+".join(self.includes))) if prefix: names = [f"{prefix}.{name}" for name in names] return names @classmethod def update_get_arguments( cls: type["IncludeBasedConfig"], *, overwrites: dict[str, tp.Callable[[omniconfig.Arguments], None] | None] | None = None, defaults: dict[str, tp.Any] | None = None, ) -> tuple[dict[str, tp.Any], dict[str, tp.Any]]: """Update the arguments settings for the quantization configuration.""" update_get_arguments = getattr(super(), "update_get_arguments", None) if update_get_arguments: overwrites, defaults = update_get_arguments(overwrites=overwrites, defaults=defaults) overwrites = overwrites or {} defaults = defaults or {} collect_fn = omniconfig.ADD_PREFIX_BOOL_FIELDS("include", **defaults) def add_includes_argument(parser): collect_fn(parser) parser.add_argument("--includes", nargs="+", default=[], help="The keys of the modules to include.") overwrites.setdefault("includes", add_includes_argument) return overwrites, defaults @classmethod def 
@configclass
@dataclass
class BaseModelConfig(ABC):
    """Base class for all model configs.

    Args:
        name (`str`):
            Name of the model.
        family (`str`, *optional*, defaults to `""`):
            Family of the model. If not specified, it is the part of the name before the first ``-``.
        path (`str`, *optional*, defaults to `""`):
            Path of the model. If not specified, it is ``root/family/name``.
        root (`str`, *optional*, defaults to `""`):
            Root directory path for models.
        local_path (`str`, *optional*, defaults to `""`):
            Local path of the model. If not specified, it is ``local_root/family/name``.
        local_root (`str`, *optional*, defaults to `""`):
            Local root directory path for models (``~`` is expanded).
    """

    name: str
    family: str = ""
    path: str = ""
    root: str = ""
    local_path: str = ""
    local_root: str = ""

    def __post_init__(self):
        """Fill in the derived defaults; an existing local path overrides ``path``."""
        if not self.family:
            self.family, *_ = self.name.split("-")
        self.local_root = os.path.expanduser(self.local_root)
        relative_parts = (self.family, self.name)
        if not self.local_path:
            self.local_path = os.path.join(self.local_root, *relative_parts)
        if not self.path:
            self.path = os.path.join(self.root, *relative_parts)
        # A local copy on disk always takes precedence.
        if os.path.exists(self.local_path):
            self.path = self.local_path

    @abstractmethod
    def build(self, *args, **kwargs) -> tp.Any:
        """Build model from config."""
        ...
""" root: str = "runs" dirname: str = "default" job: str = "run" dirpath: str = field(init=False) timestamp: str = field(init=False) def __post_init__(self): self.timestamp = self.generate_timestamp() self.dirpath = os.path.join(self.root, self.dirname) @property def running_dirpath(self) -> str: """Get the running directory path.""" return f"{self.dirpath}.RUNNING" @property def error_dirpath(self) -> str: """Get the error directory path.""" return f"{self.dirpath}.ERROR" @property def job_dirname(self) -> str: """Get the job directory name.""" return f"{self.job}-{self.timestamp}" @property def job_dirpath(self) -> str: """Get the job directory path.""" return os.path.join(self.dirpath, self.job_dirname) @property def running_job_dirname(self) -> str: """Get the running job directory name.""" return f"{self.job_dirname}.RUNNING" @property def error_job_dirname(self) -> str: """Get the error job directory name.""" return f"{self.job_dirname}.ERROR" @property def running_job_dirpath(self) -> str: """Get the running job directory path.""" return os.path.join(self.running_dirpath, self.running_job_dirname) def lock(self) -> None: """Lock the running (job) directory.""" try: if os.path.exists(self.dirpath): os.rename(self.dirpath, self.running_dirpath) elif os.path.exists(self.error_dirpath): os.rename(self.error_dirpath, self.running_dirpath) except Exception: pass os.makedirs(self.running_job_dirpath, exist_ok=True) def unlock(self, error: bool = False) -> None: """Unlock the running (job) directory.""" job_dirpath = os.path.join(self.running_dirpath, self.error_job_dirname if error else self.job_dirname) os.rename(self.running_job_dirpath, job_dirpath) if not self.is_locked_by_others(): os.rename(self.running_dirpath, self.error_dirpath if error else self.dirpath) def is_locked_by_others(self) -> bool: """Check if the running directory is locked by others.""" running_job_dirname = self.running_job_dirname for dirname in os.listdir(self.running_dirpath): if 
dirname.endswith(".RUNNING") and dirname != running_job_dirname: return True return False def get_running_path(self, filename: str) -> str: """Get the file path in the running directory.""" name, ext = os.path.splitext(filename) return os.path.join(self.running_dirpath, f"{name}-{self.timestamp}{ext}") def get_running_job_path(self, filename: str) -> str: """Get the file path in the running job directory.""" name, ext = os.path.splitext(filename) return os.path.join(self.running_job_dirpath, f"{name}-{self.timestamp}{ext}") @staticmethod def generate_timestamp() -> str: """Generate a timestamp.""" return DateTime.now().strftime("%y%m%d.%H%M%S") ================================================ FILE: deepcompressor/utils/config/path.py ================================================ # -*- coding: utf-8 -*- """Path configuration.""" import os import typing as tp from ..dataclass import get_fields __all__ = ["BasePathConfig"] class BasePathConfig: """Base path configuration.""" def is_all_set(self) -> bool: """Check if the path configuration is all set. Returns: `bool`: Whether the path configuration is all set. """ fields = get_fields(self) for f in fields: if not getattr(self, f.name): return False return True def is_all_empty(self) -> bool: """Check if the path configuration is all empty. Returns: `bool`: Whether the path configuration is all empty. """ fields = get_fields(self) for f in fields: if getattr(self, f.name): return False return True def clone(self) -> tp.Self: """Clone the path configuration. Returns: `Self`: The cloned path configuration. """ fields = get_fields(self) return self.__class__(**{f.name: getattr(self, f.name) for f in fields}) def add_parent_dirs(self, *parent_dirs: str) -> tp.Self: """Add the parent directories to the paths. Args: parent_dirs (`str`): The parent directories. 
def get_fields(class_or_instance, *, init_vars: bool = False, class_vars: bool = False) -> tuple[Field, ...]:
    """Collect the fields registered on a dataclass type or instance.

    Regular fields are always returned; init-only and class-level pseudo-fields
    are included only on request.

    Args:
        class_or_instance:
            The dataclass type or instance.
        init_vars (`bool`, *optional*, defaults to `False`):
            Whether to include the init vars.
        class_vars (`bool`, *optional*, defaults to `False`):
            Whether to include the class vars.

    Returns:
        `tuple[Field, ...]`:
            The selected fields, in the order they are stored in the dataclass field registry.

    Raises:
        TypeError: If `class_or_instance` carries no dataclass field registry.
    """
    try:
        registry = getattr(class_or_instance, _FIELDS)
    except AttributeError:
        raise TypeError("must be called with a dataclass type or instance") from None
    # The field kinds are sentinel objects, so they are matched by identity.
    accepted_kinds = [_FIELD]
    if init_vars:
        accepted_kinds.append(_FIELD_INITVAR)
    if class_vars:
        accepted_kinds.append(_FIELD_CLASSVAR)
    selected = []
    for candidate in registry.values():
        if any(candidate._field_type is kind for kind in accepted_kinds):
            selected.append(candidate)
    return tuple(selected)
""" tensors = self.input_packager.unpack(module, input_args, input_kwargs) assert len(tensors) == 1, "BranchHook only supports single input tensor" self.tensor = next(iter(tensors.values())) return None def post_forward( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tuple[torch.Tensor, ...], ) -> tp.Any: """Post-forward function. Args: module (nn.Module): Module. input_args (tuple[torch.Tensor, ...]): Input arguments. input_kwargs (dict[str, tp.Any]): Input keyword arguments. output (tuple[torch.Tensor, ...]): Output. """ output_tensors = self.output_packager.unpack(module, input_args, input_kwargs, output) assert len(output_tensors) == 1, "LoRAHook only supports single output tensor" output_key, output_tensor = next(iter(output_tensors.items())) if self.branch is not None: output_tensor = output_tensor + self.branch(self.tensor) self.tensor = None return self.output_packager.repack({output_key: output_tensor}, module, input_args, input_kwargs, output) ================================================ FILE: deepcompressor/utils/hooks/hook.py ================================================ # -*- coding: utf-8 -*- """nn.Module Hook.""" import typing as tp from collections import defaultdict import torch import torch.ao.quantization import torch.nn as nn import torch.utils.hooks from .packager import BaseInputPackager, BaseOutputPackager, SimpleInputPackager, SimpleOutputPackager __all__ = ["Hook", "EarlyStopException", "EarlyStopHook", "IOHook"] class Hook: """Base class for hook.""" handles: dict[nn.Module, list[torch.utils.hooks.RemovableHandle]] pre: bool post: bool activated: bool def __init__(self, *, pre: bool, post: bool) -> None: """Initialize the hook. Args: pre (`bool`): Whether the hook should be called before the forward pass. post (`bool`): Whether the hook should be called after the forward pass. Raises: AssertionError: If both `pre` and `post` are `False`. 
""" self.handles = defaultdict(list) self.pre = pre self.post = post self.activated = True assert self.pre or self.post, "At least one of pre and post must be True." def is_in_hook(self) -> bool: """Whether the hook is an in-hook.""" return self.pre and not self.post def is_out_hook(self) -> bool: """Whether the hook is an out-hook.""" return not self.pre and self.post def is_inout_hook(self) -> bool: """Whether the hook is an in-out-hook.""" return self.pre and self.post def activate(self) -> tp.Self: """Activate the hook.""" self.activated = True return self def deactivate(self) -> tp.Self: """Deactivate the hook.""" self.activated = False return self def pre_forward( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any] ) -> tp.Any: """Pre-forward function. Args: module (`nn.Module`): Module to process. input_args (`tuple[torch.Tensor, ...]`): Input arguments. input_kwargs (`dict[str, tp.Any]`): Input keyword arguments. """ return None def post_forward( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tuple[torch.Tensor, ...], ) -> tp.Any: """Post-forward function. Args: module (`nn.Module`): Module to process. input_args (`tuple[torch.Tensor, ...]`): Input arguments. input_kwargs (`dict[str, tp.Any]`): Input keyword arguments. output (`tuple[torch.Tensor, ...]`): Output. """ return None def __call__(self, *args, **kwargs) -> tp.Any: if not self.activated: return None n = len(args) + len(kwargs) if n == 3: return self.pre_forward(*args, **kwargs) elif n == 4: return self.post_forward(*args, **kwargs) else: raise ValueError(f"Invalid number of arguments: {n}") def register( self, module: nn.Module | tp.Iterable[nn.Module], prepend: bool | tuple[bool, bool] = False, always_call: bool = False, ) -> tp.Self: """Register the hook to the module(s). Args: module (`nn.Module` or `Iterable[nn.Module]`): The module(s). 
prepend (`bool` or `tuple[bool, bool]`, *optional*, defaults to `False`): Whether to prepend the hook. If a tuple, the first element is for pre-hook and the second element is for post-hook. always_call (`bool`, *optional*, defaults to `False`): Whether to always call the hook. This is only used for post-hooks. """ if isinstance(module, nn.Module): module = [module] prepends = (prepend, prepend) if isinstance(prepend, bool) else prepend if self.pre: for mod in module: self.handles[mod].append(mod.register_forward_pre_hook(self, prepend=prepends[0], with_kwargs=True)) if self.post: for mod in module: self.handles[mod].append( mod.register_forward_hook(self, prepend=prepends[1], with_kwargs=True, always_call=always_call) ) return self def remove(self, module: nn.Module | tp.Iterable[nn.Module] | None = None) -> tp.Self: """Remove the hook from the module(s). Args: module (`nn.Module` or `Iterable[nn.Module]`, *optional*, defaults to `None`): The module(s) to remove the hook from. If `None`, remove the hook from all modules. 
""" if module is None: for handles in self.handles.values(): for handle in handles: handle.remove() handles.clear() self.handles.clear() return self if isinstance(module, nn.Module): module = [module] for mod in module: handles = self.handles.pop(mod, []) for handle in handles: handle.remove() handles.clear() return self class EarlyStopException(Exception): """Early stop exception.""" pass class EarlyStopHook(Hook): def __init__(self): super().__init__(pre=False, post=True) def pre_forward(self, *args, **kwargs) -> None: raise EarlyStopException() class IOHook(Hook): """Base class for IO hooks.""" input_packager: BaseInputPackager """Input packager, used to unpack and repack the input arguments.""" output_packager: BaseOutputPackager """Output packager, used to unpack and repack the output.""" def __init__( self, *, pre: bool, post: bool, input_packager: BaseInputPackager | None = None, output_packager: BaseOutputPackager | None = None, ): """Initialize the IO hook. Args: pre (`bool`): Whether the hook should be called before the forward pass. post (`bool`): Whether the hook should be called after the forward pass. input_packager (`BaseInputPackager`, *optional*, defaults to `None`): Input packager, used to unpack and repack the input arguments. output_packager (`BaseOutputPackager`, *optional*, defaults to `None`): Output packager, used to unpack and repack the output. 
""" super().__init__(pre=pre, post=post) if pre: self.input_packager = input_packager or SimpleInputPackager() assert isinstance(self.input_packager, BaseInputPackager) else: self.input_packager = None if post: self.output_packager = output_packager or SimpleOutputPackager() assert isinstance(self.output_packager, BaseOutputPackager) else: self.output_packager = None ================================================ FILE: deepcompressor/utils/hooks/packager.py ================================================ # -*- coding: utf-8 -*- """Packagers for input and output tensors in hooks.""" import functools import inspect import typing as tp from abc import ABC, abstractmethod import torch import torch.ao.quantization import torch.nn as nn import torch.utils.hooks __all__ = [ "BaseInputPackager", "SimpleInputPackager", "KeyedInputPackager", "BaseOutputPackager", "SimpleOutputPackager", "KeyedOutputPackager", ] class BaseInputPackager(ABC): """Base class for input packagers.""" @abstractmethod def unpack( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any] ) -> dict[int | str, torch.Tensor]: """Unpack inputs in inputs packager. Args: module (`nn.Module`): Module. input_args (`tuple[torch.Tensor, ...]`): Input arguments. input_kwargs (`dict[str, tp.Any]`): Input keyword arguments. Returns: `dict[int | str, torch.Tensor]`: The unpacked input tensors. """ ... @abstractmethod def repack( self, tensors: dict[int | str, torch.Tensor], module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], ) -> tuple[tuple[torch.Tensor, ...], dict[str, tp.Any]]: """Repack inputs in inputs packager. Args: tensors (`dict[int | str, torch.Tensor]`): The input tensors. module (`nn.Module`): Module. input_args (`tuple[torch.Tensor, ...]`): Input arguments. input_kwargs (`dict[str, tp.Any]`): Input keyword arguments. 
Returns: `tuple[tuple[torch.Tensor, ...], dict[str, tp.Any]]`: The repacked input arguments and keyword arguments. """ ... class SimpleInputPackager(BaseInputPackager): def unpack( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any] ) -> dict[int | str, torch.Tensor]: return {0: input_args[0]} def repack( self, tensors: dict[int | str, torch.Tensor], module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], ) -> tuple[tuple[torch.Tensor, ...], dict[str, tp.Any]]: return (tensors[0], *input_args[1:]), input_kwargs class KeyedInputPackager(BaseInputPackager): def __init__(self, module: nn.Module, index_or_keys: list[int | str]): forward_name = "forward" if isinstance(module.forward, functools.partial): if hasattr(module, "_deepcompressor_orig_forward"): forward_name = "_deepcompressor_orig_forward" else: # this module has been wrapped in `accelerate` package assert hasattr(module, "_old_forward") assert module._old_forward is module.forward.__wrapped__ # type: ignore forward_name = "_old_forward" signature = inspect.signature(getattr(module, forward_name)) args, kwargs = [], [] for key, param in signature.parameters.items(): if param.kind == inspect.Parameter.POSITIONAL_ONLY: args.append(key) elif param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD: args.append(key) kwargs.append(key) elif param.kind == inspect.Parameter.KEYWORD_ONLY: kwargs.append(key) self.index_key_pairs: list[tuple[int | None, str | None]] = [] for index_or_key in index_or_keys: if isinstance(index_or_key, int): index = index_or_key if index >= len(args) or args[index] not in kwargs: self.index_key_pairs.append((index, None)) else: self.index_key_pairs.append((index, args[index])) else: key = index_or_key if key in args: self.index_key_pairs.append((args.index(key), key)) else: self.index_key_pairs.append((None, key)) self.index_or_keys = index_or_keys def unpack( self, module: nn.Module, input_args: tuple[torch.Tensor, 
...], input_kwargs: dict[str, tp.Any] ) -> dict[int | str, torch.Tensor]: tensors = {} for index_or_key, (index, key) in zip(self.index_or_keys, self.index_key_pairs, strict=True): if index is not None and index < len(input_args): tensors[index_or_key] = input_args[index] else: assert key is not None tensors[index_or_key] = input_kwargs.get(key, None) return tensors def repack( self, tensors: dict[int | str, torch.Tensor], module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], ) -> tuple[tuple[torch.Tensor, ...], dict[str, tp.Any]]: _args, _kwargs = list(input_args), dict(input_kwargs) for index_or_key, (index, key) in zip(self.index_or_keys, self.index_key_pairs, strict=True): if index is not None and index < len(_args): _args[index] = tensors[index_or_key] else: assert key is not None _kwargs[key] = tensors[index_or_key] return tuple(_args), _kwargs class BaseOutputPackager(ABC): """Base class for output packagers.""" @abstractmethod def unpack( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tp.Any, ) -> dict[int | str, torch.Tensor]: """Unpack outputs in outputs packager. Args: module (`nn.Module`): Module. input_args (`tuple[torch.Tensor, ...]`): Input arguments. input_kwargs (`dict[str, tp.Any]`): Input keyword arguments. output (`Any`): Output. Returns: `dict[int | str, torch.Tensor]`: The unpacked output tensors. """ ... @abstractmethod def repack( self, tensors: dict[int | str, torch.Tensor], module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tp.Any, ) -> tp.Any: """Repack outputs in outputs packager. Args: tensors (`dict[int | str, torch.Tensor]`): The output tensors. module (`nn.Module`): Module. input_args (`tuple[torch.Tensor, ...]`): Input arguments. input_kwargs (`dict[str, tp.Any]`): Input keyword arguments. output (`Any`): Output. Returns: `Any`: The repacked output. """ ... 
class SimpleOutputPackager(BaseOutputPackager): def unpack( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tp.Any, ) -> dict[int | str, torch.Tensor]: if not isinstance(output, torch.Tensor): output = output[0] return {0: output} def repack( self, tensors: dict[int | str, torch.Tensor], module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tp.Any, ) -> tp.Any: if isinstance(output, torch.Tensor): return tensors[0] else: return (tensors[0], *output[1:]) class KeyedOutputPackager(BaseOutputPackager): def __init__(self, index_or_keys: list[int | str]): self.index_or_keys = index_or_keys def unpack( self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tp.Any, ) -> dict[int | str, torch.Tensor]: tensors = {} if isinstance(output, (tuple, list)): for index_or_key in self.index_or_keys: assert isinstance(index_or_key, int) and index_or_key < len(output) tensors[index_or_key] = output[index_or_key] elif isinstance(output, dict): for index_or_key in self.index_or_keys: assert isinstance(index_or_key, str) and index_or_key in output tensors[index_or_key] = output[index_or_key] else: assert isinstance(output, torch.Tensor) assert len(self.index_or_keys) == 1 assert self.index_or_keys[0] == 0 tensors[0] = output return tensors def repack( self, tensors: dict[int | str, torch.Tensor], module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any], output: tp.Any, ) -> tp.Any: if isinstance(output, (tuple, list)): _output = list(output) for index_or_key in self.index_or_keys: assert isinstance(index_or_key, int) and index_or_key < len(_output) _output[index_or_key] = tensors[index_or_key] return tuple(_output) elif isinstance(output, dict): _output = dict(output) for index_or_key in self.index_or_keys: assert isinstance(index_or_key, str) and index_or_key in _output _output[index_or_key] = 
tensors[index_or_key] return _output else: assert isinstance(output, torch.Tensor) assert len(self.index_or_keys) == 1 assert self.index_or_keys[0] == 0 return tensors[0] ================================================ FILE: deepcompressor/utils/hooks/processor.py ================================================ # -*- coding: utf-8 -*- """Tensor processor.""" import abc import typing as tp import torch import torch.ao.quantization import torch.nn as nn import torch.utils.hooks from .hook import IOHook from .packager import BaseInputPackager, BaseOutputPackager __all__ = ["BaseTensorProcessor", "ProcessHook"] class BaseTensorProcessor(abc.ABC): @abc.abstractmethod def is_enabled(self) -> bool: ... @abc.abstractmethod def get_input_packager(self) -> BaseInputPackager | None: ... @abc.abstractmethod def get_output_packager(self) -> BaseOutputPackager | None: ... @abc.abstractmethod def process(self, tensor: torch.Tensor) -> torch.Tensor: ... def as_hook( self, func: tp.Callable[[torch.Tensor], torch.Tensor] | None = None, *, is_output: bool = False ) -> "ProcessHook": """Convert the processor to a hook. Args: func (`Callable[[torch.Tensor], torch.Tensor]` or `None`, *optional*, defaults to `None`): Function to process the tensors. is_output (`bool`, *optional*, defaults to `False`): Whether to process the output tensors. Returns: `ProcessHook`: The hook for processing the tensor. 
class ProcessHook(IOHook):
    """Hook that passes a module's input or output tensors through a tensor processor.

    The hook acts on the inputs (pre-hook) unless `is_output` is set, in which case it
    acts on the outputs (post-hook). Packagers are taken from the processor.
    """

    def __init__(
        self,
        processor: BaseTensorProcessor,
        func: tp.Callable[[torch.Tensor], torch.Tensor] | None = None,
        is_output: bool = False,
    ):
        """Initialize the hook.

        Args:
            processor (`BaseTensorProcessor`):
                Processor supplying the packagers, the enabled flag and the default transformation.
            func (`Callable[[torch.Tensor], torch.Tensor]` or `None`, *optional*, defaults to `None`):
                Function applied to each tensor instead of `processor.process`.
            is_output (`bool`, *optional*, defaults to `False`):
                Whether to process the output tensors rather than the input tensors.
        """
        input_packager = processor.get_input_packager()
        output_packager = processor.get_output_packager()
        super().__init__(
            pre=not is_output,
            post=is_output,
            input_packager=input_packager,
            output_packager=output_packager,
        )
        self.processor = processor
        self.func = func

    def process(self, tensors: dict[int | str, torch.Tensor]) -> dict[int | str, torch.Tensor]:
        """Transform every tensor of `tensors` in place and return the same dict."""
        for key in tensors:
            value = tensors[key]
            assert isinstance(value, torch.Tensor)
            if self.func is None:
                tensors[key] = self.processor.process(value)
            else:
                tensors[key] = self.func(value)
        return tensors

    def pre_forward(
        self, module: nn.Module, input_args: tuple[torch.Tensor, ...], input_kwargs: dict[str, tp.Any]
    ) -> tuple[tuple[torch.Tensor, ...], dict[str, tp.Any]]:
        """Return the inputs with the selected tensors processed, or unchanged if the processor is disabled."""
        if self.processor.is_enabled():
            packager = self.input_packager
            processed = self.process(packager.unpack(module, input_args, input_kwargs))
            return packager.repack(processed, module, input_args, input_kwargs)
        return input_args, input_kwargs

    def post_forward(
        self,
        module: nn.Module,
        input_args: tuple[torch.Tensor, ...],
        input_kwargs: dict[str, tp.Any],
        output: tuple[torch.Tensor, ...],
    ) -> tp.Any:
        """Return the output with the selected tensors processed, or unchanged if the processor is disabled."""
        if self.processor.is_enabled():
            packager = self.output_packager
            processed = self.process(packager.unpack(module, input_args, input_kwargs, output))
            return packager.repack(processed, module, input_args, input_kwargs, output)
        return output
Args: n (`int`): The number to check. Returns: `bool`: Whether the number is a power of 2. """ return (n & (n - 1) == 0) and (n > 0) def root_(y: torch.Tensor, index: float) -> torch.Tensor: """In-place compute the root of a tensor element-wise. Args: y (`torch.Tensor`): The input tensor. index (`float`): The root index. Returns: `torch.Tensor`: The output tensor. """ return y.pow_(1 / index) if index != 2 else y.sqrt_() ================================================ FILE: deepcompressor/utils/math/hadamard.py ================================================ # -*- coding: utf-8 -*- """Utility functions for quantization hadamard transformation.""" import typing as tp import scipy.linalg import torch from .functional import is_pow2 # Adapted from https://github.com/Cornell-RelaxML/quip-sharp/blob/main/lib/utils/matmul_had.py # Copied from https://github.com/spcl/QuaRot/blob/main/fake_quant/hadamard_utils.py __all__ = ["random_hadamard_matrix", "HadamardMatrix", "hardmard_transform"] def _matmul_hadU(X: torch.Tensor, hadamard_K: torch.Tensor | None, K: int) -> torch.Tensor: """Apply Hadamard matrix to the input tensor.""" n = X.shape[-1] input = X.clone().view(-1, n, 1) output = input.clone() while input.shape[1] > K: input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2]) output = output.view(input.shape) output[:, :, 0, :] = input[:, :, 0, :] + input[:, :, 1, :] output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :] output = output.view(input.shape[0], input.shape[1], -1) (input, output) = (output, input) del output if K > 1: assert hadamard_K is not None input = hadamard_K.view(1, K, K).to(input) @ input return input.view(X.shape) / torch.tensor(n).sqrt() def random_hadamard_matrix(size: int) -> torch.Tensor: """Generate a random Hadamard matrix of size `size`.""" # See https://cornell-relaxml.github.io/quip-sharp/ , Section "Randomized Hadamard Transformation" Q = torch.randint(low=0, high=2, size=(size,)).to(torch.float64) Q = Q * 2 - 1 Q 
= torch.diag(Q) hadamard_K, K = HadamardMatrix.get_lhs(size) return _matmul_hadU(Q, hadamard_K=hadamard_K, K=K) def hardmard_transform( x: torch.Tensor, hadamard_rhs: torch.Tensor, hadamard_lhs: torch.Tensor | None, lhs_k: int, scaled: bool = False ) -> torch.Tensor: """Apply Hadamard matrix to the input tensor.""" shape = x.shape n = shape[-1] rhs_k = n // lhs_k x = x.view(-1, lhs_k, rhs_k) x = x @ hadamard_rhs # hadamard_rhs is symmetric since rhs_k is a power of 2 if not scaled: x = x.mul_((1.0 / torch.tensor(n, dtype=torch.float64).sqrt()).to(device=x.device, dtype=x.dtype)) if lhs_k > 1: x = hadamard_lhs @ x return x.view(shape) # region hadamard matrices class HadamardMatrix: # hadamard matrices for had12, had36.pal2, had52,will, had60.pal, had108.pal, had140.pal, had156.will, had172.will: # http://www.neilsloane.com/hadamard/index.html _cache: tp.ClassVar[dict[tuple[int, bool, torch.dtype, torch.device], tuple[torch.Tensor, torch.Tensor, int]]] = {} @staticmethod def get( n: int, *, scale: bool, dtype: torch.dtype = torch.float32, device: torch.device | str = "cpu" ) -> tuple[torch.Tensor, torch.Tensor, int]: """Get the Hadamard matrix and left-hand-size dimension for a given input size.""" device = torch.device(device) key = (n, scale, dtype, device) if key not in HadamardMatrix._cache: lhs, k = HadamardMatrix.get_lhs(n) rhs = torch.FloatTensor(scipy.linalg.hadamard(n // k)) if scale: rhs = rhs.to(dtype=torch.float64).mul_(1.0 / torch.tensor(n, dtype=torch.float64).sqrt_()) lhs = lhs.to(dtype=dtype, device=device) rhs = rhs.to(dtype=dtype, device=device) HadamardMatrix._cache[key] = rhs, lhs, k return HadamardMatrix._cache[key] @staticmethod def get_lhs(n: int) -> tuple[torch.FloatTensor, int]: """Get the Hadamard matrix (left-hand-side) and its dimension for a given input size.""" for k in [172, 156, 140, 108, 60, 52, 36, 28, 40, 20, 12]: if n % k == 0: assert is_pow2(n // k) return HadamardMatrix._get_hadamard_k(k), k assert is_pow2(n) return 
torch.FloatTensor([1]), 1 @staticmethod def _get_hadamard_k(k: int) -> torch.FloatTensor: if is_pow2(k): return torch.FloatTensor(scipy.linalg.hadamard(k)) else: return getattr(HadamardMatrix, f"_get_hadamard_{k}")() @staticmethod def _get_hadamard_12() -> torch.FloatTensor: return torch.FloatTensor( [ [+1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], [+1, +1, -1, +1, -1, -1, -1, +1, +1, +1, -1, +1], [+1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, -1], [+1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1], [+1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1], [+1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1], [+1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1], [+1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1], [+1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1], [+1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1], [+1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1], [+1, -1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1], ] ) @staticmethod def _get_hadamard_40() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, 
-1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, 
-1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, 
+1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, ], [ +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, ], [ +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, ], ] ) @staticmethod def _get_hadamard_20() -> torch.FloatTensor: return torch.FloatTensor( [ [+1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], [+1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1], [+1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1], [+1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, 
+1, +1, +1, -1], [+1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1], [+1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1], [+1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1], [+1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1], [+1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1], [+1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1], [+1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1], [+1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1], [+1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1], [+1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1], [+1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1], [+1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1], [+1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1], [+1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1], [+1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1], [+1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1], ] ) @staticmethod def _get_hadamard_28() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, 
+1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, ], [ -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, ], [ +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, ], [ +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, ], [ +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, ], [ 
+1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, -1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, ], ] ) @staticmethod def _get_hadamard_36() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, 
-1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, ], [ -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, ], [ +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, ], [ +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, 
-1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, ], [ +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, ], [ +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, ], [ +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, ], [ +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, ], [ +1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, 
-1, -1, -1, ], [ +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, ], ] ) @staticmethod def _get_hadamard_60() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, -1, +1, 
+1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, 
+1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, 
-1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, 
+1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, 
+1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, 
+1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, 
+1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, ], ] ) @staticmethod def _get_hadamard_52() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, ], [ -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, ], [ -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, ], [ -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, 
+1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, ], [ +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, ], [ +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, ], [ -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, ], [ -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, ], [ -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, ], [ -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, 
-1, +1, +1, -1, -1, +1, -1, +1, ], [ +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, ], [ +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, ], [ -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, ], [ -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, ], [ -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, ], [ -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, ], [ -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, ], [ -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, ], [ +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, 
+1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, ], [ +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, ], [ -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, ], [ -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, ], [ +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, ], [ -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, ], [ -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, 
-1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, ], [ -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, ], [ -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, ], [ -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, ], [ -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, 
-1, ], [ +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, ], [ -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, ], [ +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, ], [ -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, ], [ +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, ], [ +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, 
-1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, ], ] ) @staticmethod def _get_hadamard_108() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, +1, 
+1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, 
+1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, 
+1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, 
+1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, 
-1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, 
+1, +1, -1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, 
+1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, 
-1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, 
+1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, 
-1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, 
-1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, 
-1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, 
-1, +1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, 
+1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, 
+1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, 
+1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, 
-1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, 
-1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, 
+1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, 
-1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, 
+1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, 
-1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, 
+1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, ], ] ) @staticmethod def _get_hadamard_140() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, 
-1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, 
+1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, 
-1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, 
-1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, 
-1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, 
+1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, 
+1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, 
+1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, 
-1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, 
-1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, 
+1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, 
-1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, 
-1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, 
+1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, 
-1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, 
+1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, 
+1, +1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, 
-1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, 
+1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, 
+1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, 
-1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, 
+1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, 
+1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, 
-1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, ], [ +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, 
-1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, 
-1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, 
+1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, 
-1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, 
+1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, 
-1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, 
+1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, 
-1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, 
+1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, 
+1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, 
-1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, 
-1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, 
+1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, 
-1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, 
+1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, ], ] ) @staticmethod def _get_hadamard_156() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, ], [ +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, 
+1, -1, +1, +1, -1, -1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, ], [ -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, ], [ -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, 
+1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, ], [ -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, 
+1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, ], [ -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, ], [ -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, 
-1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, ], [ -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, ], [ -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, 
-1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, ], [ -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, ], [ +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, 
-1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, ], [ -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, ], [ -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, ], [ -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, ], [ -1, -1, -1, -1, 
+1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, ], [ +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, ], [ -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, 
-1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, ], [ -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, ], [ -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, 
-1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, ], [ -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, ], [ -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, ], [ -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, 
-1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, ], [ -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, ], [ -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, 
+1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, ], [ +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, ], [ -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, ], [ -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, 
+1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, ], [ +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, ], [ -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, ], [ -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, 
+1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, ], [ -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, ], [ -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, 
+1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, ], [ -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, 
-1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, ], [ +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, ], [ +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, ], [ -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, 
+1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, ], [ -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, 
-1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, ], [ -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, 
+1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, ], [ -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, 
+1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, ], [ -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, ], [ +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, ], [ -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, 
-1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, ], [ +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, 
-1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, ], [ -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, ], [ -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, 
+1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, 
-1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, ], [ -1, +1, +1, +1, -1, +1, +1, -1, -1, 
+1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, ], [ -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, ], [ -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, ], [ -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, 
-1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, ], [ -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, ], [ -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, 
+1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, ], [ -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, ], [ -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, 
-1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, 
+1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, ], [ +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, 
+1, -1, +1, -1, +1, +1, +1, ], [ +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, ], [ -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, ], [ +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, 
+1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, ], [ -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, 
-1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, ], [ +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, ], [ -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, 
+1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, ], [ -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, 
+1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, ], [ -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, 
+1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, ], [ +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, ], [ -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, ], [ +1, -1, +1, 
+1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, ], [ -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, ], [ -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, +1, ], [ +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, 
+1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, ], [ -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, 
-1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, -1, -1, ], [ -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, ], [ +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, 
+1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, ], [ +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, ], [ -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, ], [ -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, 
+1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, ], [ -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, 
-1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, ], [ -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, ], [ +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, ], [ +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, 
+1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, ], [ +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, ], [ -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, 
-1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, ], [ -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, ], [ -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, 
+1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, ], [ -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, ], [ -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, 
+1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, ], [ -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, ], [ -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, ], [ -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, 
-1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, ], [ +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, 
-1, -1, ], [ +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, ], [ +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, ], [ -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, ], [ +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, 
+1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, ], [ -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, +1, ], [ -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, 
+1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, -1, ], [ -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, 
-1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, ], ] ) @staticmethod def _get_hadamard_172() -> torch.FloatTensor: return torch.FloatTensor( [ [ +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, ], [ -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, 
-1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, ], [ -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, ], [ -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, 
+1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, ], [ -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, ], [ -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, 
+1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, ], [ +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, ], [ +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, 
+1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, ], [ -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, 
-1, -1, -1, -1, +1, +1, -1, -1, +1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, ], [ -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, 
+1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, ], [ -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, 
+1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, ], [ +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, ], [ -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, 
+1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, ], [ -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, 
-1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, ], [ -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, 
+1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, ], [ +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, ], [ -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, ], [ +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, 
+1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, ], [ -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, 
+1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, ], [ -1, +1, +1, 
+1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, ], [ -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, ], [ +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, 
-1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, ], [ -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, ], [ -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, 
-1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, ], [ -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, ], [ -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, ], [ -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, 
+1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, ], [ +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, ], [ -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, ], [ -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, 
-1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, ], [ -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, ], [ -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, ], [ -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, 
+1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, ], [ -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, ], [ +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, ], [ +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, 
-1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, 
+1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, ], [ -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, ], [ +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, 
-1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, ], [ +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, ], [ -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, 
-1, -1, +1, ], [ -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, ], [ -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, 
+1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, ], [ -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, ], [ -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, 
+1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, 
+1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, ], [ -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, ], [ +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, ], [ -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, 
-1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, ], [ +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, ], [ +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, 
-1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, ], [ +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, ], [ -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, ], [ -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, 
+1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, ], [ -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, ], [ -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, +1, ], [ -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, 
+1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, ], [ -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, +1, ], [ +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ -1, +1, -1, -1, -1, -1, -1, -1, +1, 
+1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, ], [ -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, ], [ -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, 
+1, +1, +1, -1, -1, -1, -1, -1, -1, +1, ], [ -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, ], [ +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, ], [ -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, 
+1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, ], [ +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, ], [ -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, ], [ -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, 
-1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, ], [ -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, 
-1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, ], [ -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, ], [ +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, 
+1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, ], [ -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, ], [ -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, ], [ -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, 
-1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, ], [ -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, ], [ -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, 
+1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, ], [ +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, ], [ +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, ], [ +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, 
-1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, ], [ -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, ], [ +1, -1, 
+1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, ], [ -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, ], [ -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, 
-1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, ], [ -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, -1, +1, -1, +1, +1, -1, 
-1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, ], [ -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, ], [ +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, ], [ -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, 
+1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, +1, ], [ +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, ], [ +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, ], [ -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, 
+1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, ], [ -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, -1, ], [ -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, 
+1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, -1, ], [ +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, +1, ], [ -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, -1, ], [ -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, -1, 
+1, -1, -1, +1, +1, -1, -1, -1, -1, -1, -1, -1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, -1, -1, +1, -1, -1, ], [ -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, ], [ -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, ], [ +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, 
-1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, ], [ -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, 
-1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, ], [ -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, ], [ -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, 
+1, +1, +1, +1, ], [ -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, ], [ +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, ], [ -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, 
+1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, ], [ +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, ], [ +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, ], [ -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, 
-1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, ], [ -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, 
+1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, ], [ -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, ], [ +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, ], [ +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, 
-1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, ], [ +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, ], [ +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, ], [ +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, 
+1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, ], [ +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, ], [ +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, ], [ +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, 
+1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, ], [ -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, ], [ -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, ], [ +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, 
-1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, ], [ +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, ], [ -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, ], [ +1, -1, +1, +1, -1, -1, +1, +1, 
+1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, ], [ +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, ], [ -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, 
-1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, ], [ +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, +1, ], [ -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, +1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, -1, ], [ -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, 
+1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, -1, ], [ -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, +1, ], [ -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, +1, ], [ +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, 
+1, +1, -1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, -1, ], [ +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, +1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, -1, ], [ +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, +1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, -1, ], [ -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, +1, +1, +1, +1, +1, +1, +1, -1, -1, +1, +1, -1, +1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, -1, -1, +1, +1, -1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, +1, +1, +1, -1, +1, -1, -1, -1, -1, +1, -1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, +1, -1, +1, +1, +1, +1, +1, +1, -1, -1, -1, -1, +1, -1, +1, 
-1, -1, +1, +1, -1, +1, +1, -1, +1, +1, -1, -1, +1, -1, +1, -1, -1, -1, -1, +1, +1, +1, +1, +1, +1, -1, +1, +1, -1, -1, -1, +1, +1, -1, -1, +1, +1, +1, +1, -1, +1, -1, +1, +1, +1, -1, +1, +1, -1, -1, +1, +1, -1, +1, +1, +1, -1, +1, -1, +1, +1, +1, +1, -1, -1, +1, +1, -1, -1, -1, +1, ], ] ) # endregion # endregion # endregion
================================================
FILE: deepcompressor/utils/patch.py
================================================
# -*- coding: utf-8 -*-
"""Monkey-patching utilities."""

import copy
import functools
import types
import typing

import torch.nn as nn

__all__ = ["copy_func", "get_module_parents_map"]


def copy_func(f: types.FunctionType, globals: dict[str, typing.Any] | None = None) -> types.FunctionType:
    """Copy a function into a new, independent function object.

    Copied from https://stackoverflow.com/a/13503277/2988730 (@unutbu)
    and https://github.com/spcl/QuaRot/blob/main/fake_quant/monkeypatch.py.

    The copy reuses the code object, positional defaults and closure of ``f``,
    but may be bound to a different global namespace.

    Args:
        f (`types.FunctionType`):
            Function to be copied.
        globals (`dict[str, typing.Any]` or `None`, *optional*, defaults to `None`):
            Global namespace the copy resolves names in. If `None`, ``f.__globals__`` is reused.

    Returns:
        `types.FunctionType`:
            Copied function.
    """
    if globals is None:
        globals = f.__globals__
    # Build a new function object around the same code object.
    g = types.FunctionType(f.__code__, globals, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__)
    # Carry over the wrapper metadata (name, docstring, attributes, ...) from the original.
    g = functools.update_wrapper(g, f)
    g.__module__ = f.__module__
    # Shallow-copy keyword-only defaults so the copy does not share the dict with ``f``.
    g.__kwdefaults__ = copy.copy(f.__kwdefaults__)  # type: ignore
    return g


def get_module_parents_map(
    module: nn.Module, name: str = "", parents_map: dict[nn.Module, list[tuple[str, nn.Module, str]]] | None = None
) -> dict[nn.Module, list[tuple[str, nn.Module, str]]]:
    """Get module parents map.

    Recursively walks ``module._modules`` and records, for every non-`None` child,
    which parent holds it and under which attribute name.

    Args:
        module (`nn.Module`):
            Module whose children are traversed.
        name (`str`, *optional*, defaults to `""`):
            Dotted name of ``module``; used as the parent name in the recorded tuples
            and as the prefix for the names of deeper levels.
        parents_map (`dict[nn.Module, list[tuple[str, nn.Module, str]]]`, *optional*, defaults to `None`):
            Parents map to fill in place. A new dict is created if `None`.

    Returns:
        `dict[nn.Module, list[tuple[str, nn.Module, str]]]`:
            Module parents map. The key is the child module and the value is a list of tuples.
            Each tuple contains the name of the parent module, the parent module,
            and the child module name in the parent module.
    """
    if parents_map is None:
        parents_map = {}
    for child_name, child_module in module._modules.items():
        if child_module is None:
            continue
        # One entry is appended per (parent, attribute) pair that references the child.
        parents_map.setdefault(child_module, []).append((name, module, child_name))
        # NOTE(review): there is no visited-set, so a submodule reachable through several
        # parents is descended into once per parent.
        get_module_parents_map(child_module, f"{name}.{child_name}" if name else child_name, parents_map)
    return parents_map


================================================
FILE: deepcompressor/utils/tools/__init__.py
================================================
# -*- coding: utf-8 -*-

from . import logging, sys


================================================
FILE: deepcompressor/utils/tools/logging.py
================================================
# -*- coding: utf-8 -*-
"""Logging tools."""

import logging
import sys
import typing as tp

from tqdm.contrib.logging import logging_redirect_tqdm

__all__ = [
    "CRITICAL",
    "FATAL",
    "ERROR",
    "WARNING",
    "WARN",
    "INFO",
    "DEBUG",
    "NOTSET",
    "log",
    "info",
    "debug",
    "warning",
    "error",
    "critical",
    "Formatter",
    "basicConfig",
    "setup",
    "getLogger",
    "redirect_tqdm",
]


# Re-exported level constants from the standard ``logging`` module.
CRITICAL = logging.CRITICAL
FATAL = logging.FATAL
ERROR = logging.ERROR
WARNING = logging.WARNING
WARN = logging.WARN
INFO = logging.INFO
DEBUG = logging.DEBUG
NOTSET = logging.NOTSET

redirect_tqdm = logging_redirect_tqdm
# NOTE(review): ``shutdown`` and ``Logger`` are defined here but not listed in ``__all__``.
shutdown = logging.shutdown
Logger = logging.Logger


def getLogger(name: str | None = None) -> logging.Logger:
    """Get a logger with the given name.

    Args:
        name (`str` or `None`, *optional*, defaults to `None`):
            The name of the logger.

    Returns:
        logging.Logger:
            The logger.
    """
    return logging.getLogger(name)


def log(level: int, msg: str, logger: logging.Logger | None = None) -> None:
    """Log a message with the given level.

    The message is converted with `str`. A message containing newlines is split on
    ``"\\n"`` and every line is logged as its own record. Nothing is done if the
    logger is not enabled for ``level``.

    Args:
        level (`int`):
            The logging level.
        msg (`str`):
            The message to log.
        logger (`logging.Logger` or `None`, *optional*, defaults to `None`):
            The logger to use. If `None`, the root logger is used.
    """
    if logger is None:
        logger = logging.getLogger()
    if not logger.isEnabledFor(level):
        return
    msg = str(msg)
    if "\n" in msg:
        # Emit one record per line by recursing on each single-line piece.
        for line in msg.split("\n"):
            log(level, line, logger)
    else:
        logger.log(level, msg)


def info(msg: str, logger: logging.Logger | None = None) -> None:
    """Log a message with the INFO level.

    Args:
        msg (`str`):
            The message to log.
        logger (`logging.Logger` or `None`, *optional*, defaults to `None`):
            The logger to use. If `None`, the root logger is used.
    """
    log(logging.INFO, msg, logger)


def debug(msg: str, logger: logging.Logger | None = None) -> None:
    """Log a message with the DEBUG level.

    Args:
        msg (`str`):
            The message to log.
        logger (`logging.Logger` or `None`, *optional*, defaults to `None`):
            The logger to use. If `None`, the root logger is used.
    """
    log(logging.DEBUG, msg, logger)


def warning(msg: str, logger: logging.Logger | None = None) -> None:
    """Log a message with the WARNING level.

    Args:
        msg (`str`):
            The message to log.
        logger (`logging.Logger` or `None`, *optional*, defaults to `None`):
            The logger to use. If `None`, the root logger is used.
    """
    log(logging.WARNING, msg, logger)


def error(msg: str, logger: logging.Logger | None = None) -> None:
    """Log a message with the ERROR level.

    Args:
        msg (`str`):
            The message to log.
        logger (`logging.Logger` or `None`, *optional*, defaults to `None`):
            The logger to use. If `None`, the root logger is used.
    """
    log(logging.ERROR, msg, logger)


def critical(msg: str, logger: logging.Logger | None = None) -> None:
    """Log a message with the CRITICAL level.

    Args:
        msg (`str`):
            The message to log.
        logger (`logging.Logger` or `None`, *optional*, defaults to `None`):
            The logger to use. If `None`, the root logger is used.
    """
    log(logging.CRITICAL, msg, logger)


class Formatter(logging.Formatter):
    """A custom formatter that prefixes every message with a configurable indent."""

    # Number of spaces prepended to each message. It lives on the class and is
    # changed through the static ``indent_*`` helpers, so it is shared by all instances.
    indent = 0

    def __init__(self, fmt: str | None = None, datefmt: str | None = None, style: tp.Literal["%", "{", "$"] = "%"):
        """Initialize the formatter.

        Args:
            fmt (`str` or `None`, *optional*, defaults to `None`):
                The format string.
            datefmt (`str` or `None`, *optional*, defaults to `None`):
                The date format string.
            style (`str`, *optional*, defaults to `"%"`):
                The format style.
        """
        super().__init__(fmt, datefmt, style)

    def format(self, record: logging.LogRecord) -> str:
        """Format the record, indenting its message by ``self.indent`` spaces.

        Args:
            record (`logging.LogRecord`):
                The log record.

        Returns:
            str:
                The formatted record.
        """
        # The indent is applied to the message only, not to the rest of the format string.
        record.message = " " * self.indent + record.getMessage()
        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)
        s = self.formatMessage(record)
        if record.exc_info:
            # Cache the formatted traceback on the record so it is built only once.
            if not record.exc_text:
                record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            if s[-1:] != "\n":
                s = s + "\n"
            s = s + record.exc_text
        if record.stack_info:
            if s[-1:] != "\n":
                s = s + "\n"
            s = s + self.formatStack(record.stack_info)
        return s

    @staticmethod
    def indent_inc(delta: int = 2) -> None:
        """Increase the shared indent by ``delta`` spaces."""
        Formatter.indent += delta

    @staticmethod
    def indent_dec(delta: int = 2) -> None:
        """Decrease the shared indent by ``delta`` spaces."""
        Formatter.indent -= delta

    @staticmethod
    def indent_reset(indent: int = 0) -> None:
        """Reset the shared indent to ``indent`` spaces."""
        Formatter.indent = indent


def basicConfig(**kwargs) -> None:
    """Configure the root logger and install the indenting `Formatter` on its handlers.

    ``format``, ``datefmt`` and ``style`` are taken out of ``kwargs`` and used to build
    the `Formatter`; the remaining keyword arguments are forwarded to `logging.basicConfig`.
    """
    fmt = kwargs.pop("format", None)
    datefmt = kwargs.pop("datefmt", None)
    style = kwargs.pop("style", "%")
    logging.basicConfig(**kwargs)
    # Every handler currently attached to the root logger gets its own Formatter instance.
    for h in logging.root.handlers[:]:
        h.setFormatter(Formatter(fmt, datefmt, style))


def setup(
    path: str | None = None,
    level: int = logging.DEBUG,
    format: str = "%(asctime)s | %(levelname).1s | %(message)s",
    datefmt: str = "%y-%m-%d %H:%M:%S",
    **kwargs,
) -> None:
    """Setup the default logging configuration.

    Args:
        path (`str` | `None`, *optional*, defaults to `None`):
            The path to the log file. If `None`, only the console is used.
        level (`int`, *optional*, defaults to `logging.DEBUG`):
            The logging level.
        format (`str`, *optional*, defaults to `"%(asctime)s | %(levelname).1s | %(message)s"`):
            The format string.
        datefmt (`str`, *optional*, defaults to `"%y-%m-%d %H:%M:%S"`):
            The date format string.
        **kwargs:
            Additional keyword arguments. Only ``handlers`` and ``force`` (default `True`)
            are read; any other entry is not passed on to `basicConfig`.
    """
    handlers = kwargs.pop("handlers", None)
    force = kwargs.pop("force", True)
    if handlers is None:
        handlers = [logging.StreamHandler(sys.stdout)]
        # NOTE(review): nesting reconstructed from a whitespace-collapsed dump; as written here
        # the log file is only added when no explicit handlers were supplied -- confirm.
        if path is not None:
            # The log file is opened with mode "w", i.e. truncated on every setup.
            handlers.append(logging.FileHandler(path, mode="w"))
    basicConfig(
        level=level,
        format=format,
        datefmt=datefmt,
        handlers=handlers,
        force=force,
    )


================================================
FILE: deepcompressor/utils/tools/sys.py
================================================
# -*- coding: utf-8 -*-
"""System tools."""

import psutil
import torch

__all__ = ["get_max_memory_map"]


def _get_visible_gpu_capacity_list() -> list[int]:
    """Get visible GPU capacity list.

    Returns:
        `list[int]`:
            Total memory of each CUDA device reported by `torch.cuda.device_count()`,
            in whole GiB (rounded down), ordered by device index.
    """
    return [torch.cuda.get_device_properties(i).total_memory // 1024**3 for i in range(torch.cuda.device_count())]


def _get_ram_capacity() -> int:
    """Get RAM capacity.

    Returns:
        `int`:
            Total RAM capacity in whole GiB (rounded down).
    """
    return psutil.virtual_memory().total // 1024**3  # in GiB


def get_max_memory_map(ratio: float = 0.9) -> dict[str, str]:
    """Get maximum memory map.

    Args:
        ratio (`float`, *optional*, defaults to `0.9`):
            The ratio of the maximum memory to use.

    Returns:
        `dict[str, str]`:
            Maximum memory map. Keys are the GPU indices as strings (``"0"``, ``"1"``, ...)
            plus ``"cpu"``; values are strings such as ``"21GiB"``.
    """
    gpu_capacity_list = _get_visible_gpu_capacity_list()
    ram_capacity = _get_ram_capacity()
    # Capacities are already whole GiB; scaling by ``ratio`` is truncated to an integer again.
    gpu_capacity_list = [str(int(c * ratio)) + "GiB" for c in gpu_capacity_list]
    ram_capacity = str(int(ram_capacity * ratio)) + "GiB"
    ret_dict = {str(idx): gpu_capacity_list[idx] for idx in range(len(gpu_capacity_list))}
    ret_dict["cpu"] = ram_capacity
    return ret_dict


================================================
FILE: deepcompressor/version.py
================================================
# -*- coding: utf-8 -*-
"""Version information."""

__version__ = "0.0.2"


================================================
FILE: environment.yml
================================================
channels:
  - defaults
dependencies:
  - python=3.12
  - pip
  - pip:
    - poetry


================================================
FILE: examples/diffusion/.gitignore
================================================
.tmp
.tmp/
baselines
baselines/
benchmarks
benchmarks/
caches
caches/
datasets
datasets/
visualize/runs
visualize/runs/
*.pdf


================================================
FILE: examples/diffusion/README.md
================================================
# SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models

[[Website](https://hanlab.mit.edu/projects/svdquant)][[Paper](http://arxiv.org/abs/2411.05007)][[Nunchaku Inference System](https://github.com/mit-han-lab/nunchaku)]

Diffusion models have been proven highly effective at generating high-quality images. However, as these models grow larger, they require significantly more memory and suffer from higher latency, posing substantial challenges for deployment. In this work, we aim to accelerate diffusion models by quantizing their weights and activations to 4 bits. At such an aggressive level, both weights and activations are highly sensitive to quantization, where conventional post-training quantization methods for large language models like smoothing become insufficient.
To overcome this limitation, we propose **SVDQuant**, a new 4-bit quantization paradigm. Different from smoothing, which redistributes outliers between weights and activations, our approach *absorbs* these outliers using a low-rank branch. We first shift the outliers from activations into the weights, then employ a high-precision low-rank branch to take in the outliers in the weights with SVD. This process eases the quantization on both sides. However, naively running the low-rank branch independently incurs significant overhead due to extra data movement of activations, negating the quantization speedup. To address this, we co-design an inference engine **Nunchaku** that fuses the kernels in the low-rank branch into those in the low-bit branch to cut off redundant memory access. It can also seamlessly support off-the-shelf low-rank adapters (LoRAs) without requantization. Extensive experiments on SDXL, PixArt-Sigma, and FLUX.1 validate the effectiveness of SVDQuant in preserving image quality. We reduce the memory usage for the 12B FLUX.1 models by 3.6×, achieving 3.5× speedup over the 4-bit weight-only quantized baseline on a 16GB RTX-4090 GPU, paving the way for more interactive applications on PCs. ![Teaser](/assets/diffusion/svdquant/teaser.jpg) ![SVDQuant](/assets/diffusion/svdquant/svdquant.gif) ## Usage We use Flux.1-schnell as an example. ### Step 1: Evaluation Baselines Preparation In order to evaluate the similarity metrics, we have to first prepare the reference images generated by unquantized models by running the following command: ```bash python -m deepcompressor.app.diffusion.ptq configs/model/flux.1-schnell.yaml --output-dirname reference ``` In this command, - [`configs/model/flux.1-schnell.yaml`](configs/model/flux.1-schnell.yaml) specifies the model configurations including evaluation setups.
- By setting the flag `--output-dirname` to `reference`, the output directory will be automatically redirected to the [`ref_root`](configs/__default__.yaml#26) in the evaluation configuration. ### Step 2: Calibration Dataset Preparation Before quantizing diffusion models, we randomly sample 128 prompts from COCO Captions 2024 to generate the calibration dataset by running the following command: ```bash python -m deepcompressor.app.diffusion.dataset.collect.calib \ configs/model/flux.1-schnell.yaml configs/collect/qdiff.yaml ``` In this command, - [`configs/collect/qdiff.yaml`](configs/collect/qdiff.yaml) specifies the calibration dataset configurations, including the path to the prompt yaml (i.e., `--collect-prompt-path prompts/qdiff.yaml`), the number of prompts to be sampled (i.e., `--collect-num-samples 128`), and the root directory of the calibration datasets (which should be in line with the [quantization configuration](configs/__default__.yaml#38)). ### Step 3: Model Quantization The following command will perform INT4 SVDQuant and evaluate the quantized model on 1024 samples from MJHQ-30K: ```bash python -m deepcompressor.app.diffusion.ptq \ configs/model/flux.1-schnell.yaml configs/svdquant/int4.yaml \ --eval-benchmarks MJHQ --eval-num-samples 1024 ``` In this command, - The positional arguments are configuration files which are loaded in order. [`configs/svdquant/int4.yaml`](configs/svdquant/int4.yaml) contains the quantization configurations specialized for INT4 SVDQuant. Please make sure all configuration files are under a subfolder of the working directory where you run the command.
+ You can add [`configs/svdquant/fast.yaml`](configs/svdquant/fast.yaml) for faster quantization, i.e., ```bash python -m deepcompressor.app.diffusion.ptq \ configs/model/flux.1-schnell.yaml configs/svdquant/int4.yaml configs/svdquant/fast.yaml \ --eval-benchmarks MJHQ --eval-num-samples 1024 ``` + You can add [`configs/svdquant/gptq.yaml`](configs/svdquant/gptq.yaml) to perform GPTQ after SVDQuant, i.e., ```bash python -m deepcompressor.app.diffusion.ptq \ configs/model/flux.1-schnell.yaml configs/svdquant/int4.yaml configs/svdquant/gptq.yaml \ --eval-benchmarks MJHQ --eval-num-samples 1024 ``` - All configurations can be directly set either in a YAML file or on the command line. Please refer to [`configs/__default__.yaml`](configs/__default__.yaml) and `python -m deepcompressor.app.diffusion.ptq -h`. - The default evaluation datasets are [5000](configs/__default__.yaml#14) samples from [MJHQ](configs/__default__.yaml#33) and [DCI](configs/__default__.yaml#34). - If you would like to save the quantized model checkpoint, please add `--save-model true` or `--save-model /PATH/TO/CHECKPOINT/DIR` in the command. ## Deployment If you save the SVDQuant W4A4 quantized model checkpoint, you can easily deploy the quantized model with the [`Nunchaku`](https://github.com/mit-han-lab/nunchaku) engine. Please run the following command to convert the saved checkpoint to a Nunchaku-compatible checkpoint: ```bash python -m deepcompressor.backend.nunchaku.convert \ --quant-path /PATH/TO/CHECKPOINT/DIR \ --output-root /PATH/TO/OUTPUT/ROOT \ --model-name MODEL_NAME ``` After we have the Nunchaku-compatible checkpoint, please switch to the Nunchaku conda environment and refer to [`Nunchaku`](https://github.com/mit-han-lab/nunchaku) for further deployment on a GPU system.
If you want to integrate LoRA, please run the following command to convert the LoRA to a Nunchaku-compatible checkpoint: ```bash python -m deepcompressor.backend.nunchaku.convert_lora \ --quant-path /PATH/TO/NUNCHAKU/TRANSFORMER_BLOCKS/SAFETENSORS_FILE \ --lora-path /PATH/TO/DIFFUSERS/LORA/SAFETENSORS_FILE \ --output-root /PATH/TO/OUTPUT/ROOT \ --lora-name LORA_NAME ``` WARNING: the current LoRA conversion script only supports `deepcompressor` v0.0.1 checkpoints generated with [`fuse_when_possible` set to `False`](https://github.com/mit-han-lab/deepcompressor/blob/main/examples/diffusion/configs/svdquant/__default__.yaml#L16) (i.e., `--smooth-fuse-when-possible false`). ## Evaluation Results ### Quality Evaluation Below are the quality and similarity metrics evaluated with 5000 samples from the MJHQ-30K dataset. IR means ImageReward. Our 4-bit results outperform other 4-bit baselines, effectively preserving the visual quality of 16-bit models. | Model | Precision | Method | FID ($\downarrow$) | IR ($\uparrow$) | LPIPS ($\downarrow$) | PSNR ($\uparrow$) | |----------------------------|-----------|-----------|--------------------|-----------------|----------------------|-------------------| | FLUX.1-dev (50 Steps) | BF16 | -- | 20.3 | 0.953 | -- | -- | | | INT W8A8 | SVDQ | 20.4 | 0.948 | 0.089 | 27.0 | | | W4A16 | NF4 | 20.6 | 0.910 | 0.272 | 19.5 | | | INT W4A4 | | 20.2 | 0.908 | 0.322 | 18.5 | | | INT W4A4 | SVDQ | 20.1 | 0.926 | 0.256 | 20.1 | | | INT W4A4 | SVDQ+GPTQ | 19.9 | 0.935 | 0.223 | 21.0 | | | NVFP4 | | 20.3 | 0.926 | 0.242 | 20.4 | | | NVFP4 | SVDQ | 20.3 | 0.944 | 0.224 | 20.8 | | | NVFP4 | SVDQ+GPTQ | 20.3 | 0.945 | 0.203 | 21.5 | | FLUX.1-schnell (4 Steps) | BF16 | -- | 19.2 | 0.938 | -- | -- | | | INT W8A8 | SVDQ | 19.2 | 0.966 | 0.120 | 22.9 | | | W4A16 | NF4 | 18.9 | 0.943 | 0.257 | 18.2 | | | INT W4A4 | | 18.1 | 0.962 | 0.345 | 16.3 | | | INT W4A4 | SVDQ | 18.3 | 0.957 | 0.289 | 17.6 | | | INT W4A4 | SVDQ+GPTQ | 18.3 | 0.951 | 0.257 | 18.3 | | | NVFP4 | | 17.6 | 0.956
| 0.277 | 17.6 | | | NVFP4 | SVDQ | 18.7 | 0.979 | 0.247 | 18.4 | | | NVFP4 | SVDQ+GPTQ | 18.9 | 0.966 | 0.228 | 19.0 | | SANA-1.6b (20 Steps) | BF16 | -- | 20.6 | 0.952 | -- | -- | | | INT W4A4 | | 20.5 | 0.894 | 0.339 | 15.3 | | | INT W4A4 | GPTQ | 19.9 | 0.881 | 0.288 | 16.4 | | | INT W4A4 | SVDQ | 19.9 | 0.922 | 0.234 | 17.4 | | | INT W4A4 | SVDQ+GPTQ | 19.3 | 0.935 | 0.220 | 17.8 | | | NVFP4 | | 19.7 | 0.929 | 0.236 | 17.4 | | | NVFP4 | GPTQ | 19.7 | 0.925 | 0.202 | 18.3 | | | NVFP4 | SVDQ | 20.2 | 0.951 | 0.190 | 18.6 | | | NVFP4 | SVDQ+GPTQ | 20.2 | 0.941 | 0.176 | 19.0 | | PixArt-Sigma (20 Steps) | FP16 | -- | 16.6 | 0.944 | -- | -- | | | INT W8A8 | ViDiT-Q | 15.7 | 0.944 | 0.137 | 22.5 | | | INT W8A8 | SVDQ | 16.3 | 0.955 | 0.109 | 23.7 | | | INT W4A8 | ViDiT-Q | 37.3 | 0.573 | 0.611 | 12.0 | | | INT W4A4 | SVDQ | 19.9 | 0.858 | 0.356 | 17.0 | | | INT W4A4 | SVDQ+GPTQ | 19.2 | 0.878 | 0.323 | 17.6 | | | NVFP4 | | 31.8 | 0.660 | 0.517 | 14.8 | | | NVFP4 | GPTQ | 27.2 | 0.691 | 0.482 | 15.6 | | | NVFP4 | SVDQ | 17.3 | 0.945 | 0.290 | 18.0 | | | NVFP4 | SVDQ+GPTQ | 16.6 | 0.940 | 0.271 | 18.5 | ## Reference If you find `deepcompressor` useful or relevant to your research, please kindly cite our paper: ```bibtex @inproceedings{ li2024svdquant, title={SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models}, author={Li*, Muyang and Lin*, Yujun and Zhang*, Zhekai and Cai, Tianle and Li, Xiuyu and Guo, Junxian and Xie, Enze and Meng, Chenlin and Zhu, Jun-Yan and Han, Song}, booktitle={The Thirteenth International Conference on Learning Representations}, year={2025} } ``` ================================================ FILE: examples/diffusion/configs/__default__.yaml ================================================ seed: 12345 enable_cache: true cache: root: runs output: root: runs dirname: default pipeline: dtype: torch.float16 device: cuda shift_activations: false eval: num_samples: 5000 height: null width: null guidance_scale: null 
num_steps: null gt_metrics: ["clip_iqa", "clip_score", "image_reward", "fid"] ref_metrics: ["psnr", "lpips", "ssim", "fid"] gen_root: "{output}/{job}" ref_root: baselines/{dtype}/{model}/{protocol} gt_stats_root: benchmarks/stats num_gpus: 8 batch_size_per_gpu: 1 chunk_start: 0 chunk_step: 1 benchmarks: - "MJHQ" - "DCI" control_root: "benchmarks" quant: calib: data: qdiff path: datasets/{dtype}/{model}/{protocol}/{data}/s128 num_samples: 128 num_workers: 8 wgts: dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null skips: [] enable_calib_range: true calib_range: degree: 2 objective: OutputsError strategy: Manual granularity: Layer element_batch_size: 64 sample_batch_size: 64 element_size: 512 sample_size: -1 ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skips: [] low_rank: rank: 32 exclusive: false compensate: false early_stop: false degree: 2 objective: OutputsError sample_batch_size: 64 sample_size: -1 num_iters: 1 skips: [] ipts: static: false dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null allow_unsigned: false skips: [] enable_calib_range: false calib_range: degree: 2 objective: OutputsError strategy: Manual granularity: Layer element_batch_size: 64 sample_batch_size: 64 element_size: 512 sample_size: -1 ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skips: [] enable_smooth: false smooth: enable_proj: false proj: degree: 2 objective: OutputsError strategy: Manual granularity: Layer element_batch_size: -1 sample_batch_size: 64 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu spans: - - AbsMax - AbsMax alpha: 0.5 beta: -1 num_grids: 20 skips: [] develop_dtype: torch.float32 ================================================ FILE: examples/diffusion/configs/collect/qdiff.yaml ================================================ collect: root: datasets dataset_name: qdiff data_path: prompts/qdiff.yaml num_samples: 128 ================================================ FILE: 
examples/diffusion/configs/lora/__default__.yaml ================================================ pipeline: enable_lora: true skip_eval: true ================================================ FILE: examples/diffusion/configs/lora/flux.1-dev/anime.yaml ================================================ # https://huggingface.co/alvdansen/sonny-anime-fixed # alvdansen/sonny-anime-fixed # separate, rank=16 eval: benchmarks: - prompts/lora/anime.yaml num_steps: 28 pipeline: lora: alpha: 1 path: alvdansen/sonny-anime-fixed weight_name: araminta_k_sonnyanime_fluxd_fixed.safetensors output: job: anime-1.0 ================================================ FILE: examples/diffusion/configs/lora/flux.1-dev/ghibsky.yaml ================================================ # https://huggingface.co/aleksa-codes/flux-ghibsky-illustration # aleksa-codes/flux-ghibsky-illustration # separate, rank=16 eval: benchmarks: - prompts/lora/ghibsky.yaml num_steps: 28 pipeline: lora: alpha: 1 path: aleksa-codes/flux-ghibsky-illustration weight_name: lora.safetensors output: job: ghibsky-1.0 ================================================ FILE: examples/diffusion/configs/lora/flux.1-dev/realism.yaml ================================================ # https://huggingface.co/XLabs-AI/flux-RealismLora # XLabs-AI/flux-RealismLora # qkv fused, rank=16, only joint blocks eval: benchmarks: - prompts/lora/realism.yaml num_steps: 25 pipeline: lora: alpha: 0.9 path: mit-han-lab/FLUX.1-dev-LoRA-Collections weight_name: realism.safetensors output: job: realism-0.9 ================================================ FILE: examples/diffusion/configs/lora/flux.1-dev/sketch.yaml ================================================ # https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch/tree/main # Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch # pretrained/converted/drawing.safetensors # fused, rank=64 eval: benchmarks: - prompts/lora/sketch.yaml num_steps: 24 pipeline: lora: alpha: 1 path: 
mit-han-lab/FLUX.1-dev-LoRA-Collections weight_name: sketch.safetensors output: job: sketch-1.0 ================================================ FILE: examples/diffusion/configs/lora/flux.1-dev/yarn.yaml ================================================ # https://huggingface.co/linoyts/yarn_art_Flux_LoRA # linoyts/yarn_art_Flux_LoRA # separate, rank=4, both joint and single blocks eval: benchmarks: - prompts/lora/yarn.yaml num_steps: 28 pipeline: lora: alpha: 1 path: linoyts/yarn_art_Flux_LoRA weight_name: pytorch_lora_weights.safetensors output: job: yarn-1.0 ================================================ FILE: examples/diffusion/configs/model/flux.1-dev.yaml ================================================ pipeline: name: flux.1-dev dtype: torch.bfloat16 eval: num_steps: 50 guidance_scale: 3.5 protocol: fmeuler{num_steps}-g{guidance_scale} quant: calib: batch_size: 16 wgts: calib_range: element_batch_size: 64 sample_batch_size: 16 element_size: 512 sample_size: -1 low_rank: sample_batch_size: 16 sample_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - down_sample - up_sample ipts: calib_range: element_batch_size: 64 sample_batch_size: 16 element_size: 512 sample_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - down_sample - up_sample opts: calib_range: element_batch_size: 64 sample_batch_size: 16 element_size: 512 sample_size: -1 smooth: proj: element_batch_size: -1 sample_batch_size: 16 element_size: -1 sample_size: -1 attn: sample_batch_size: 16 sample_size: -1 ================================================ FILE: examples/diffusion/configs/model/flux.1-schnell.yaml ================================================ pipeline: name: flux.1-schnell dtype: torch.bfloat16 eval: num_steps: 4 guidance_scale: 0 protocol: fmeuler{num_steps}-g{guidance_scale} quant: calib: batch_size: 16 wgts: calib_range: 
element_batch_size: 64 sample_batch_size: 32 element_size: 512 sample_size: -1 low_rank: sample_batch_size: 32 sample_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - down_sample - up_sample ipts: calib_range: element_batch_size: 64 sample_batch_size: 32 element_size: 512 sample_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - down_sample - up_sample opts: calib_range: element_batch_size: 64 sample_batch_size: 32 element_size: 512 sample_size: -1 smooth: proj: element_batch_size: -1 sample_batch_size: 32 element_size: -1 sample_size: -1 attn: sample_batch_size: 32 sample_size: -1 ================================================ FILE: examples/diffusion/configs/model/pixart-sigma.yaml ================================================ pipeline: name: pixart-sigma eval: num_steps: 20 guidance_scale: 4.5 protocol: dpm{num_steps}-g{guidance_scale} quant: calib: batch_size: 256 wgts: calib_range: sample_batch_size: -1 low_rank: sample_batch_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - attn_add - ffn_add - down_sample - up_sample ipts: calib_range: sample_batch_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - attn_add - ffn_add - down_sample - up_sample opts: calib_range: sample_batch_size: -1 smooth: proj: sample_batch_size: -1 attn: sample_batch_size: -1 ================================================ FILE: examples/diffusion/configs/model/sana-1.6b.yaml ================================================ pipeline: name: sana-1.6b-1024px-bf16-ch5632 path: Lawrence-cj/Sana_1600M_1024px_BF16_diffusers_ch5632 dtype: torch.bfloat16 eval: num_steps: 20 guidance_scale: 4.5 protocol: 
flowdpm{num_steps}-g{guidance_scale} quant: calib: batch_size: 256 wgts: calib_range: element_batch_size: 64 sample_batch_size: 32 element_size: 512 sample_size: -1 low_rank: sample_batch_size: 32 sample_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - attn_add - ffn_add - down_sample - up_sample ipts: calib_range: element_batch_size: 64 sample_batch_size: 32 element_size: 512 sample_size: -1 skips: - embed - resblock_shortcut - resblock_time_proj - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - attn_add - ffn_add - down_sample - up_sample opts: calib_range: element_batch_size: 64 sample_batch_size: 32 element_size: 512 sample_size: -1 smooth: proj: element_batch_size: -1 sample_batch_size: 32 element_size: -1 sample_size: -1 attn: sample_batch_size: 32 sample_size: -1 ================================================ FILE: examples/diffusion/configs/svdquant/__default__.yaml ================================================ quant: enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: GridSearch granularity: Layer spans: - - AbsMax - AbsMax alpha: 0.5 beta: -2 num_grids: 20 allow_low_rank: true fuse_when_possible: false skips: - embed - resblock - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - down_sample - up_sample wgts: enable_low_rank: true low_rank: rank: 32 early_stop: true degree: 2 objective: OutputsError num_iters: 100 skips: - embed - resblock - transformer_proj_in - transformer_proj_out - transformer_norm - transformer_add_norm - down_sample - up_sample ================================================ FILE: examples/diffusion/configs/svdquant/fast.yaml ================================================ quant: smooth: proj: num_grids: 10 calib: num_samples: 64 ================================================ FILE: 
examples/diffusion/configs/svdquant/gptq.yaml ================================================ quant: wgts: enable_kernel_gptq: true kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 ================================================ FILE: examples/diffusion/configs/svdquant/int4.yaml ================================================ quant: wgts: dtype: sint4 group_shapes: - - 1 - 64 - 1 - 1 - 1 scale_dtypes: - null ipts: static: false dtype: sint4 group_shapes: - - 1 - 64 - 1 - 1 - 1 scale_dtypes: - null allow_unsigned: true pipeline: shift_activations: true ================================================ FILE: examples/diffusion/configs/svdquant/nvfp4.yaml ================================================ quant: wgts: dtype: sfp4_e2m1_all group_shapes: - - -1 - -1 - - 1 - 16 - 1 - 1 - 1 scale_dtypes: - null - sfp8_e4m3_nan ipts: static: false dtype: sfp4_e2m1_all group_shapes: - - 1 - 16 - 1 - 1 - 1 scale_dtypes: - sfp8_e4m3_nan enable_extra_wgts: true extra_wgts: dtype: sint4 group_shapes: - - 1 - 64 - 1 - 1 - 1 scale_dtypes: - null includes: - transformer_norm - transformer_add_norm ================================================ FILE: examples/diffusion/configs/text/__default__.yaml ================================================ # copied from projects/llm/configs/__default__.yaml enable_text: true text: calib: data: pileval path: mit-han-lab/pile-val-backup num_samples: 128 seq_length: 1024 min_seq_length: 0 max_seq_length: 0 develop_dtype: torch.float32 wgts: dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null intermediate_dtypes: [] intermediate_levels: [] needs_dequant_saturation: false enable_kernel_gptq: false kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 enable_calib_range: true calib_range: objective: OutputsError strategy: Manual granularity: Group degree: 2 element_batch_size: 64 sample_batch_size: -1 element_size: 512 sample_size: -1 
pre_reshape: true outputs_device: cpu ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skip_qkv_proj: false skip_out_proj: false skip_up_proj: false skip_down_proj: false skip_qkv_proj: false skip_out_proj: false skip_up_proj: false skip_down_proj: false ipts: static: false dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null enable_calib_range: false calib_range: objective: OutputsError strategy: GridSearch granularity: ChannelGroup degree: 2 element_batch_size: 64 sample_batch_size: -1 element_size: 512 sample_size: -1 pre_reshape: true outputs_device: cpu ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skip_qkv_proj: false skip_out_proj: false skip_up_proj: false skip_down_proj: false skip_qkv_proj: false skip_out_proj: false skip_up_proj: false skip_down_proj: false opts: static: false dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null enable_calib_range: false calib_range: objective: OutputsError strategy: GridSearch granularity: ChannelGroup degree: 2 element_batch_size: 64 sample_batch_size: -1 element_size: 512 sample_size: -1 pre_reshape: true outputs_device: cpu ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skip_attn_q: false skip_attn_k: false skip_attn_v: false skip_attn_q: false skip_attn_k: false skip_attn_v: false enable_rotation: false rotation: random: false transform_out_proj: false transform_down_proj: false enable_reorder: false reorder: strategy: Manual degree: 2 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu channel_metric: InputsAbsMax channel_index: Sequential dynamic: false skip_residual: true skip_out_proj: false skip_down_proj: false enable_smooth: false smooth: enable_proj: false proj: objective: OutputsError strategy: GridSearch granularity: Layer degree: 2 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu spans: - - AbsMax - 
AbsMax alpha: -3 beta: -3 num_grids: 20 skip_qkv_proj: false skip_out_proj: false skip_up_proj: false skip_down_proj: false enable_attn: false attn: objective: OutputsError strategy: Manual granularity: Layer degree: 2 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu spans: - - AbsMax - AbsMax alpha: 0.5 beta: 0 num_grids: 20 ================================================ FILE: examples/diffusion/configs/text/awq.yaml ================================================ # copied from projects/llm/configs/awq.yaml text: calib: num_samples: 128 seq_length: 512 min_seq_length: 0 max_seq_length: 512 wgts: dtype: uint4 zero_point: PostScale group_shapes: - - 1 - 128 scale_dtypes: - torch.float16 enable_calib_range: true calib_range: objective: ProductsError strategy: GridSearch granularity: Group degree: 2 max_shrink: 0.8 max_expand: 1.0 num_grids: 20 skip_qkv_proj: true ipts: static: false dtype: null group_shapes: - - 1 - -1 scale_dtypes: - null opts: static: false dtype: null group_shapes: - - 1 - -1 scale_dtypes: - null enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: GridSearch granularity: Layer spans: - - AbsMax - AbsMax alpha: 0.5 beta: 0 num_grids: 20 enable_attn: false ================================================ FILE: examples/diffusion/prompts/lora/anime.yaml ================================================ 00-anime: a chibi astronaut, nm22 minimalist style 01-anime: a pink frog, nm22 minimalist style 02-anime: a girl wearing a flower crown, nm22 anime style 03-anime: a girl wearing a flower crown, nm22 style 04-anime: a cute creature, nm22 style 05-anime: a cat with a tiny sword, nm22 illustration style 06-anime: a girl with brown-blonde hair and big round glasses, tired, white tank top, jeans, nm22 style 07-anime: a girl with brown-blonde hair and big round glasses, tired, white tank top, jeans, nm22 illustration style 08-anime: girl, neck tuft, white 
hair, sheep horns, blue eyes, nm22 style 09-anime: a boy in a sailor suit frowning, nm22 illustration style 10-anime: girl, neck tuft, white hair, sheep horns, blue eyes, nm22 style 11-anime: a little witch, nm22 minimalist style, pastelcore 12-anime: a chibi astronaut, nm22 minimalist anime style, crayon style 13-anime: a dog wearing a wizard hat, nm22 anime style 14-anime: a girl with wings, nm22 illustration style 15-anime: a robot with a heart on its chest, nm22 style 16-anime: a dancing mushroom, nm22 anime style 17-anime: a knight in a rainbow-colored armor, nm22 style 18-anime: a sleepy cat on a pillow, nm22 pastelcore style 19-anime: a bunny reading a book, nm22 minimalist style 20-anime: a little girl holding a balloon, nm22 crayon style 21-anime: a boy wearing a hoodie and headphones, nm22 lofi style 22-anime: a dragon flying in the night sky, nm22 style 23-anime: a fox wearing glasses, nm22 illustration style 24-anime: a cat sleeping in a hammock, nm22 minimalist style 25-anime: a deer in a forest at night, nm22 anime style 26-anime: a robot watering plants, nm22 style 27-anime: a girl with a magic staff, nm22 anime style 28-anime: a girl riding a bicycle, nm22 minimalist style 29-anime: a frog playing the guitar, nm22 pastelcore style 30-anime: a squirrel eating a nut, nm22 style 31-anime: a penguin with a scarf, nm22 anime style 32-anime: a bird sitting on a branch, nm22 minimalist style 33-anime: a bear wearing a jacket, nm22 illustration style 34-anime: a boy playing the piano, nm22 lofi style 35-anime: a mouse holding a flower, nm22 pastelcore style 36-anime: a cat sitting in a teacup, nm22 style 37-anime: a girl with pink hair and headphones, nm22 anime style 38-anime: a dog wearing sunglasses, nm22 minimalist style 39-anime: a raccoon with a backpack, nm22 illustration style 40-anime: a girl standing under a tree, nm22 lofi style 41-anime: a fish swimming in a bowl, nm22 pastelcore style 42-anime: a girl wearing a cloak, nm22 anime style 43-anime: 
a bear holding a cup of coffee, nm22 minimalist style 44-anime: a rabbit wearing a crown, nm22 illustration style 45-anime: a fox curled up sleeping, nm22 lofi style 46-anime: a girl sitting on a cloud, nm22 pastelcore style 47-anime: a dog holding a balloon, nm22 style 48-anime: a boy with wings, nm22 anime style 49-anime: a cat playing with yarn, nm22 minimalist style 50-anime: a dragon blowing bubbles, nm22 illustration style 51-anime: a girl sitting on the moon, nm22 lofi style 52-anime: a panda eating bamboo, nm22 pastelcore style 53-anime: a lion wearing glasses, nm22 anime style 54-anime: a girl dancing in the rain, nm22 minimalist style 55-anime: a girl in a witch's hat, nm22 illustration style 56-anime: a bunny riding a bicycle, nm22 pastelcore style 57-anime: a bear holding an umbrella, nm22 style 58-anime: a fox with a scarf, nm22 anime style 59-anime: a cat chasing a butterfly, nm22 minimalist style 60-anime: a boy with a skateboard, nm22 lofi style 61-anime: a girl with braids, nm22 pastelcore style 62-anime: a giraffe with a long scarf, nm22 style 63-anime: a cat wearing a wizard's hat, nm22 anime style 64-anime: a squirrel holding a nut, nm22 minimalist style 65-anime: a dragon with a crown, nm22 illustration style 66-anime: a girl holding a lantern, nm22 lofi style 67-anime: a bunny in a field of flowers, nm22 pastelcore style 68-anime: a cat in space, nm22 style 69-anime: a girl with short hair, nm22 anime style 70-anime: a dog sitting by a campfire, nm22 minimalist style 71-anime: a robot playing chess, nm22 illustration style 72-anime: a girl looking out the window, nm22 lofi style 73-anime: a rabbit holding a cupcake, nm22 pastelcore style 74-anime: a fish swimming in a pond, nm22 style 75-anime: a boy reading a book, nm22 anime style 76-anime: a bear wearing a hat, nm22 minimalist style 77-anime: a cat playing the piano, nm22 illustration style 78-anime: a girl sitting by a river, nm22 lofi style 79-anime: a fox holding a flower, nm22 
pastelcore style 80-anime: a dog running in a field, nm22 style 81-anime: a bird flying in the sky, nm22 anime style 82-anime: a girl holding a teddy bear, nm22 minimalist style 83-anime: a mouse with a piece of cheese, nm22 illustration style 84-anime: a girl riding a dragon, nm22 lofi style 85-anime: a frog sitting on a lily pad, nm22 pastelcore style 86-anime: a lion with a crown, nm22 style 87-anime: a boy with a cape, nm22 anime style 88-anime: a penguin sliding on ice, nm22 minimalist style 89-anime: a squirrel with a tiny backpack, nm22 illustration style 90-anime: a girl sitting under a tree, nm22 lofi style 91-anime: a cat chasing a butterfly, nm22 pastelcore style 92-anime: a dragon flying over a city, nm22 style 93-anime: a boy holding a kite, nm22 anime style 94-anime: a dog sitting by a fire, nm22 minimalist style 95-anime: a girl with long flowing hair, nm22 illustration style 96-anime: a boy with a magic wand, nm22 lofi style 97-anime: a cat wearing glasses, nm22 pastelcore style 98-anime: a bear holding a balloon, nm22 style 99-anime: a girl looking at the stars, nm22 anime style ================================================ FILE: examples/diffusion/prompts/lora/ghibsky.yaml ================================================ 00-ghibsky: GHIBSKY style, a cat on a windowsill gazing out at a starry night sky and distant city lights 01-ghibsky: GHIBSKY style, a fisherman casting a line into a peaceful village lake surrounded by quaint cottages 02-ghibsky: GHIBSKY style, cozy mountain cabin covered in snow, with smoke curling from the chimney and a warm, inviting light spilling through the windows 03-ghibsky: GHIBSKY style, Mykonos 04-ghibsky: GHIBSKY style, an orange Lamborghini driving down a hill road at night with a beautiful ocean view in the background, side view, no text 05-ghibsky: GHIBSKY style, a small Yorkie on a windowsill during a snowy winter night, with a warm, cozy glow from inside and soft snowflakes drifting outside 06-ghibsky: GHIBSKY 
style, serene Japanese garden with a koi pond and a traditional tea house, nestled under a canopy of cherry blossoms in full bloom 07-ghibsky: GHIBSKY style, the most beautiful place in the universe 08-ghibsky: GHIBSKY style, a peaceful autumn park, with golden leaves falling slowly from the trees and a wooden bench inviting passersby to rest 09-ghibsky: GHIBSKY style, a snowy alpine village at twilight, with the soft glow of lanterns and smoke rising from chimneys 10-ghibsky: GHIBSKY style, a small fishing boat bobbing on a crystal-clear lake, surrounded by towering mountains covered in snow 11-ghibsky: GHIBSKY style, a cozy living room with a crackling fireplace, large windows looking out at a snow-covered forest 12-ghibsky: GHIBSKY style, a serene beach at sunrise, with gentle waves lapping the shore and seagulls soaring overhead 13-ghibsky: GHIBSKY style, an ancient castle perched atop a cliff, with storm clouds gathering in the distance and waves crashing far below 14-ghibsky: GHIBSKY style, a peaceful Japanese garden under the soft glow of lanterns, with a small bridge over a koi pond and a traditional tea house nearby 15-ghibsky: GHIBSKY style, an enchanted forest at night, with glowing mushrooms and fireflies lighting up the underbrush 16-ghibsky: GHIBSKY style, a serene mountain lake with crystal-clear water, surrounded by towering pine trees and rocky cliffs 17-ghibsky: GHIBSKY style, a quiet village at dusk, with warm light spilling from windows and the sound of distant laughter in the air 18-ghibsky: GHIBSKY style, a quaint countryside cottage surrounded by blooming gardens, with a small path leading to a wooden gate 19-ghibsky: GHIBSKY style, a quiet harbor at sunset, with fishing boats gently bobbing on the water and the sky painted in shades of pink and orange 20-ghibsky: GHIBSKY style, a lush meadow filled with wildflowers, with mountains rising in the distance and a soft breeze rustling the grass 21-ghibsky: GHIBSKY style, an ancient oak tree in 
the center of a small village, its branches stretching wide and casting a cool shade on the villagers below 22-ghibsky: GHIBSKY style, a small boat floating on a calm river at dawn, with mist rising from the water and trees lining the shore 23-ghibsky: GHIBSKY style, a serene valley surrounded by towering mountains, with a small stream winding through the green grass 24-ghibsky: GHIBSKY style, a peaceful beach town with colorful houses lining the shore and a calm ocean stretching out into the horizon 25-ghibsky: GHIBSKY style, a small cabin in the woods, with smoke rising from the chimney and the soft glow of lanterns inside 26-ghibsky: GHIBSKY style, a quiet street in a small town, with colorful flowers blooming in window boxes and a cat lounging on a doorstep 27-ghibsky: GHIBSKY style, a quiet forest clearing at twilight, with a soft mist rising from the ground and fireflies glowing in the trees 28-ghibsky: GHIBSKY style, a small rowboat tied to a wooden dock, with a calm lake reflecting the orange and pink hues of the sunset 29-ghibsky: GHIBSKY style, a tranquil Japanese temple surrounded by cherry blossoms in full bloom, with the sound of a distant waterfall 30-ghibsky: GHIBSKY style, a peaceful riverside village at sunrise, with boats gently bobbing in the water and the sound of birds chirping in the distance 31-ghibsky: GHIBSKY style, an old stone cottage nestled in a meadow of wildflowers, with butterflies fluttering in the breeze 32-ghibsky: GHIBSKY style, a quiet forest path covered in fallen leaves, with the sun shining through the trees and a soft breeze rustling the branches 33-ghibsky: GHIBSKY style, a cozy cabin in the snow, with smoke curling from the chimney and the warm glow of lights inside 34-ghibsky: GHIBSKY style, a peaceful mountain village at dusk, with lanterns lighting up the streets and the sound of distant music 35-ghibsky: GHIBSKY style, a small cottage by a tranquil lake, with a wooden dock leading out to the water and mountains rising 
in the distance 36-ghibsky: GHIBSKY style, a misty morning in a quiet forest, with the soft light of dawn filtering through the trees and the sound of birds singing 37-ghibsky: GHIBSKY style, a quiet garden at twilight, with blooming flowers and the soft glow of lanterns lighting up the path 38-ghibsky: GHIBSKY style, a cozy cabin in the mountains, with a fire roaring in the fireplace and snow gently falling outside 39-ghibsky: GHIBSKY style, a serene countryside farm, with a barn and silo standing tall against the backdrop of rolling hills 40-ghibsky: GHIBSKY style, a peaceful village on the edge of a forest, with thatched-roof cottages and a small stream running through the center 41-ghibsky: GHIBSKY style, a calm lake at sunset, with the reflection of the mountains and trees mirrored in the still water 42-ghibsky: GHIBSKY style, a quiet park at dusk, with soft light filtering through the trees and the sound of children playing in the distance 43-ghibsky: GHIBSKY style, a cozy living room with a view of a snow-covered forest, the fireplace crackling and a blanket draped over a comfy chair 44-ghibsky: GHIBSKY style, a peaceful beach at twilight, with the last rays of the sun casting a warm glow over the ocean 45-ghibsky: GHIBSKY style, a tranquil garden pond, with lily pads floating on the water and a small wooden bridge crossing over it 46-ghibsky: GHIBSKY style, a quiet mountain village at dawn, with the soft light of morning illuminating the rooftops and the sound of birds chirping in the distance 47-ghibsky: GHIBSKY style, a serene forest clearing, with wildflowers blooming in the grass and sunlight streaming through the trees 48-ghibsky: GHIBSKY style, a peaceful river winding through a quiet countryside, with small boats tied to the shore and the sound of water gently flowing 49-ghibsky: GHIBSKY style, a cozy cabin in a snowy forest, with smoke rising from the chimney and the soft glow of lights inside 50-ghibsky: GHIBSKY style, a quiet village street at 
night, with the soft light of lanterns and the sound of distant footsteps echoing through the air 51-ghibsky: GHIBSKY style, a peaceful mountain lake at sunrise, with the reflection of the mountains mirrored in the still water 52-ghibsky: GHIBSKY style, a serene countryside farm, with a barn and fields of golden wheat stretching out into the distance 53-ghibsky: GHIBSKY style, a cozy living room with a large window overlooking a snowy forest, the fire crackling and the smell of fresh coffee in the air 54-ghibsky: GHIBSKY style, a peaceful village by the sea, with colorful houses and fishing boats tied to the dock 55-ghibsky: GHIBSKY style, a tranquil garden at dusk, with lanterns lighting up the path and the sound of crickets in the distance 56-ghibsky: GHIBSKY style, a small cabin in the woods, with the soft glow of a lantern inside and the sound of a nearby stream 57-ghibsky: GHIBSKY style, a peaceful mountain village at sunset, with the last rays of the sun casting a warm glow over the rooftops 58-ghibsky: GHIBSKY style, a serene forest clearing at dawn, with sunlight streaming through the trees and the sound of birds chirping in the distance 59-ghibsky: GHIBSKY style, a quiet beach at twilight, with the soft glow of the setting sun reflecting on the ocean 60-ghibsky: GHIBSKY style, a tranquil garden pond, with lily pads floating on the water and the soft sound of a waterfall in the distance 61-ghibsky: GHIBSKY style, a cozy cabin in the snow, with smoke curling from the chimney and the warm glow of lights inside 62-ghibsky: GHIBSKY style, a peaceful forest path, with sunlight filtering through the trees and the sound of leaves rustling in the breeze 63-ghibsky: GHIBSKY style, a quiet street in a small town, with colorful flowers blooming in window boxes and the soft sound of distant laughter 64-ghibsky: GHIBSKY style, a peaceful village at dusk, with lanterns lighting up the streets and the soft sound of distant music 65-ghibsky: GHIBSKY style, a cozy living 
room with a view of a snow-covered forest, the fireplace crackling and a blanket draped over a comfy chair 66-ghibsky: GHIBSKY style, a serene countryside farm, with fields of golden wheat stretching out into the distance and the soft sound of the wind 67-ghibsky: GHIBSKY style, a quiet forest path at dusk, with the soft light of lanterns guiding the way and the sound of distant footsteps 68-ghibsky: GHIBSKY style, a peaceful village on the edge of a forest, with thatched-roof cottages and a small stream running through the center 69-ghibsky: GHIBSKY style, a calm lake at sunset, with the reflection of the mountains and trees mirrored in the still water 70-ghibsky: GHIBSKY style, a quiet park at dusk, with soft light filtering through the trees and the sound of children playing in the distance 71-ghibsky: GHIBSKY style, a cozy cabin in the snow, with smoke curling from the chimney and the warm glow of lights inside 72-ghibsky: GHIBSKY style, a peaceful mountain village at dawn, with the soft light of morning illuminating the rooftops 73-ghibsky: GHIBSKY style, a quiet street in a small town, with colorful flowers blooming in window boxes and a cat lounging on a doorstep 74-ghibsky: GHIBSKY style, a small cabin in the woods, with the soft glow of a lantern inside and the sound of a nearby stream 75-ghibsky: GHIBSKY style, a quiet forest clearing at twilight, with a soft mist rising from the ground and fireflies glowing in the trees 76-ghibsky: GHIBSKY style, a cozy cabin in the mountains, with a fire roaring in the fireplace and snow gently falling outside 77-ghibsky: GHIBSKY style, a peaceful village on a riverbank, with boats gently swaying in the water and the sound of a distant bell 78-ghibsky: GHIBSKY style, a serene valley surrounded by towering mountains, with a small stream winding through the green grass 79-ghibsky: GHIBSKY style, a quiet mountain road winding through a dense forest, with the soft sound of birds singing in the trees 80-ghibsky: GHIBSKY 
style, a peaceful beach town at dusk, with the last rays of the sun casting a warm glow over the ocean 81-ghibsky: GHIBSKY style, a calm lake at sunrise, with the reflection of the mountains and trees mirrored in the still water 82-ghibsky: GHIBSKY style, a tranquil garden at twilight, with the soft glow of lanterns lighting up the path and the sound of distant music 83-ghibsky: GHIBSKY style, a cozy cabin in a snowy forest, with smoke rising from the chimney and the soft glow of lights inside 84-ghibsky: GHIBSKY style, a quiet village street at night, with the soft light of lanterns and the sound of distant footsteps echoing through the air 85-ghibsky: GHIBSKY style, a serene countryside farm, with fields of golden wheat stretching out into the distance and the soft sound of the wind 86-ghibsky: GHIBSKY style, a tranquil mountain lake, with crystal-clear water reflecting the surrounding trees and cliffs 87-ghibsky: GHIBSKY style, a peaceful forest glade at dusk, with the soft light of the setting sun filtering through the trees 88-ghibsky: GHIBSKY style, a quiet mountain road winding through a dense forest, with the soft sound of leaves rustling in the breeze 89-ghibsky: GHIBSKY style, a cozy cabin in the mountains, with a fire roaring in the fireplace and snow gently falling outside 90-ghibsky: GHIBSKY style, a quiet riverside village at sunrise, with boats gently bobbing in the water and the sound of birds chirping in the distance 91-ghibsky: GHIBSKY style, a peaceful beach at twilight, with the soft glow of the setting sun reflecting on the ocean 92-ghibsky: GHIBSKY style, a serene forest clearing at dawn, with sunlight streaming through the trees and the sound of birds chirping in the distance 93-ghibsky: GHIBSKY style, a small boat floating on a calm river at dawn, with mist rising from the water and trees lining the shore 94-ghibsky: GHIBSKY style, a quiet street in an old European town, with ivy-covered buildings and a warm glow from the streetlights 
95-ghibsky: GHIBSKY style, a cozy living room with a large window overlooking a snow-covered forest, the fire crackling and the smell of fresh coffee in the air 96-ghibsky: GHIBSKY style, a peaceful village by the sea, with colorful houses and fishing boats tied to the dock 97-ghibsky: GHIBSKY style, a tranquil garden pond, with lily pads floating on the water and the soft sound of a waterfall in the distance 98-ghibsky: GHIBSKY style, a cozy cabin in the snow, with smoke curling from the chimney and the warm glow of lights inside 99-ghibsky: GHIBSKY style, a calm lake at sunset, with the reflection of the mountains and trees mirrored in the still water ================================================ FILE: examples/diffusion/prompts/lora/realism.yaml ================================================ 00-realism: a man in armor with a beard and a sword 01-realism: A handsome man in a suit, 25 years old, cool, futuristic 02-realism: A girl in a suit covered with bold tattoos and holding a vest pistol, beautiful woman, 25 years old, cool, future fantasy, turquoise & light orange ping curl hair 03-realism: A rugged cowboy in a wide-brimmed hat, holding a rifle, standing in a desert at sunset 04-realism: A scientist in a white lab coat examining a holographic display, futuristic lab environment 05-realism: A knight with a glowing sword riding a white horse in a medieval battlefield, dust in the air 06-realism: A young woman wearing a cyberpunk outfit with neon lights reflecting on her, urban cityscape at night 07-realism: An astronaut floating in deep space, Earth in the background, helmet reflecting distant stars 08-realism: A warrior princess holding a spear, standing on a cliff overlooking a vast ocean 09-realism: A male detective in a trench coat, standing in a rainy alleyway with neon signs illuminating the surroundings 10-realism: A futuristic robot holding a plasma rifle, standing in front of a burning city 11-realism: A samurai in full armor, sword drawn, 
standing in a bamboo forest during autumn 12-realism: A woman wearing a futuristic exoskeleton suit, preparing for battle in a high-tech bunker 13-realism: A pirate captain with a hook hand, standing at the helm of a ship during a storm 14-realism: A young boy holding a wooden sword, exploring a magical forest with glowing mushrooms 15-realism: An elderly wizard casting a spell with a glowing staff in a stone tower filled with books 16-realism: A female assassin with a hooded cloak, leaping across rooftops in a moonlit city 17-realism: A giant mech suit piloted by a young soldier, walking through a war-torn city 18-realism: A knight fighting a fire-breathing dragon in front of a medieval castle, flames and smoke 19-realism: A post-apocalyptic survivor wearing makeshift armor, scavenging for supplies in a desolate wasteland 20-realism: A superhero flying over a futuristic city skyline at dusk, cape trailing behind 21-realism: A medieval archer aiming a bow at a distant target, standing in a green forest clearing 22-realism: A woman in a flowing red dress dancing under a full moon on the beach 23-realism: A cybernetic soldier holding dual pistols, standing in a neon-lit futuristic city street 24-realism: A male warrior with tribal tattoos holding a glowing axe, standing in front of a waterfall 25-realism: A female pilot in a sleek spacefighter cockpit, preparing for takeoff from a space station 26-realism: A steampunk inventor working on a mechanical bird, surrounded by gears and blueprints 27-realism: A sorceress with glowing eyes, summoning fire in her hands, standing in a dark cave 28-realism: A Viking warrior holding a large shield and axe, standing on a snowy battlefield 29-realism: A futuristic hacker wearing a virtual reality headset, interacting with holographic data streams 30-realism: A futuristic cityscape with flying cars, neon lights, and towering skyscrapers at sunset 31-realism: A knight in shining armor kneeling before a queen on a golden throne, 
royal court scene 32-realism: A female space explorer walking on the surface of an alien planet with strange rock formations 33-realism: A ninja in black garb, jumping through the air with a katana drawn, cherry blossoms in the background 34-realism: A male rockstar with long hair playing an electric guitar on a stage with a cheering crowd 35-realism: A medieval blacksmith hammering a sword on an anvil, sparks flying in a dimly lit forge 36-realism: A futuristic spaceship flying through an asteroid field, dodging incoming debris 37-realism: A samurai meditating on a wooden deck overlooking a serene garden pond with koi fish 38-realism: A pirate ship battling a naval fleet on the high seas, cannon fire and explosions 39-realism: A female warrior in golden armor wielding a spear, leading an army in a large battlefield 40-realism: A fantasy elf archer wearing green armor, standing in a mystical forest with glowing plants 41-realism: A space marine holding a plasma rifle, standing on the surface of a desolate moon with craters 42-realism: A young wizard apprentice studying a glowing ancient book in a library with floating candles 43-realism: A cyberpunk biker riding a neon-lit motorcycle through a rainy city street at night 44-realism: A male gladiator in a Roman arena, fighting a wild beast with a trident and net 45-realism: A female secret agent in a sleek black suit, holding a silenced pistol, standing in a high-rise building 46-realism: A futuristic racing car speeding through a neon-lit tunnel at night 47-realism: A male soldier in camouflage gear, crouched in a jungle, aiming a sniper rifle 48-realism: A battle-hardened female warrior with scars, holding a massive war hammer in a post-apocalyptic wasteland 49-realism: A robotic android walking through a dystopian city, people staring in awe 50-realism: A young prince in royal robes, standing in a grand hall with stained glass windows 51-realism: A female bounty hunter with a plasma blaster, tracking her target in 
a bustling alien marketplace 52-realism: A male samurai warrior performing a ritual in a traditional Japanese temple 53-realism: A futuristic drone flying through a dense jungle, scanning the area with a laser grid 54-realism: A male wizard with a long white beard casting a lightning spell in the middle of a storm 55-realism: A futuristic skyscraper with glass walls, towering above a sprawling city below 56-realism: A medieval king standing on a balcony overlooking his kingdom, wearing a golden crown 57-realism: A female vampire with glowing red eyes, standing in front of a Gothic castle under a blood moon 58-realism: A futuristic police officer in power armor, patrolling the streets of a cyberpunk city 59-realism: A gladiator holding a sword and shield, standing victorious in a Roman arena with cheering crowds 60-realism: A young woman with long flowing hair, standing on a mountain peak at dawn, overlooking a misty valley 61-realism: A male firefighter in full gear, rescuing a child from a burning building 62-realism: A futuristic train speeding through a city filled with towering skyscrapers and neon lights 63-realism: A knight in rusty armor, standing in front of a crumbling castle, holding a broken sword 64-realism: A female archer wearing a hooded cloak, aiming an arrow at a distant enemy in a dark forest 65-realism: A male pilot flying a fighter jet in a dogfight, with missiles and explosions in the sky 66-realism: A cybernetic humanoid, standing in a high-tech lab, with wires and machines connected to its body 67-realism: A female martial artist performing a high kick, surrounded by a crowd in an underground fight club 68-realism: A futuristic skyscraper with solar panels on every surface, towering over a green city filled with plants 69-realism: A male cowboy riding a horse across a vast desert landscape, with mountains in the distance 70-realism: A futuristic robot cleaning the streets of a city with sleek, minimalist architecture 71-realism: A female 
astronaut standing on a space station, looking out at a distant galaxy 72-realism: A medieval blacksmith working in his forge, hammering a glowing sword 73-realism: A male secret agent in a tuxedo, holding a gun, standing in front of a burning building 74-realism: A futuristic racing car with glowing neon lights, speeding down a highway at night 75-realism: A cyberpunk hacker surrounded by floating holographic displays, typing rapidly on a keyboard 76-realism: A male samurai with a katana, standing in front of a traditional Japanese pagoda 77-realism: A futuristic soldier in power armor, standing in front of a war-torn battlefield 78-realism: A female warrior in dragon armor, holding a flaming sword, standing on a mountain peak 79-realism: A knight holding a shield and sword, standing in front of a castle under siege 80-realism: A cybernetic assassin in a dark alley, holding a silenced pistol, glowing red eyes 81-realism: A futuristic city with flying cars, massive holographic billboards, and neon lights everywhere 82-realism: A pirate ship sailing through a stormy sea, with lightning striking in the distance 83-realism: A samurai meditating in a serene garden, with cherry blossoms falling around him 84-realism: A female knight in silver armor, standing on a battlefield at sunrise, holding a banner 85-realism: A futuristic spaceship entering a wormhole, surrounded by swirling colors and stars 86-realism: A male explorer in a jungle, cutting through dense vegetation with a machete 87-realism: A robot chef preparing a meal in a futuristic kitchen with robotic arms 88-realism: A gladiator fighting a lion in an ancient Roman coliseum 89-realism: A steampunk airship floating over a vast desert landscape with mountains in the distance 90-realism: A medieval knight charging into battle on horseback, holding a lance 91-realism: A female superhero flying through the air, cape billowing behind her, over a futuristic city 92-realism: A pirate captain standing on the deck of a 
ship, looking out at the horizon with a spyglass 93-realism: A male samurai in full armor, standing on a battlefield with cherry blossoms falling around him 94-realism: A futuristic skyscraper with massive solar panels, surrounded by flying drones 95-realism: A male gladiator fighting a lion in an ancient Roman coliseum 96-realism: A female vampire standing in front of a Gothic castle, glowing red eyes 97-realism: A pirate ship sailing through a stormy sea with lightning flashing in the sky 98-realism: A knight in shining armor, standing in front of a castle under siege 99-realism: A cybernetic assassin in a dark alley, holding a glowing red sword ================================================ FILE: examples/diffusion/prompts/lora/sketch.yaml ================================================ 00-drawing: sketched style, A stick-figure-style robot, and some sci-fi machines, with pastel colors, highlight the innocence and romance of children 01-drawing: sketched style, A joyful girl with balloons floats above a city wearing a hat and striped pants 02-drawing: sketched style, Some happy children stand ready to take pictures 03-drawing: sketched style, A vintage airplane soaring over rolling hills, with a bright sun in the background 04-drawing: sketched style, A whimsical treehouse with ladders, rope swings, and flags in a dreamy forest setting 05-drawing: sketched style, A group of animals having a tea party under a rainbow, with each holding a tiny cup 06-drawing: sketched style, A young boy flying a kite shaped like a dragon over a grassy field 07-drawing: sketched style, A family of cats lounging in a cozy living room, with soft pillows and a fireplace 08-drawing: sketched style, A bicycle with a basket full of flowers parked near a small cafe in a European town 09-drawing: sketched style, Two best friends sharing an ice cream cone on a hot summer day, with bright skies and trees 10-drawing: sketched style, A little girl playing hopscotch on a sidewalk with 
colorful chalk drawings around her 11-drawing: sketched style, A retro car cruising down a coastal road with the ocean waves crashing nearby 12-drawing: sketched style, A young wizard casting a spell in a magical library filled with floating books and potions 13-drawing: sketched style, A superhero cat flying over the rooftops of a bustling city at sunset 14-drawing: sketched style, A cozy log cabin surrounded by snowy mountains and tall pine trees 15-drawing: sketched style, A pirate ship sailing under the stars, with the moon reflecting on the calm sea 16-drawing: sketched style, A garden full of colorful butterflies and blooming flowers with a gentle breeze blowing 17-drawing: sketched style, An astronaut floating in space, surrounded by twinkling stars and distant planets 18-drawing: sketched style, A fairytale castle perched on a hill, with a dragon flying in the distance 19-drawing: sketched style, A beach scene with kids building sandcastles and seagulls flying overhead 20-drawing: sketched style, A mermaid swimming with dolphins in a vibrant underwater world 21-drawing: sketched style, A hot air balloon drifting peacefully over a patchwork of fields and forests below 22-drawing: sketched style, A group of kids roasting marshmallows around a campfire under the night sky 23-drawing: sketched style, A train passing through a snowy landscape, with smoke billowing from its chimney 24-drawing: sketched style, A fairy flying above a mushroom village, sprinkling sparkles in the air 25-drawing: sketched style, A whimsical clock tower with gears and cogs exposed, ticking away in a steampunk world 26-drawing: sketched style, A sunny meadow with a girl in a flowy dress chasing butterflies 27-drawing: sketched style, A magical doorway hidden in the roots of a giant tree, leading to a mystical land 28-drawing: sketched style, A group of kids riding their bikes down a country road, with golden fields on either side 29-drawing: sketched style, A lighthouse standing tall on 
a cliff, with waves crashing against the rocks below 30-drawing: sketched style, A friendly robot watering a garden of colorful flowers 31-drawing: sketched style, A squirrel wearing glasses and reading a tiny book under an oak tree 32-drawing: sketched style, A dragon curled up like a cat, taking a nap on a pile of gold 33-drawing: sketched style, A carousel with brightly colored horses spinning in a town square 34-drawing: sketched style, A group of penguins having a snowball fight in the Arctic 35-drawing: sketched style, A fantasy village with windmills and stone cottages under a pink sunset 36-drawing: sketched style, A child looking out of a train window at a beautiful snowy landscape 37-drawing: sketched style, A small boat floating on a peaceful lake, surrounded by trees and mountains 38-drawing: sketched style, A bustling farmer's market with people buying fresh fruits and vegetables 39-drawing: sketched style, A futuristic city with flying cars and towering skyscrapers in the distance 40-drawing: sketched style, A young adventurer discovering an ancient treasure chest in a hidden cave 41-drawing: sketched style, A playful panda climbing a tree surrounded by bamboo 42-drawing: sketched style, A colorful carnival with a Ferris wheel, tents, and lights twinkling in the evening 43-drawing: sketched style, A train crossing a bridge over a deep canyon with birds flying in the sky 44-drawing: sketched style, A chef in a cozy kitchen preparing a giant cake with frosting and sprinkles 45-drawing: sketched style, A robot dog playing fetch in a futuristic park with holographic trees 46-drawing: sketched style, A family having a picnic in a field of wildflowers on a sunny afternoon 47-drawing: sketched style, A group of astronauts exploring a mysterious planet with alien landscapes 48-drawing: sketched style, A little girl reading a book under a tree, with magical creatures surrounding her 49-drawing: sketched style, A cozy village covered in snow, with people 
skating on a frozen pond 50-drawing: sketched style, A space explorer planting a flag on a distant planet with towering mountains 51-drawing: sketched style, A hot air balloon festival with vibrant balloons floating against a bright blue sky 52-drawing: sketched style, A magical forest with glowing trees and strange creatures walking around 53-drawing: sketched style, A little boy dressed as a pirate, steering a toy ship on a small stream 54-drawing: sketched style, A city park full of people flying kites, having picnics, and playing games 55-drawing: sketched style, A snowy forest with a family of deer standing among the trees 56-drawing: sketched style, A group of kids playing on a giant, whimsical playground with slides and swings 57-drawing: sketched style, A tiny mouse wearing glasses, painting a landscape on a miniature canvas 58-drawing: sketched style, A dragonfly flying above a pond filled with lily pads and colorful fish 59-drawing: sketched style, A lighthouse shining its light across the ocean during a storm 60-drawing: sketched style, A space shuttle taking off from a futuristic spaceport, with stars in the background 61-drawing: sketched style, A little girl riding a unicorn through a magical meadow 62-drawing: sketched style, A group of friends camping in the woods, roasting marshmallows under a starry sky 63-drawing: sketched style, A whimsical bakery with giant cupcakes and candy decorations on the windows 64-drawing: sketched style, A happy family playing in the snow, building snowmen and having snowball fights 65-drawing: sketched style, A colorful rainbow arching over a field of blooming flowers after a rainstorm 66-drawing: sketched style, A young artist sitting at an easel, painting a vibrant landscape 67-drawing: sketched style, A group of dolphins jumping out of the ocean under the bright sun 68-drawing: sketched style, A city skyline at sunset, with colorful clouds reflecting in the skyscrapers 69-drawing: sketched style, A futuristic robot 
delivering mail in a peaceful suburban neighborhood 70-drawing: sketched style, A magical crystal cave with glowing gems and sparkles in the air 71-drawing: sketched style, A cozy bookstore with books piled high and a friendly cat sleeping on a chair 72-drawing: sketched style, A child looking up at the stars through a telescope on a quiet hill 73-drawing: sketched style, A giant tree with a door at its base, leading to an underground world 74-drawing: sketched style, A dog wearing a superhero cape, running through a park on a sunny day 75-drawing: sketched style, A train station in the countryside, with people waiting for the next train to arrive 76-drawing: sketched style, A group of friends exploring an enchanted forest filled with glowing flowers 77-drawing: sketched style, A futuristic cityscape with tall buildings and flying vehicles zooming by 78-drawing: sketched style, A garden full of exotic plants and butterflies flying in the warm sunlight 79-drawing: sketched style, A small boat sailing on a calm river under a bright full moon 80-drawing: sketched style, A cozy living room with a fireplace, bookshelves, and a sleeping cat on a rug 81-drawing: sketched style, A mountain range with a hiker standing on a peak, looking out at the view 82-drawing: sketched style, A group of robots having a picnic in a futuristic park 83-drawing: sketched style, A family of otters swimming in a crystal-clear river with rocks and plants below 84-drawing: sketched style, A whimsical train traveling through the clouds in a bright, blue sky 85-drawing: sketched style, A small cottage in the middle of a lush forest with birds flying around 86-drawing: sketched style, A little girl dressed as a princess, walking through a magical garden 87-drawing: sketched style, A friendly robot building sandcastles on a beach with a bucket and shovel 88-drawing: sketched style, A group of animals hiking through the forest, carrying tiny backpacks 89-drawing: sketched style, A steampunk airship 
floating over a futuristic cityscape with gears turning 90-drawing: sketched style, A child planting seeds in a garden while butterflies flutter around 91-drawing: sketched style, A magical waterfall surrounded by lush greenery and glowing flowers 92-drawing: sketched style, A young boy catching fireflies in a jar during a summer evening 93-drawing: sketched style, A rocket launching into space from a futuristic city with skyscrapers in the background 94-drawing: sketched style, A dragon soaring over a village at sunrise, casting a shadow on the rooftops 95-drawing: sketched style, A peaceful garden with a pond, koi fish swimming, and cherry blossoms falling 96-drawing: sketched style, A group of friends playing soccer in a park with colorful leaves falling from the trees 97-drawing: sketched style, A group of kids sailing paper boats down a stream on a sunny day 98-drawing: sketched style, A young boy dressed as an astronaut, exploring an alien planet filled with strange creatures 99-drawing: sketched style, A family of ducks swimming in a pond with lily pads and frogs jumping around ================================================ FILE: examples/diffusion/prompts/lora/yarn.yaml ================================================ 00-yarn: yoda, yarn art style 01-yarn: cookie monster, yarn art style 02-yarn: a dragon spewing fire, yarn art style 03-yarn: albert einstein, yarn art style 04-yarn: a panda riding a rocket, yarn art style 05-yarn: the joker, yarn art style 06-yarn: a unicorn dancing in the rain, yarn art style 07-yarn: a medieval castle on a hill, yarn art style 08-yarn: a mermaid swimming in the ocean, yarn art style 09-yarn: an astronaut floating in space, yarn art style 10-yarn: a cat playing the piano, yarn art style 11-yarn: a robot serving coffee, yarn art style 12-yarn: a futuristic city skyline at night, yarn art style 13-yarn: a ship sailing through a storm, yarn art style 14-yarn: a phoenix rising from the ashes, yarn art style 15-yarn: an 
ancient temple in the jungle, yarn art style 16-yarn: a lion roaring on top of a mountain, yarn art style 17-yarn: a ballerina performing on stage, yarn art style 18-yarn: a butterfly flying over a flower field, yarn art style 19-yarn: a snowman melting in the sun, yarn art style 20-yarn: a wizard casting a spell, yarn art style 21-yarn: a dragon curled up sleeping, yarn art style 22-yarn: a viking ship on a river, yarn art style 23-yarn: a knight in shining armor, yarn art style 24-yarn: a tiger prowling in the jungle, yarn art style 25-yarn: an owl perched on a tree branch, yarn art style 26-yarn: a circus clown juggling, yarn art style 27-yarn: a bear fishing by a stream, yarn art style 28-yarn: a fox in the forest, yarn art style 29-yarn: a wolf howling at the moon, yarn art style 30-yarn: a race car speeding on a track, yarn art style 31-yarn: a waterfall cascading over rocks, yarn art style 32-yarn: a hot air balloon flying over mountains, yarn art style 33-yarn: a peacock spreading its feathers, yarn art style 34-yarn: a fisherman on a quiet lake, yarn art style 35-yarn: a galaxy with swirling stars, yarn art style 36-yarn: a samurai with a sword, yarn art style 37-yarn: a monkey swinging from vines, yarn art style 38-yarn: a ninja jumping between buildings, yarn art style 39-yarn: a squirrel collecting acorns, yarn art style 40-yarn: a zebra running through the savannah, yarn art style 41-yarn: a pirate ship sailing the seas, yarn art style 42-yarn: a garden full of colorful flowers, yarn art style 43-yarn: a spider weaving a web, yarn art style 44-yarn: a bat flying through a cave, yarn art style 45-yarn: a scuba diver exploring a coral reef, yarn art style 46-yarn: a balloon animal in the shape of a dog, yarn art style 47-yarn: a hedgehog curled up in a ball, yarn art style 48-yarn: a koala climbing a tree, yarn art style 49-yarn: a genie emerging from a lamp, yarn art style 50-yarn: a ghost haunting a haunted house, yarn art style 51-yarn: a fairy 
sprinkling magic dust, yarn art style 52-yarn: a robot dancing in the street, yarn art style 53-yarn: a lighthouse by the ocean, yarn art style 54-yarn: a train crossing a bridge, yarn art style 55-yarn: a dragonfly hovering over a pond, yarn art style 56-yarn: a panda eating bamboo, yarn art style 57-yarn: a giraffe reaching for the tallest leaves, yarn art style 58-yarn: a snail crawling on a leaf, yarn art style 59-yarn: a kangaroo hopping across the outback, yarn art style 60-yarn: a dog catching a frisbee, yarn art style 61-yarn: a knight slaying a dragon, yarn art style 62-yarn: a ballerina twirling under the stars, yarn art style 63-yarn: a treehouse in a giant oak tree, yarn art style 64-yarn: a treasure chest filled with gold, yarn art style 65-yarn: a magician pulling a rabbit from a hat, yarn art style 66-yarn: a dolphin jumping out of the water, yarn art style 67-yarn: a pirate holding a treasure map, yarn art style 68-yarn: a cowboy riding a horse, yarn art style 69-yarn: a sunflower turning towards the sun, yarn art style 70-yarn: a chameleon blending into the leaves, yarn art style 71-yarn: a whale breaching the ocean surface, yarn art style 72-yarn: a dragon protecting its hoard, yarn art style 73-yarn: a soccer player scoring a goal, yarn art style 74-yarn: a polar bear walking across ice, yarn art style 75-yarn: a butterfly landing on a flower, yarn art style 76-yarn: a magician performing a card trick, yarn art style 77-yarn: a frog sitting on a lily pad, yarn art style 78-yarn: a hedgehog in a garden, yarn art style 79-yarn: a rainbow over a waterfall, yarn art style 80-yarn: a squirrel eating an acorn, yarn art style 81-yarn: a toucan in the rainforest, yarn art style 82-yarn: a hamster running on a wheel, yarn art style 83-yarn: a jellyfish floating in the ocean, yarn art style 84-yarn: a sun rising over a mountain, yarn art style 85-yarn: a clownfish swimming through coral, yarn art style 86-yarn: a musician playing the violin, yarn art style 
87-yarn: a panda playing in the snow, yarn art style 88-yarn: a dolphin swimming with fish, yarn art style 89-yarn: a cat chasing a butterfly, yarn art style 90-yarn: a wolf stalking its prey, yarn art style 91-yarn: a bunny hopping through a meadow, yarn art style 92-yarn: a cityscape at sunset, yarn art style 93-yarn: a hawk soaring above a canyon, yarn art style 94-yarn: a beach with palm trees, yarn art style 95-yarn: a penguin sliding on ice, yarn art style 96-yarn: a lioness with her cubs, yarn art style 97-yarn: a city skyline with fireworks, yarn art style 98-yarn: a campfire under the stars, yarn art style 99-yarn: a sea turtle swimming through a coral reef, yarn art style ================================================ FILE: examples/diffusion/prompts/qdiff.yaml ================================================ '0000': 'three people riding on the backs of elephants ' '0001': Person driving a plated motorcycle on a track with people watching. '0002': A cat sitting in front of a flat screen TV. '0003': A person is on a rail performing stunts on a skateboard. '0004': A person on surfboard riding on a small wave. '0005': A cat eating a dead bird on the ground. '0006': Two sinks in a kitchen with dirty dishes in them. '0007': A couple of people that are standing near a train. 0008: A young boy is performing tricks on a skateboard. 0009: A tan bathroom with a toliet and a tub. '0010': A bean and corn mixture, rice, and broccoli on a plate '0011': a little dog jumping up towards a frisbee someone is holding '0012': A modest plate with grains and broccoli and almonds '0013': 4 different colored sea horses flying with 4 birds. '0014': People are walking by a blue train next to a mountain. '0015': a guy looking to the side and smiling '0016': Two people using an interactive gaming system while a person observes them from a couch. '0017': A fancy bathroom with a clean toilet and piping. 
0018: 'Man laughing standing next to his motorcycle with his bicycle attached to it. ' 0019: The school bus is colored yellow and pink. '0020': 'An Orange bus is parked next to the people. ' '0021': A man wearing a suit and tie and red hat with a silver buckle. '0022': A woman sitting in front of a giant pizza. '0023': 'Bedside with lamp, large bound book, and humidifier. ' '0024': Two stuffed animals posed looking at book with other animals. '0025': A person cutting a multi layer cake with a knife. '0026': A tray filled with a cut in half sandwich and a cup of coffee. '0027': The view of a metropolitan area behind many buses. 0028: A car that is trying to drive through a flood. 0029: a person riding a surf board on a wave '0030': 'A fried egg with a runny yolk tops a mini whole wheat pizza made with melted cheese and spinach. ' '0031': A girl walking a path holding an orange umbrella. '0032': Some cows that are wandering around a lot of pigeons. '0033': A view of a street intersection through a window. '0034': 'Horse in a large corral eating grass and trees in the back. ' '0035': People are sitting on elephants with a little chair. '0036': A giraffe is walking in some tall grass '0037': A yellow commuter train parked at a train station. 0038: 'A woman stands beside her luggage at an airport. ' 0039: A skier in the snow in a wooded area. '0040': A woman twirling an umbrella with flowers on it. '0041': A black bear is surrounded by black birds on grass. '0042': a girl with her cell phone tie and blue hair '0043': 'a parking meter on the side of a road with a red light showing ' '0044': A black cat with arched back walking past a motorcycle '0045': 'When looking at mannequins without heads the clothing is superficial. ' '0046': A view of a stoplight by a street. '0047': a bunch of animals by a nasty street 0048: A couple posing on a horse statue in a courtyard. 0049: The boy is enjoying playing the game on the computer. 
'0050': A woman in gear skiing down a snowy slope '0051': A statue of a cow with a subway map on its side. '0052': A Beanie Baby beside a vintage photo of a man and a woman. '0053': A pair of red scissors on top of a desk. '0054': A man walking with a goat on his shoulders '0055': desktop picture with laptop and food container full of food '0056': People are skiing on the snowy slopes in a designated area. '0057': Pit bull playing with soccer ball in the grass. 0058: A cat is looking up at an oven. 0059: A young woman sitting on a curb next to a fire hydrant writing on a notepad. '0060': Three zebras stand side by side looking at the camera. '0061': 'A living room with a sofa, a bookcase, and a laptop charging. ' '0062': a green orange and silver train and some cars '0063': Square pastries are assorted on a white platter. '0064': A 787 airplane is landing at the airport. '0065': Breakfast foods served on a tray on a table. '0066': A double decker bus is parked in the parking lot. '0067': Outside view of people walking in front of a store 0068: A dinner plate with a colorful salad and grilled meat. 0069: A small bathroom with two containers catching plumbing drips. '0070': Hey man that is opening up a bottle of wine. '0071': A clock fastened to a brick store front reads 10 after 10 '0072': An outdoor baseball game with a runner being tagged at base. '0073': A chef is pulling a pizza out of a pizza oven '0074': Black cows standing in the grass of a pasture. '0075': A woman in blue jacket standing on a snowboard. '0076': A guy doing tricks on his kate board '0077': A man in suit taking a photograph with a cellphone. 0078: 'A horse pulling a wagon with a conductor down the road. ' 0079: A person in a giraffe costume in a crowd of people. 0080: A bus stopped on the side of the road. 0081: 'Three men sitting at a table eating breakfast. ' 0082: Family and friends are together at the beach. 0083: 'A door opens to a plain, white bathroom. 
' 0084: young male preparing to throw a white plastic Frisbee 0085: People cross country skiing on a path between trees. 0086: A grey and orange fire hydrant next to a street. 0087: An airplane landing on a runway in an airport 0088: A red bus driving down the road with its lights on. 0089: 'Blurry picture of a man looking into a mirror which has something on it ' 0090: a plane lands on a big wide run way 0091: A pink bike in a bike shop with hardwood floors. 0092: Black and white photograph of a man playing soccer. 0093: A woman standing on a tennis court holding a racquet. 0094: The head beams from the vehicle can be seen clearly as the vehicle makes it's way up the street under the street lights. 0095: A bathroom with ski equipment laid out on the floor. 0096: a small wooden cart with a bunch of bananas on it 0097: A boy is standing in a field, playing frisbee. 0098: 'there are hundreds of black face sheep all laying in the pasture ' 0099: A man is in mid air doing a skateboard trick. '0100': A train passes by in an empty station. '0101': 'the front door to the house is open ' '0102': 'A plate on a table is filled with carrots and beans. ' '0103': 'A man standing near a table with video equipment. ' '0104': A skater losing his balance while squatting on his board '0105': a couple of young kids are sitting together '0106': Stuffed teddy bear placed in a lighted Christmas tree '0107': 'A white plate sitting on a table filled with food. ' 0108: A smiling girl has a small plate of pizza. 0109: A small black dog standing behind a crowd of people. '0110': Two elephants that are standing next to each other. '0111': a close up of a baseball glove on the ground with a hat '0112': A small TV is affixed to the wall with shelves below it. '0113': an old man sitting on top of a horse next to the mountains '0114': a person riding skis on a snowy slope '0115': 'The living room is empty with the television on. ' '0116': Two airplanes are flying in the cloudy sky. 
'0117': A soda can, a pen, a cell phone and a fruit drink mix packet sitting on a table with a computer keyboard. 0118: A dish on a blue plate contains carrots, onions, broccoli and rice. 0119: a computer desk with a laptop another monitor with a keyboard and mouse '0120': A small kitchen with stainless steel appliances and white cupboards. '0121': a male in a brown shirt sitting on a bench with a laptop '0122': A man riding a motorcycle with a woman on the back. '0123': A standing zebra and its reflection in water. '0124': A one way sign and two other signs mounted to a pole '0125': a cat laying on a table next to a stack of papers '0126': A boy baseball player running to steal a base while another boy stops him. '0127': Male surfer demonstrating skills on small breaking wave. 0128: The bed has been neatly made and the pillows arranged. 0129: A long table accommodating many people while eating '0130': An almost empty plate with a lemon slice on it '0131': Two zebras in a jungle area fighting in a dirt area. '0132': a teddy bear dressed in prison stripes sitting on top of a trash can '0133': A cat laying on top of a wooden computer desk. '0134': a tennis player on a court with a racket '0135': a woman sitting at an outdoor table with two pizzas on it '0136': A mascot is posing next to Christmas decorations at the amusement park. '0137': a white stuffed bear is dressed up in some gear 0138: a brown teddy bear and some wooden block toys 0139: A man has a red water bottle up to his mouth. '0140': 'There is a horse pulling a carriage. ' '0141': Various different pictures of food in a bowl. '0142': Cars are parked on the street next to an old fire hydrant. '0143': A dog sits on a seat in a truck. '0144': A tennis match takes place on an orange court with full stands. '0145': 'The Norfolk and Cambridge road signs have fallen to the ground while a van drives by. ' '0146': A plate of food with meat, eggs and potatoes. 
'0147': An open laptop computer sitting on a desk next to a lamp. 0148: A big, yellow police motorcycle parked on a city street. 0149: 'A dirty dog sits on the front patio of a home. ' '0150': The TV is placed in a well decorated room. '0151': A small boy holding a bat with a hat pulled over his face. '0152': a person riding a surf board on a wave '0153': A red stop sign sitting on top of a metal pole. '0154': A newly married couple sharing a piece of there wedding cake. '0155': The blue, yellow, and purple train is running down the track. '0156': The photo is a collection of brightly painted fire hydrants. '0157': There is a young man standing on a skate board. 0158: 'a woman looking a mirror whiile brushing her teeth ' 0159: a couple of elephants make their way through the trees '0160': 'white cabinets silver oven sink and refrigerator ' '0161': Two men in bow ties standing next to steel rafter. '0162': 'A room with a fire extinguisher, mugs hanging from a shelf and several lights. ' '0163': A man wearing a white shirt and tie standing in a room. '0164': A woman wearing a dress is riding a bike. '0165': A stop sign in front of two buildings on a street. '0166': The cross country skiers are enjoying their run. '0167': A jet pilot sits in a plane on a tarmac. 0168: a person on a bike that is by some water 0169: Surfers holding boards standing in water at ocean. '0170': A couple of sandwiches that are on a paper napkin. '0171': three large dogs sitting outside near a forested area '0172': a close up of a toilet with a pink seat and lid '0173': Vase full of feathers sitting on a table next to a floral drape. '0174': A jockey rides a horse in a field '0175': A person in a black wetsuit surfing on a wave alone '0176': A piece of cake sitting on a square plate. '0177': a person is in the air on a snowboard 0178: An adult and a juvenile zebra in a wooded area. 0179: A laptop computer sitting on top of a wooden table. 
0180: An orange train is on the track with many cars behind it. 0181: A group of people gaze at laptop screens. 0182: A man surfing in the ocean as the sun sets. 0183: A giraffe stands in a rocky area, larger rocks behind him. 0184: A group of three people sitting next to each other on a cement wall. 0185: An older person standing inside of a kitchen. 0186: two women and two girls in makeup and one is talking on a cellphone 0187: a person holding a knife and fork over a pizza 0188: There is a table set with stuffed animals 0189: an elephant in a zoo walks around slowly 0190: There is a bird standing on the back of the cow. 0191: A red motorcycle parked by paint chipped doors. 0192: A dog is opening his mouth to catch a Frisbee 0193: The old, adult elephant stands near a wire fence. 0194: A young boy riding a skateboard on a sidewalk 0195: a person with a black umbrella standing in the middle of the road 0196: A man jumps to catch a Frisbee flying through the air. 0197: Two giraffes with the backs turned to the camera next to a wall. 0198: This girl is looking down at her shoes. 0199: 'Small children in green uniforms running on a soccer field. ' '0200': Man cross country skiing on slight down hill slope. '0201': A living room filled with furniture and a table. '0202': A clock on a bell tower of an old church. '0203': A row of foreign motorcycles is lined up in front of a wooden fence. '0204': 'A bright pink toilet seat with the lid open. ' '0205': Dimly lit part of a house with light entering window. '0206': 'Two animals standing in a grassy field by trees. ' '0207': A couple of giraffe standing around palm trees. 0208: A man in a black tie and white shirt has a stern look on his face. 0209: A baseball player holding a bat next to home plate. '0210': a kid stands in the snow on his skiis '0211': A baseball bat is in a window looking out over the street. '0212': A car is parked on dry grass with kites overhead. 
'0213': Woman dressed in black, smiling and brushing her teeth '0214': 'a kid rides a skate board up a wall ' '0215': The spaceous living room has a large television and a fireplace. '0216': 'An elephant with red on it''s face in the water. ' '0217': Orange train engine pulling a train in a freight yard. 0218: A small giraffe walking around in an exhibit. 0219: Two tall giraffes are next to bare trees. '0220': 'A man reaches out to give the elephant something. ' '0221': A teddy bear under some furniture that appears to be turned on it's side. '0222': A bathroom with a sink and a toilet '0223': A girl swinging a tennis racket in a match. '0224': An elephant statue with an opening of various drawings on it. '0225': a close up of a person standing holding a frisbee '0226': 'a man flying a small plane in the clear weather ' '0227': 'Two college age boys playing Wii while others look on ' 0228: A living area with two chairs and a coffee table. 0229: The dork with the earring stands next to the Asian beauty who is way out of his league. '0230': A desk has two monitors and other electronics equipment. '0231': 'Several men stand outside around a motorcycle. ' '0232': Steak and crab cakes served with grilled peaches. '0233': Set of toy animals sitting in front of a red wooden wagon. '0234': A single horse leaning against a wooden fence. '0235': A table that has several plates of breakfast food on it. '0236': A dog laying on the ground its leash tied to a fire hydrant. '0237': 'Automobiles stopped at an intersection because of a passing train. ' 0238: A metallic refrigerator freezer next to a microwave oven. 0239: 'a herd of giraffes eat on some tree leafs ' '0240': A man raising a foot over a brief case '0241': A long locomotive train parked in a station next to a person. '0242': View of a highway near a city at sunset '0243': A man with a tennis racket and ball is on a tennis court. 
'0244': A giraffe in a pen at a zoo '0245': Young people dressed in costume carry black umbrellas. '0246': A man in a suit tossing a frisbee '0247': I am unable to see the image above. 0248: A woman is sitting holding a bug swatter shaped like a tennis racket. 0249: 'A city street lit up in a night scene with cars in the background. ' '0250': Several zebras eat the green grass in the pasture. '0251': 'A group of people with one person holding up a huge different color umbrella. ' '0252': an image of a woman doing arts and crafts with kids '0253': The gourmet pizza includes several very special ingredients. '0254': A bedroom with a large bed sitting next to a black dresser. '0255': 'A middle aged man dressed in a blue suit smiling at the camera. ' '0256': A guy holding a pair of metal scissors in front of his eye. '0257': A kitchen with its light's off, with light coming from an open door. 0258: This bathroom has a pattern of blue tiles on the floor 0259: A light pole that has a street sign. '0260': Several people are swimming in the ocean with two empty chairs and an umbrella on the beach. '0261': A group of horses and a baby horse eat hay at a farm. '0262': the bathroom has a mirror and a tiled sink '0263': A little girl is holding an umbrella on a wet day. '0264': two people are playing a video game and a male is watching '0265': 'a reflection of a dog looking out a window in a side view mirror ' '0266': Several cars parked along the side of a street next to a street sign. '0267': A man flying through the air while skiing. 0268: People playing tennis on a court surrounded by green hedges. 0269: A couple of horses standing in a lush green field. '0270': A woman on ski's standing in the snow. '0271': A snow boarder is going down the slope on his board. '0272': A red car next to a gas station sign and parked blue and silver motorcycles. '0273': A women looking over at something while talking on the phone . 
'0274': A motorbike parked, with several bicycles stacked on on the back of it. '0275': Two street signs are sitting under power lines and a palm tree. '0276': a black and white photo of a person holding a sign '0277': a couple of tennis players on a tennis court 0278: A yellow sign that is at the top of a pole. 0279: A pair of zebras grazing on grass in a zoo. 0280: A pizza with tomatoes on it sitting on a table in a restaurant. 0281: Many images form a picture of a computer monitor on a desk. 0282: Four older men sitting on a wooden bench. 0283: 'Someone in sandals is standing over a broken cell phone in pieces. ' 0284: A young boy who is holding a kite in his hands. 0285: a couple of people are eating at a table 0286: Two people posing next to a giant statue with a suit case. 0287: Two young boys playing t-ball at ball park 0288: a small child wearing a tie and holding a suitcase with other people in the background 0289: A man holds pizza crust in his mouth. 0290: People with drinks standing around a kitchen island. 0291: A man prepares to cross the street at a crosswalk 0292: there is a young girl sitting at a table eating 0293: A boy is skateboarding on a pole at a park. 0294: A man with a helmet is riding a motorcycle on it's side. 0295: This girl is laying on the bed reading. 0296: bowl of breakfast oatmeal with apples and a spoon 0297: two people sitting on benches with trees in the background 0298: A person carrying a surfboard on the beach at sunset. 0299: A teddy bear sits by a keyboards and microphone. '0300': Man with a backpack carrying his dog in the pouch. '0301': A clean industrial kitchen with no one in it '0302': 'A zebra that only has a striped neck and face, the rest of it is white. ' '0303': Plates of hamburger and pizza in takeaway baskets set on a table. '0304': 'An airplane just landed on the runway ' '0305': a few people that are standing on a beach '0306': 'A woman is walking two dogs in the snow. 
' '0307': There is a hotdog sitting among many condiments. 0308: view from below of a clock tower in a building 0309: Corner of Broadway, West 32nd Street, and Korea Way. '0310': 'A group of giraffes gather under a tree. ' '0311': 'Small kitten sitting on top of a wooden crate. ' '0312': 'An orange cat looks through a glass plate. ' '0313': A blue water hydrant on a pavement near the road '0314': A pizza sits half eaten on a plate. '0315': The beach is filled with people, whom are expecting someone. '0316': 'A kale and sweet pea home garden getting the last rays of sunlight. ' '0317': A man is kneeling and holding on to a toilet. 0318: A plate topped with a half eaten corn cob and meat. 0319: Two people are riding elephants beside some trees. '0320': A couple of zebra standing next to a tree. '0321': A box of donuts that is opened up. '0322': Blurry image of men in living room playing a stand-up video game. '0323': 'A man with a bald head and glasses is sitting while holding up a cell phone. ' '0324': A group of people on horses on a beach. '0325': Many caps and gloves sit in front of the players in a baseball dugout. '0326': A skateboarder is performing a tick in a skate park. '0327': A red train parked under a sheltered station area. 0328: a big hill that has a bunch of snow on it 0329: Black and white cat, with TV remote, laying on couch. '0330': 'Off white toilet with a faucet and controls. ' '0331': A baseball player taking a swing at a ball '0332': Double-decker buses sit at the curb in front of an old building. '0333': Four giraffes poking their heads out from behind a rock. '0334': some luggage and a back back sitting on teh ground next to a lap top '0335': 'a shadow of a man on an umbrella ' '0336': An empty street at night with lots of lights in the background. '0337': A glass vase full of feathers on a table in front of a window. 0338: A group of people wait near a wedding procession of cars, one of them holding two apples. 
0339: A group of people enjoying the Nintendo Wii. '0340': A large clock standing in front a building with lots of windows. '0341': A trio of zebras stand together in the grass. '0342': A group of people trying to feed giraffes at the zoo '0343': 'THERE ARE DIFFRENT TROPHIES ON DISPLAY ON THE WALL ' '0344': A red toilet in a very small bathroom. '0345': There is a vase filled with water that has rocks and a plant in it '0346': The bottom view of an airplane flying in the air. '0347': A person doing a trick on a skateboard 0348: Women in warm weather clothes with multi-colored parasols. 0349: A small bathroom where the vanity is over the sink and toilet. '0350': A couple of glass bowls or oranges sitting on a metal counter. '0351': A man is nearly sideways while racing a motorcycle around a track. '0352': The back of a Volvo bus approaching a hotel '0353': Batter takes a swing at the incoming ball during the baseball game '0354': 'There is a picture of an outside territory. ' '0355': a close up of a plate of food with broccoli '0356': A skateboarder jumping off a small ramp places on the street. '0357': Many people are driving motorcycles through an intersection. 0358: There are two metal benches on the patio. 0359: A hand is slicing a pizza on a table. '0360': Cooked broccoli in serving dish sitting on cloth hot pad. '0361': A guy rides a skateboard in an overflow ditch '0362': Cows graze an open field next to the ocean. '0363': Two people sit facing each other under an umbrella. '0364': A farmer inspecting cattle at a livestock auction. '0365': Various pots and plates displayed next to a woman. '0366': 'A young girl with fluffy hair holds a tennis racket. ' '0367': 'The woman is looking at the elephant in amazement. ' 0368: A man stands beside a bus in a snowy forest at night. 0369: Three empty park benches sitting next to orange and pink walls. '0370': 'a couple of zebras graze on some grass ' '0371': A couple of windows sitting inside of a room. 
'0372': A large long train on a steel track. '0373': A woman is standing in a professional kitchen. '0374': A harbor with several boats floating in it. '0375': A man starting to stand up on a surfboard in the ocean. '0376': A biplane leaves a smoke trail while doing a trick. '0377': A young woman with a tennis racket celebrates a good play. 0378: a person that is standing on a skateboard 0379: a large group of people are gathered around the table 0380: A laptop that is sitting on a bed. 0381: The large bear is made up of clay. 0382: A pair of scissors and fabric on a wood table. 0383: A skier standing on a snowy mountain with trees. 0384: A black cat sitting under a park bench. 0385: a couple of carrots sit next to a spoon 0386: 'A man is on the court holding his racket. ' 0387: A train is coming down the tracks near a building. 0388: an elephant in captivity with trees in the background 0389: 'A large display of fruit: applies, grapes, oranges, lemons, limes and grapefruit' 0390: A stop sign has graffiti on that changes the sign to say, don't stop believing. 0391: 'A green bench with graffiti in an area with trees. ' 0392: Two teddy ears are hanging in a window. 0393: A cluttered living room with figurines on a display case and photographs on the wall. 0394: A man rides a yellow motorcycle down a street. 0395: A man riding a skateboard on top of a wooden bench. 0396: A train going back to its coarse filled with people. 0397: A woman putting a hot dog on a bun under a blue tent. 0398: An old train makes its way down the track in the country. 0399: 'a person with a frisbe near many trees ' '0400': A dog is sitting on an armchair next to a fridge. '0401': A man standing on his skiesat night with a light on his head. a city is close behind him. '0402': Guy jumps high doing skateboard flip off a ramp '0403': A young man in a red shirt is throwing a frisbee. 
'0404': 'a person with a shopping cart on a city street ' '0405': A clock that has a second clock for the second hand. '0406': 'A father with a braided beard is looking at his child. ' '0407': A red stop sign that reads " Eating Animals " below it. 0408: A young ball player poses with his baseball glove. 0409: Sheep are standing together outside in a field '0410': 'A pot full of vegetables and ready to be cooked. ' '0411': a pack of elephants standing next to each other in a pen '0412': A man attempting to lift up a toilet off the floor. '0413': A traffic light suspended over a rural road. '0414': 'A canopy bed with white see through curtains. ' '0415': a group of people listening to music or possible playing video games '0416': 'THERE ARE PEOPLE PLAYING SKATE BOARD ON THE STREET ' '0417': The dual image shows people carrying their surfboards under their arms. 0418: A very dimly lit room with a laptop open. 0419: there is a small puppy on the kitchen floor '0420': 'A kitchen with dark green cabinets and personal items on the windowsill. ' '0421': A woman holding a tennis racket in her hand. '0422': A sign that is on the side of a pole. '0423': some men are looking at a design of a shark '0424': A small toy sits on a plate with pizza and crusts. '0425': a bunch of pizzas are on display under a case '0426': Sheep stand and lay in hay strewn around a barn. '0427': 'A pair of black boots stand next to a red umbrella. ' 0428: A woman holding a Nintendo Wii game controller in her hand. 0429: A clock on a building next to a building with a "Bart" logo. '0430': A rooster walking on a beach near the ocean. '0431': A cat lying in the sun on a table. '0432': 'A dish covered in aluminum foil is baking in an oven. ' '0433': A cat sitting on a bench in front of a building. '0434': A stop sign in front of a road in a small town in the hills. '0435': A bunch of cakes are sitting on the counter. '0436': A cat is lying down in a chair. 
'0437': a man is walking down the sidewalk next to a bus on the street 0438: A man is doing a trick on a sidewalk curb. 0439: A kitten is trying to interact with a kitten that is on television. '0440': A man standing on the beach watching some sea bird feed. '0441': this is a pizza that is sliced up in pieces '0442': Several commercial planes sit on a runway next to an airport. '0443': Two men at a table under a tent selling neckties. '0444': ' couple of white trucks are parked outside together' '0445': A group of women sitting on a full bench. '0446': A man and woman sitting on a couch playing a video game. '0447': a cow standing in some sand next to some water 0448: a surf board in the sand near a body of water 0449: A person stretches to catch a red Frisbee. '0450': People riding bikes between a food truck and a building. '0451': Lifesaving devices are stowed along a beach while a lifeguard is elsewhere. '0452': 'A very young girl is about to grab and upside down umbrella. ' '0453': A dog lays in a room with a desk and shelves of files and books. '0454': A small bird standing on a rocky ground. '0455': A surfer is riding a wave in light blue water. '0456': Looking down at a computer keyboard and mouse '0457': A carved bear that has a ribbon around the neck. 0458: a man that is sailing on a boat in the water 0459: a room with a tv and some different types of couches '0460': Two men are posing for a photo, one man is holding a slice of pizza on a plate, and they are surrounded by other people sitting at tables. '0461': A man carries a bag of food and a drink away from a roadside eatery. '0462': a small stereo a banana and a flashlight '0463': A windsurfer watches people kite surfing at the beach '0464': A man standing on top of a river next to the sun and a flying kite. '0465': A couple of elephants walking across a lush green field. '0466': A man that is standing in the dirt with a bat. '0467': A person is holding an HP CD above a laptop computer. 
0468: A brown cow sniffing on a person hand 0469: many kites flying in the sky with cars parked '0470': A large zebra and baby zebra standing inside an enclosure eating. '0471': A piece of cake that is sitting on foil. '0472': A flock of sheep lie down in a field. '0473': A woman holds a racket in front of a net. '0474': 'Plates of food with onion rings and cheesecake and a cup of cocoa on a table ' '0475': 'The kid is playing a game of tennis on the court. ' '0476': a motorcycle with a boot on the back wheel '0477': many people sitting at different tables under a tent 0478: A brick clock tower ascending towards the heavens 0479: Two people standing next to a life size replica of a suitcase. 0480: Skier performing aerial jump during outdoor competition gathering. 0481: 'The toilet is across from a bidet in the small bathroom. ' 0482: Street vendors shows off their selection of ties. 0483: A woman helping another woman with her tie. 0484: Two men at a table with ties sitting near a laptop. 0485: A cake with blue, yellow and green fondant stars on it. 0486: A man holding a kite on a beach during the day. 0487: People stand near some Canadian flags at the base of a mountain. 0488: A man pouring wine into two other mens wine glasses 0489: A hotel room with a small television and a work desk. 0490: A large truck on a city street with two works sitting on top and one worker climbing in through door. 0491: 'A kitchen with a counter, window, stove and cutlery. ' 0492: 'A black cat sitting on a table in front of a laptop. ' 0493: some kind of cake that is on a white plate 0494: a woman is sitting with an umbrella outside 0495: A group of two people waiting to cross the street under an umbrella. 0496: A man on a court with a tennis racket. 0497: An empty field with older structure in the background. 0498: 'two gray jets are flying next to each other ' 0499: 'A smiling man is behind the counter at a restaurant. ' '0500': A large building is shown with clocks on the side. 
'0501': Grey and white cat sitting in a small sink. '0502': a vandalized stop sign in the dark with a sky background '0503': a woman watching two people play a wii game with wii motes '0504': an image of a baseball player about to bat '0505': A table topped with two wine glasses and a centerpiece. '0506': A bird that is sitting on a rock near the water. '0507': 'A man with a helmet on riding a skateboard in the street. ' 0508: three people are standing under the arch for a ski race 0509: An empty boat in the water near a tree '0510': 'A group of zebras are bending down and eating a box full of grass. ' '0511': A small cat is standing on a table '0512': A deer standing next to a small deer in a forest. '0513': A girl taking a swing at a baseball during a game. '0514': A white bed sitting in the corner of a room next to windows. '0515': A group of giraffe Standing up against a dirt wall in front of a crowd of children. '0516': 'Cargo train is traveling on a track next to a forest. ' '0517': A person standing at a table in a room. 0518: A police officer is riding a motorcycle down the street. 0519: a large cheesy pizza on a wooden counter '0520': Two cats by a tree in the dirt '0521': A black dual oven stove with controls on the top. '0522': A bus drives on a paved road with markings. '0523': A close-up of a brown and white cow in an enclosure. '0524': Two steel benches next to a walk way. '0525': 'Two zebras grazing while another horse standing and staring. ' '0526': A bunch of bananas on a plate with a sign on it. '0527': 'A woman is playing video games on a small television ' 0528: Someone pointing at their cell phone in a case on their hip. 0529: A collection of computers and a printer sit on an office desk. '0530': A herd of three horses standing on top of a green field. '0531': A dog rides on a surfboard with a person. 
'0532': Parents watching young boys playing soccer on a green field '0533': a person on skis with ski poles standing at the top of a hill '0534': Two people standing on a beach next to the ocean. '0535': A woman standing next to a table covered with plates of mostly eaten food. '0536': A woman is riding a horse as it jumps over a bar. '0537': Bicycles lined up on the side of the road. 0538: The pizza is beginning to melt in the oven. 0539: A young girl is eating out of a big bowl '0540': a person is going up a snow covered mountain '0541': A girl in a jacket and boots with a black umbrella. '0542': There is some food in the baking pan on the counter. '0543': A bird that is sitting on a bird feeder. '0544': A man in black sweater feeding a giraffe through a fence. '0545': A black and silver fridge next to a large mirror. '0546': A person wearing a red biker shirt stands next to his bike. '0547': Two adults with helmets beside a motorcycle on pavement. 0548: These people are going to have pizza and wine. 0549: a woman sitting next to a fire hydrant with a notebook in hand '0550': A group of bikers riding motorcycles across a bridge. '0551': Signs along a street on a rainy day. '0552': 'a woman wearing skis on a snowy mountain posing for the camera. ' '0553': Elvis impersonator sitting atop a metal sculpture of a bull. '0554': A white sink in a small, tiled bathroom. '0555': Photograph of an outdoor place with a bus. '0556': A messy bedroom has one red brick wall. '0557': Three giraffes, one young, in a natural looking habitat. 0558: We see a double decker bus wit a compartment in the side. 0559: Broccoli and cauliflower florets cut into small pieces and spread around. '0560': 'A pink smartphone with Windows 8 on the screen. ' '0561': 'toys on a beach made to look like he military ' '0562': a tall tower with a clock on top '0563': A cat that is laying on the side of a computer. '0564': A bathroom that has a broken wall in the shower. 
'0565': A good luck plant is in a round vase. '0566': a white male taking pictures with his cellphone '0567': 'An over head shot of a young person surfing on a surfboard. ' 0568: There are two sandwiches on a starbucks plate 0569: a table thath as some pizza and wine on it '0570': man holding a baby and petting a horse '0571': Two zebras, one grazing and one staring straight ahead in a grassy field '0572': a bunch of kids walking through some grass '0573': Cute little boy with hat playing on laptop computer '0574': A boy holding a game controller and playing a video game. '0575': A man on ski's that is in the air. '0576': A pizza on a wooden plate near glasses of wine. '0577': A man that has glasses and a hat. 0578: a street sign attached to a wooden pole 0579: a laptop on a table on balcony, blue sky. 0580: Crowd of people walking across the street under umbrellas 0581: A white bathroom with all of the necessities. 0582: A green and white bus on street next to dirt area. 0583: A train that is going by some rocks in the day time. 0584: A young lad swings his Wilson tennis racquet. 0585: a clean street sign that reads bodacious dr 0586: The horse is grazing in the back yard of this home. 0587: a toilet a sink a counter a mirror and some toilet paper 0588: A sheep with short hair stands under a fence. 0589: 'A unique car sitting beside an airplane. ' 0590: A baseball player practicing his swing at a baseball game. 0591: A panda bear sitting and eating a plant. 0592: a man on his phone in some kind of room 0593: A young woman skiing through a snow covered forest. 0594: Their underbelly of the surfboard shows that it has been used frequently. 0595: An elephant walking alone in a grassy area. 0596: 'Traffic lights on a corner of a road with vehicles ' 0597: Two women play a game in a living room. 0598: A display rack of assorted kinds of donuts. 0599: A room with a mirrored closet, keyboard and a dog on a bed. '0600': The television, in a plastic cow, is on. 
'0601': Two men being drug on buggies by dogs. '0602': A woman holding a plastic utensil passing out a piece of cake. '0603': Teams of children wearing blue and green uniforms playing volleyball inside an auditorium. '0604': A bedroom with a bed next to a night stand with a lamp. '0605': A group of giraffes drink water in the wilderness. '0606': A brown train next to a large mountain. '0607': A man walking toward a line of surfboards lined up along a shop wall. 0608: An outdoor clock clock surrounded by mums in a street median. 0609: A person is riding a skateboard on a ramp. '0610': This is an image of a row of scooters '0611': Cattle grazing on grass near a lake surrounded by mountain.. '0612': A woman standing on the side walk with her child. '0613': A woman is playing with her dogs under an umbrella. '0614': An old plane sitting on the grass beside the runway. '0615': a batter holding the bat up to hit the ball '0616': Four sheep standing against a fence looking at a man with a red bowl. '0617': A person holding up a chocolate doughnut with a face drawn on it. 0618: Rider on a horse jumping over a fence 0619: 'Two giraffes in an outdoor setting eating grass. ' '0620': Lady on tennis court wearing red tennis outfit and holding racket. '0621': A roadside vendor sells food to passersby on the street '0622': Some people are in a kitchen with a big pot on the stove. '0623': a pizza pepperoni and veggies sitting on a pan '0624': Two men are standing while playing a video game. '0625': 'A table with many different objects, including a plate of sandwiches. ' '0626': 'The adult sheep is standing near a stone wall. ' '0627': A bird swooping down to the water to catch food. 0628: A waitress and a man are holding up a large pizza. 0629: A group of women sitting at a table with plates of food '0630': 'A made up dinner table with a flower vase on the table. ' '0631': A lot of people that are looking at a pool. '0632': A man that is standing on a surfboard in the water. 
'0633': A herd of zebra in a grass field. '0634': A large colorful living room with an abundance of large windows. '0635': A group of men leading a pack of horses through a field. '0636': A chicken walking around on the ocean shore. '0637': A box of doughnuts and some food on a plate. 0638: a black cat standing in front of a motorcycle 0639: A woman holding a racquet on top of a tennis court. '0640': Two people in wetsuits on beach next to water and buildings. '0641': 'Two people about to get on-board a bus. ' '0642': A gentleman is waving a flag at people passing by in a train. '0643': Two utility trucks on pavement with sky in background. '0644': A table holding a white gown with peas and broccoli in it. '0645': A little boy in pajamas playing with his toys. '0646': A woman on the grass is playing frisbee. '0647': a man sitting at a desk in front of a laptop computer 0648: A living room filled with furniture next to a window. 0649: A person in white is standing next to a table on sandy ground. '0650': 'An audience is looking at an film of a man taking that is projected onto a wall. ' '0651': A man holding horse reins connected to 2 horses on a dirt field. '0652': Two women leaving a beach with kites flying in the background. '0653': A corner with a Stop sign, a no entry sign and a one way sign in front of a brick wall with graffiti. '0654': 'A cat sitting on the man''s lap while the man types on the laptop. ' '0655': A MAN IS STANDING IN HIS KITCHEN HOLDING A YELLOW KITE '0656': A couple of people standing cutting a cake. '0657': A person riding a snowboard down a snow covered ramp. 0658: 'There are many people trying to push the school bus. ' 0659: A snow covered city street lined with small shops '0660': A boat going through the water with a wall in back. '0661': A giraffe drinking milk from a bottle behind a cage. 
'0662': A lap top sits on a small desk with jars and candles around it '0663': animals standing along the shore in a line '0664': A train has oval like mirrors on the sides. '0665': 'a man holding a piece of pizza in front of a kid ' '0666': A person holding a glazed pastry item with one bite taken out '0667': female surfer walking carrying surfboard on her side 0668: A group of elephants being ridden by people wearing blue 0669: Several cows grazing in the field with a house in the background. '0670': A little girl looking down the hill with her skies. '0671': 'a small cat sits inside of a bathroom sink ' '0672': We are looking at an almost empty street. '0673': 'a toilet sits next to a brick wall ' '0674': A boy skateboards on concrete behind a backyard. '0675': People walking down a city sidewalk showing one with headphones. '0676': 'A train that is at a train station at night time. ' '0677': 'A white train traveling down the tracks with expo center on a sign on the front. ' 0678: A baby is reaching up toward the camera. 0679: Two slices of chicken pizza on a plate. 0680: A dog sitting on the couch with it's paw over a tray that holds remotes for the tv and Wii. 0681: The various pictures are featuring bananas and holders. 0682: A group of people sitting in a restaurant booth eating food. 0683: 'Woman sitting at table with beverages consuming sandwich. ' 0684: 'A person is holding a doughnut that has a bite taken out of it. ' 0685: 'A toilet from above; flanked by a toilet brush and a small trash can. ' 0686: An older businessman standing by bunches of bananas. 0687: 'A woman holds a baby while sitting next to a cage with a bird in it. ' 0688: A woman laying on the beach under an umbrella. 0689: an image of a park bench with flowers next to it 0690: An unattended office containing several computers and a chair. 0691: A just married coule feed each other cake while a photographer takes photos. 0692: Small personal pizza on a crust made of rice. 
0693: Boxed meal of sandwich roll, orange juice and strawberry yogurt 0694: A view of a bathroom, with two urinals in it. 0695: the scissors sit in a containers with pens and markers 0696: Three people with ski poles standing in the snow 0697: A hand spraying water into a white toilet bowl. 0698: Passengers wait for a train as it approaches the platform. 0699: A group of men enthusiastically caught in mid air at the same time. '0700': A person is eating at a table with plates, a fork, knife, spoon, cup, and cell phone on it. '0701': Guy in shorts holding tennis racket takes aim. '0702': Two cute children are sitting on the sidewalk eating. '0703': a bunch of food and stuff is laying on a tray '0704': There is no image to be reviewed on this hit. '0705': A person is riding a ramp on a skateboard. '0706': A man riding a surfboard inside of a wave. '0707': Two women petting a few goats at a petting zoo 0708: 'The tennis player wearing a purple outfit is about to hit the tennis ball. ' 0709: A CAT LAYING ON THE FLOOR AMIDST A COMPUTER,SPEAKERS,CORDS '0710': A pile of luggage sitting up against a white fence. '0711': Several children sit together while playing with plastic laptop computers. '0712': A plate that has some broccoli on it. '0713': 'A cow is leashed up to a green pole near the civilians. ' '0714': Several people crossing a road with one person having food in their hand. '0715': A very tall brown brick building next to a street sign. '0716': 'a little black bird with a big colorful beak sitting on a branch ' '0717': a city street at night with a lit clock on the corner 0718: This is a still life, slightly blurry, with a tea kettle and a floral arrangement. 0719: 'a plane flies through the air with fumes coming out the back ' '0720': 'A herd of zebras stand under tress near a road. ' '0721': A cat is on papers on a computer desk. '0722': The parking meter is decorated in different colors. 
'0723': two males a truck some cars and trees '0724': 'Two people smile while posing behind luggage suitcases on the sidewalk. ' '0725': A group of people enjoying a meat at a restaurant. '0726': A person is sitting at a keyboard near a microphone. '0727': A group of people flying kites in grassy field. 0728: A couple of people are crossing a busy street 0729: A vase of feathers on a shelf by some art. '0730': A yellow motorcycle is parked on a road with many bystanders '0731': People are talking on a tennis court while standing at the net. '0732': A woman riding on the back of a brown horse over an obstacle. '0733': Boats are docked in a lake by a road. '0734': 'A white and black train on rusty train tracks. ' '0735': The box contains six donuts, but only two are chocolate covered. '0736': 'A man walking while holding a ball in his hand. ' '0737': A train traveling under a rusted bridge on top of tracks. 0738: a cat resting on a laptop keyboard in a bedroom 0739: The puppy is wearing a black, white and pink apparatus. '0740': A group of people talking and sharing a meal in a restaurant. '0741': A brown, white and black cat looking at a laptop. '0742': 'A bike has had the wheels stolen from it sits in a pedestrian zone. ' '0743': A large batch of pineapples stacked up next to each other. '0744': A desk with a keyboard, mouse and computer monitor. '0745': A picture of a sign on the side of a street. '0746': a close up of street signs with buildings in the background '0747': A cow is being milked by a machine. 0748: A train running along a track near a station. 0749: a man dressed as a woman stands on a red carpet '0750': A dish with some orange slices and something else. '0751': Person cutting cake at a theme restaurant characters in background. '0752': A brown and white vase with foliage on a small table. '0753': 'Identical street signs pointing in the opposite directions of each other. ' '0754': A kid is riding down the street on a skateboard. 
'0755': A cat standing on a bed and wearing a tie. '0756': A man lighting candles on a birthday cake for a little girl. '0757': a long brown and white room with a kitchen 0758: A group of people that are behind a bus. 0759: An tusked elephant playing with a fallen tree at the zoo '0760': The boat sits in the lake below the mountain. '0761': A man at bat waiting for a pitch with a catcher and umpire behind him and players from the opposing team in the dugout. '0762': Bob the builder sicker on bottom of toilet lid in bathroom '0763': "Somebody is in the photograph not certain who that individual is. \n" '0764': People walk down a busy city street, with traffic light. '0765': a cat is way up in a tree '0766': A man walking across a dirt field next to a street with traffic. '0767': 'A bus and a car travelling in the same direction on a sunny day. ' 0768: A dog walks on the deck of a sail boat. 0769: 'A cat sitting on top of a bed on the comforter ' '0770': A train blowing smoke is coming down the tracks. '0771': A skateboarder is attempting a trick mid air. '0772': A woman with a snowboard with a man standing next to her on a ski slope. '0773': 'A man in a suit standing beside his bicycle. ' '0774': a stop sign with hammertime written on it '0775': Crisp white bedding creates a stately environment when combined with a cherry ensemble. '0776': A bus driving in the middle of traffic. '0777': A table is set with pizza and beer and a salad. 0778: Food on a train with a pie and some vegetable 0779: A bicycle that is tied to a post along a sidewalk. 0780: A partial sliced open piece of bread with some hotdogs in the middle 0781: A room with a bed, a clock, a lamp, a fireplace and a television. 0782: A family of all ages gather together in a kitchen. 0783: A red and white bus on street next to trees. 0784: A trio of teddy bears bundled up on a bed. 0785: 'a cat laying in a bathroom sink while looking at the camera ' 0786: A young man in a helmet standing by a bicycle. 
0787: A cat resting on top of a laptop computer. 0788: there seem to be very few animals on this field 0789: A motorcycle club stopping along the highway to take a break and talk. 0790: A man rides a horse and his dog follows. 0791: 'Parketing meter next to a tree with a warning sign on it. ' 0792: A buffet display case filled with lots of pizzas on pans. 0793: A bathroom filled with toilets and a tub next to a sink. 0794: A group of people sitting at a table. 0795: Two woman wearing no shoes playing video games 0796: A person that is sitting in a bed facing out. 0797: A baby doll laying on top of a bed under a book. 0798: 'Guy walking pass a bus sitting at the curb ' 0799: a cat siting on the ground watching a tv 0800: An old dog sniffs a red fire hydrant. 0801: A cat sitting in front of a monitor that is displaying a picture of another cat. 0802: A man dressed as a jockey riding a horse along a path. 0803: A beach area with black birds flying over it. 0804: 'A picture of the inside of a house with the door open. ' 0805: Two giraffes standing next to each other at a zoo 0806: A blue motorcycle parked on display next to other vehicles. 0807: A large blue lit bridge spanning over a lake. 0808: There is no image here to provide a caption for. 0809: The street signs and traffic signal are below wires attached to the pole. 0810: An old airplane is flying through the sky. 0811: a group of young people playing baseball in a field 0812: The herd of sheep is standing on the grass. 0813: An animal with horns wears a rope from its nose and ear to its neck. 0814: 'A man holding a tennis racquet in the yard ' 0815: Tennis player running with ball in mid air. 0816: Three people one in the water with two boats one with an umbrella. 0817: An open laptop computer sitting next to a phone. 0818: Three children flying a box kite near the ocean. 0819: The sun is hitting the corner of one of the buildings. 0820: Two kids in pink and purple jackets standing by a fence. 
0821: a close up of sheep on hay ground indoors 0822: a man holding a tennis racquet on a tennis court 0823: a couple of cows that are dragging some metal 0824: Two people are riding on surfboards in the ocean. 0825: Various size white refrigerators on display in dark room. 0826: A tree standing in the middle of a grass circle. 0827: Two cats laying on the floor and sleeping. 0828: Two brown bears in water open their mouths to each other 0829: A man who is walking across the street. 0830: A cow is standing on a street corner. 0831: 'A bathroom with toys and books for young children. ' 0832: THERE IS A CLOCK THAT IS ON DISPLAY IN THE CITY 0833: A Ferris wheel that is next to a body of water. 0834: A smiling girl standing beside a sign that says "Princess Parking Only". 0835: A group of people sit at a table with cake. 0836: This basic kitchen has tools sitting on the floor 0837: A young child and cat in a living room. 0838: 'Performers holding sticks in front of horses at a circus with audience. ' 0839: A living room with hard wood flooring and black furniture. 0840: A cat sprawled out over the top of a laptop computer keyboard. 0841: An elephant at a zoo on a sunny day 0842: 'The woman is reading something while laying down in bed. ' 0843: A group of people and luggage on a airport tarmac. 0844: A red bus sitting on the side of a road. 0845: two dishes holding a bunch of vegetables and fruit 0846: There is a phone on top of a calculator 0847: A young man in a suit and tie with a beer in his hand. 0848: Lady walking on sidewalk in the rain with an umbrella over her head. 0849: A woman resting her head on top of a baseball bat. 0850: A small airplane coming in for a landing. 0851: Young women having a cigarette in their back yard 0852: A person holding a teddy bear as she writes on her belly. 0853: A bicycle parked near a curb on the highway 0854: A woman standing in front of a box handing a woman a bag of food. 
0855: A black and white cat sits on a red cloth that is over a television set. 0856: A lot of people that are in a room together. 0857: 'A person standing in the snow on skies. ' 0858: People on a beach and a line of surfboards. 0859: 'An apple computer monitor, keyboard and mouse sits on top of a clean wooden desk. ' 0860: a man swings a baseball bat at a ball 0861: Several motorcycles are parked on the green grass. 0862: Three teddy bears sit in a sled in fake snow. 0863: a lady that is smoking something and has a large hat 0864: Most people negotiate the city streets with motorbikes in this Asian city. 0865: A laptop and a computer mouse on a desk. 0866: a group of people that are petting a cat 0867: A baby crawls on the floor beside a blue cell phone. 0868: a row of motorcycles parked on a city street 0869: A man taking a picture of his reflection in a motorcycle mirror. 0870: A man sitting on a chair playing a video game. 0871: a little boy touching the nose of a brown horse 0872: A motorized cart fills a train with luggage 0873: A lone woman huddles under her umbrella at a picnic table in the rain. 0874: A group of people standing with remotes in hand. 0875: a street sign with buildings in the background 0876: A calico cat standing upon a bathroom sink. 0877: Two kids sitting in a luggage cart together 0878: 'A display of a variety of donuts at a store. ' 0879: Fish eye angle view of small kitchen with fire extinguisher at far end. 0880: The catcher races from behind homeplate onto the field. 0881: A man with a cup of coffee and a cell phone. 0882: 'a bed that looks like it has blood on it and a big hole in the paneling ' 0883: Woman surfing small wave using a body board. 0884: some people are looking at different television sets 0885: A guy smiling while standing under a run for rights banner. 0886: Two boys standing up playing a video game. 0887: Woman in red shirt getting ready to hit tennis ball. 0888: Trolly in the middle of the city next to people. 
0889: Group of soccer players on field kicking ball. 0890: 'a horse with a person riding on top of it ' 0891: A horse and a dog stand in a meadow. 0892: A white and gray bird soaring over the blue ocean. 0893: I do not know what this is supposed to be.. 0894: A woman that is sitting outside on a bench in the snow. 0895: A very small boy on the beach with a disc. 0896: A part of a cake on a plate with a knife and cake server next to it. 0897: Panoramic view of a hospital room with medical machines and a woman lying in hospital bed. 0898: 'The roll of toilet paper beside the toilet is empty. ' 0899: A bathroom with sheer curtains framing the tub 0900: Girl holding onto Dad on the back of a motorcycle in traffic. 0901: Couple of Seagulls stand next to each other watching the same thing 0902: A green and yellow train traveling past a platform. 0903: A banana replacing the phone on an answering machine 0904: A double sink vanity with mirror, shower stall and toilet in a bathroom. 0905: A wedding cake is shown with pink petals. 0906: The side of an old building is fenced off. 0907: tied together broccoli inside an aquarium with fish 0908: A few people flying homemade kites in the center of some buildings. 0909: A bobble head is shown next to computer screens. 0910: 'The neon purple toilet with lid lifted is in the bathroom with brown tile. ' 0911: A child standing in a bathroom wrapped in a towel. 0912: Two plates containing a hamburger and chips, and one drink are sitting on a small table. 0913: a tennis player is serving the ball to his opponent 0914: The look on the tennis player's face telegraphs a possible error. 0915: A flock of sheep are grazing on a grassy slope. 0916: A woman holding a little boy who is brushing his teeth. 0917: A couple of dirt bikers in a race. 0918: A young child sitting on a kitchen counter next to some sliced apples. 0919: A flock of sheep walking along a grassy hillside grazing. 
0920: A bench right next to some tall grass at the edge of a body of water. 0921: some kind of cage that is next to a tree 0922: 'A person in a black snow suit on a pair of skis in the snow. ' 0923: A bathroom with a toilet, bathtub, and a cat in a bathroom. 0924: A young boy flying a colorful kite on top of a sidewalk. 0925: A group of ski racers on a race trail. 0926: A clear vase of simple flowers is sitting in a windowsill. 0927: Vintage street with cars and buses lining it 0928: A windblown inverted umbrella mounted on a pole with city buildings behind. 0929: Some office supplies that have been grouped together. 0930: A brown horse standing next to a woman in front of a house. 0931: A close up of a pizza pie sitting on a table. 0932: 'The guy is standing outside taking his picture in his suit. ' 0933: People standing in an over cast ski looking out to sea with surf boards. 0934: A group of cows standing next to a line of laundry drying. 0935: Several students sit at a conference table with their laptops. 0936: a person sitting on a bench with a view of a body of water 0937: A man swinging a tennis racquet on a court. 0938: A purple and white city bus pulling up to the curb 0939: Two pizzas sitting in pie pans on top of a stove. 0940: A white horse standing on top of a dirt field. 0941: Old wooden boats in dry dock on a grey dreary day. 0942: A man makes a jump on his skateboard before a crowd. 0943: A person doing a trick on water ski's while another passes underneath them. 0944: 'Two jockeys ride their horses in a race at the track. ' 0945: A woman sits eating food at a table. 0946: A red plate topped with broccoli, meat and veggies. 0947: 'a man sits at a table with a bunch of doughnuts ' 0948: 'Three men and one woman sit at a table eating pizza. ' 0949: A small group of cows standing in front of the camera. 0950: The skate boarder is doing a jumping trick. 0951: A train is traveling along a stretch of track. 
0952: A broken suitcase is on the side of the road. 0953: Two men on horses drive cattle down the road. 0954: Two elephants are facing each other on the side of a dirt road 0955: A man is posing for a camera in a room. 0956: A young man and his cute cat enjoy a nap together. 0957: A baby giraffe drinking milk from it's mother in a field. 0958: Motorcyclist on chromed motorcycle rounding a curve roadway. 0959: 'Four signs are stacked together under two others. ' 0960: A person has their cell phone clipped to their belt, 0961: A little boy holding a teddy bear walking past purple flowers. 0962: 'The rail trolley is driving in front of other automobiles. ' 0963: A baseball player swings the bat while the catcher & umpire look on 0964: A bed and a mirror in a small room. 0965: A book shelf filled with lots of colorful books. 0966: 'a plate covered with eggs, meat, pototes and veggies ' 0967: An 023 airplane flying solo above blue terrain. 0968: A room with woman at a desk next to a brown dog laying on the floor. 0969: A large plane sits on the large runway. 0970: A female statue in a sea of green grass next to plant 0971: Meat and a salad with knife and fork on a plate. 0972: A truck drives in the middle of a neighborhood road. 0973: A train coming down the tracks arriving at a station. 0974: In lady standing with her hand on carrots in a market. 0975: a grey couch and a glass table a very nice house 0976: 'A person holding a plate with food ' 0977: Several professionally dressed men watching a woman holding a Wii controller 0978: A little kid that is doing a skateboard trick in the air. 0979: A trail guide stands next to the loaded pack horse. 0980: A variety of beer sitting behind some bananas. 0981: A snow covered road with a stop sign next to it. 0982: Many surfboards are propped against a rail on the beach. 0983: A key bank sign with a clock on a building 0984: A young man skateboarding casts a shadow on the concrete. 
0985: A man wearing a shirt and a pixilated looking neck tie. 0986: The Big Ben clock tower in red and blue shade of color. 0987: A woman wearing a creepy mask standing next to a child. 0988: 'A close shot of some type of machine, maybe in a factory. ' 0989: a man rides his skateboard on a small ledge while another man lights the spray from a can on fire 0990: a young boy sitting on a bed with a lamp on beside it 0991: This gentleman is shirtless, walking his bicycle down the street of an Asian city. 0992: A bedroom with bunk beds and the bathroom next door. 0993: A girl sitting on a bed is taking a self portrait. 0994: GIRAFFES EATING THE SHRUBBERY OR THE TOP OF TREES 0995: A boy in a brown coat playing with a camera. 0996: An old style bedroom with a bed in the middle of it. 0997: A nice red car parked in front of a roadside junk sale. 0998: A zebra standing on land and eating grass near a body of water that is behind it. 0999: A group of zebras that are in a field. '1000': A yellow fire hydrant by a wall and a sign '1001': Several people are leaving a bus station with a bus next to it as a person rides by on a bicycle '1002': A black dog laying on the floor sitting alcohol from a bottle. '1003': 'a man on skis standing on a snowy terrain ' '1004': some red and black buses are parked and a building '1005': A man is doing skateboard tricks on a ramp '1006': A nearly empty plate containing broccoli and brown sauce. '1007': A group of people holding a large pair of scissors to a ribbon. '1008': A group of different colored teddy bears sitting on top of a blue table. '1009': 'Outdoor impromptu bar scene on wooden bench, ice coolers, blender, bottles and several other items, appearing very crowded. ' '1010': A group of people walking on the street in a city. '1011': Two men ride on atv's and tilt them to the side. '1012': A fire hydrant outside a shop with graffiti. 
'1013': a couple of pizzas are sat out on a table '1014': A DOG QUIETLY SLEEPING IN HIS BED ENJOYING THE SUN. '1015': The boy waits to hit the incoming baseball. '1016': A man in a green shirt stands by a girl holding a piece of cake on a plate. '1017': People are standing on the corner of a street. '1018': 'a small green and white boat parked near the sidewalk near a lake ' '1019': a jumbo jet flies through the sky with the moon peeking out '1020': a bird sitting on a brick of bird food on a pole '1021': 'Two cats are sitting in a window sill. ' '1022': Trade show visitors and workers in a vendor booth. '1023': Groups of people seated under shade umbrellas at blue tables and chairs drinking beverages. ================================================ FILE: examples/diffusion/scripts/svdquant.sh ================================================ python -m deepcompressor.app.diffusion.ptq configs/model/flux.1-schnell.yaml configs/svdquant/int4.yaml ================================================ FILE: examples/llm/.gitignore ================================================ .tmp .tmp/ ================================================ FILE: examples/llm/README.md ================================================ # QServe: *W4A8KV4* Quantization for Efficient LLM Serving [[Website](https://hanlab.mit.edu/projects/qserve)][[Paper](https://arxiv.org/abs/2405.04532)][[QServe GPU Inference System](https://github.com/mit-han-lab/qserve)] Quantization can accelerate large language model (LLM) inference. Going beyond INT8 quantization, the research community is actively exploring even lower precision, such as INT4. Nonetheless, state-of-the-art INT4 quantization techniques only accelerate low-batch, edge LLM inference, failing to deliver performance gains in large-batch, cloud-based LLM serving. We uncover a critical issue: existing INT4 quantization methods suffer from significant runtime overhead (20-90%) when **dequantizing either weights or partial sums** on GPUs. 
To address this challenge, we introduce **QoQ**, a W4A8KV4 quantization algorithm with 4-bit weight, 8-bit activation, and 4-bit KV cache. QoQ stands for **quattuor-octo-quattuor**, which represents 4-8-4 in Latin. QoQ is implemented by the **QServe** inference library that achieves measured speedup. The key insight driving QServe is that the efficiency of LLM serving on GPUs is critically influenced by **operations on low-throughput CUDA cores**. Building upon this insight, in QoQ algorithm, we introduce progressive quantization that can allow low dequantization overhead in W4A8 GEMM. Additionally, we develop SmoothAttention to effectively mitigate the accuracy degradation incurred by 4-bit KV quantization. In the QServe system, we perform compute-aware weight reordering and take advantage of register-level parallelism to reduce dequantization latency. We also make fused attention memory-bound, harnessing the performance gain brought by KV4 quantization. As a result, QServe improves the maximum achievable serving throughput of Llama-3-8B by **1.2×** on A100, **1.4×** on L40S; and Qwen1.5-72B by **2.4×** on A100, **3.5×** on L40S, compared to TensorRT-LLM. ![QoQ-QServe](/assets/llm/qoq/qoq-qserve.png) ![QoQ](/assets/llm/qoq/qoq.png) ## Usage The following command will perform per-channel QoQ quantization (W4A8KV4) and evaluate the quantized model on Wikitext-2: ```bash python -m deepcompressor.app.llm.ptq \ configs/qoq-gchn.yaml \ --model-name llama-2-7b --model-path /PATH/TO/LLAMA-2-7B \ --smooth-proj-alpha 0 --smooth-proj-beta 1 \ --smooth-attn-alpha 0.5 --smooth-attn-beta 0 ``` In this command, - The positional arguments are configuration files which are loaded in order. [`configs/qoq-gchn.yaml`](configs/qoq-gchn.yaml) contains the quantization configurations specialized in QoQ per-channel W4A8KV4 quantization. Please make sure all configuration files are under a subfolder of the working directory where you run the command. 
- All configurations can be set directly in either a YAML file or on the command line. Please refer to [`configs/__default__.yaml`](configs/llm.yaml) and `python -m deepcompressor.app.llm.ptq -h`. - `--model-name llama-2-7b` specifies the model name, e.g., llama-30b, llama-3-8b, mixtral-8x7b. - `--model-path /PATH/TO/LLAMA-2-7B` specifies the path to the llama-2-7b model directory. If your model directories are organized as `PATH_TO_ROOT_DIR/MODEL_FAMILY/MODEL_NAME` (e.g., `~/models/llama-2/llama-2-7b`), you can simply specify `--model-root PATH_TO_ROOT_DIR` (e.g., ```--model-root ~/models```). - `--smooth-proj-alpha 0` specifies the alpha for SmoothLinear to be 0. `--smooth-attn-alpha 0.5` specifies the alpha for SmoothAttention to be 0.5. - The default task is [GPTQ-style](https://github.com/IST-DASLab/gptq/blob/main/llama.py#L218) Wikitext2 perplexity evaluation. If you would like to evaluate the accuracy on zero-shot tasks such as Hellaswag using [lm_eval](https://github.com/EleutherAI/lm-evaluation-harness), please add `--eval-tasks EVAL_TASK [EVAL_TASK ...] --eval-evaluators lm_eval` to the command. You can use `--eval-tasks zero-shot --eval-evaluators lm_eval` which will automatically add [wikitext, hellaswag, piqa, winogrande, arc_easy, arc_challenge](/deepcompressor/llm/eval.py#L51) to the evaluation tasks. - If you would like to save the quantized model checkpoint, please add `--save-model true` to the command. ## Deployment ### Deployment with QServe Engine If you save the QoQ W4A8KV4 quantized model checkpoint, you can easily deploy the quantized model with the [`QServe`](https://github.com/mit-han-lab/qserve) engine.
Please run the following command to convert the saved checkpoint to a QServe-compatible checkpoint: ```bash python -m deepcompressor.backend.qserve.convert \ --model-path /PATH/TO/HUGGINGFACE-MODEL \ --quant-path /PATH/TO/QUANTIZED-MODEL \ --weight-bits 4 \ --output-root /ROOT/PATH/TO/OUTPUT-MODEL/DIRECTORY ``` After we have the QServe-compatible checkpoint, please switch to the QServe conda environment and run [qserve_e2e_generation.py](https://github.com/mit-han-lab/qserve/tree/main/qserve_e2e_generation.py) to deploy the quantized model with the QServe engine. ```bash conda deactivate conda activate qserve cd /PATH/TO/QSERVE python qserve_e2e_generation.py \ --model /PATH/TO/OUTPUT-MODEL \ --ifb-mode \ --precision w4a8kv4 \ --quant-path /PATH/TO/OUTPUT-MODEL \ --group-size GROUP_SIZE ``` Please refer to [`QServe`](https://github.com/mit-han-lab/qserve) for further details. ### Deployment with TinyChat Engine If you save the 4-bit weight quantized model checkpoint by running the following command, ```bash python -m deepcompressor.app.llm.ptq \ configs/awq.yaml \ --model-name llama-3-8b-instruct --model-path /PATH/TO/LLAMA-3-8B-INSTRUCT ``` you can easily deploy the quantized model with the [`TinyChat`](https://github.com/mit-han-lab/llm-awq) engine. Please run the following command to convert the saved checkpoint to a TinyChat-compatible checkpoint: ```bash python -m deepcompressor.backend.tinychat.convert \ --model-name MODEL_NAME \ --quant-path /PATH/TO/QUANTIZED-MODEL \ --output-root /ROOT/PATH/TO/OUTPUT-MODEL/DIRECTORY ``` After we have the TinyChat-compatible checkpoint, please switch to the TinyChat conda environment and run [demo.py](https://github.com/mit-han-lab/llm-awq/tree/main/tinychat/demo.py) to deploy the quantized model with the TinyChat engine. 
```bash conda deactivate conda activate tinychat cd /PATH/TO/TINYCHAT python demo.py --model_type llama \ --model_path /PATH/TO/LLAMA-3-8B-INSTRUCT \ --q_group_size GROUP_SIZE \ --load_quant /PATH/TO/OUTPUT-MODEL \ --precision W4A16 ``` Please refer to [`TinyChat`](https://github.com/mit-han-lab/llm-awq/tree/main/tinychat) for further details. ## Evaluation Results ### Perplexity Evaluation Below is the WikiText2 perplexity evaluated with 2048 sequence length. Lower is better. | Methods | Precision | Llama-3.1 70B | Llama-3.1 8B | Llama-3 70B | Llama-3 8B | Llama-2 7B | Llama-2 13B | Llama-2 70B | Llama 7B | Llama 13B | Llama 30B | Mistral 7B | Yi 34B | |-------------|--------------|---------------|--------------|-------------| ------------|------------|-------------|-------------|----------|-----------|-----------|------------|--------| | FP16 | | 2.81 | 6.24 | 2.85 | 6.14 | 5.47 | 4.88 | 3.32 | 5.68 | 5.09 | 4.10 | 5.25 | 4.60 | | SmoothQuant | W8A8 | 3.23 | 6.38 | 3.14 | 6.28 | 5.54 | 4.95 | 3.36 | 5.73 | 5.13 | 4.23 | 5.29 | 4.69 | | GPTQ-R | W4A16 g128 | 3.46 | 6.64 | 3.42 | 6.56 | 5.63 | 4.99 | 3.43 | 5.83 | 5.20 | 4.22 | 5.39 | 4.68 | | AWQ | W4A16 g128 | 3.22 | 6.60 | 3.20 | 6.54 | 5.60 | 4.97 | 3.41 | 5.78 | 5.19 | 4.21 | 5.37 | 4.67 | | QuaRot | W4A4 | 5.97 | 8.32 | 6.75 | 8.33 | 6.19 | 5.45 | 3.83 | 6.34 | 5.58 | 4.64 | 5.77 | - | | SpinQuant | W4A4 | 4.80 | 7.42 | 6.27 | 7.37 | 5.96 | 5.24 | 3.71 | 6.14 | 5.39 | 4.56 | - | - | | Atom | W4A4 g128 | - | - | 4.33 | 7.78 | 6.12 | 5.31 | 3.73 | 6.25 | 5.52 | 4.61 | 5.76 | 4.97 | | QoQ | W4A8KV4 | 3.68 | 6.87 | 3.65 | 6.81 | 5.75 | 5.11 | 3.50 | 5.92 | 5.27 | 4.31 | 5.44 | 4.73 | | QoQ | W4A8KV4 g128 | 3.51 | 6.77 | 3.50 | 6.70 | 5.67 | 5.06 | 3.46 | 5.88 | 5.23 | 4.27 | 5.41 | 4.73 | \* SmoothQuant is evaluated with per-tensor static KV cache quantization. \* SpinQuant is calibrated with Wikitext-2 dataset. 
### Efficiency Benchmarks When serving the large language models Llama-3-8B and Qwen1.5-72B on L40S and A100 GPUs, QServe demonstrates superior performance, achieving **1.2x-1.4x higher throughput** compared to the leading industry solution, TensorRT-LLM, for Llama-3-8B, and a **2.4x-3.5x higher throughput** for Qwen1.5-72B. See more about benchmarking setting in [QServe GPU Inference System](https://github.com/mit-han-lab/qserve). | L40S (48G) | Llama-3-8B | Llama-2-7B | Mistral-7B | Llama-2-13B | Llama-30B | Yi-34B | Llama-2-70B | Qwen-1.5-72B | |----------------------|------------|------------|------------|-------------|-----------|-----------|-------------|--------------| | TRT-LLM-FP16 | 1326 | 444 | 1566 | 92 | OOM | OOM | OOM | OOM | | TRT-LLM-W4A16 | 1431 | 681 | 1457 | 368 | 148 | 313 | 119 | 17 | | TRT-LLM-W8A8 | 2634 | 1271 | 2569 | 440 | 123 | 364 | OOM | OOM | | Atom-W4A4 | -- | 2120 | -- | -- | -- | -- | -- | -- | | QuaRot-W4A4 | -- | 805 | -- | 413 | 133 | -- | -- | 15 | | QServe-W4A8KV4 | **3656** | **2394** | **3774** | **1327** | **504** | **869** | **286** | **59** | | Throughput Increase* | **1.39x** | **1.13x** | **1.47x** | **3.02x** | **3.41x** | **2.39x** | **2.40x** | **3.47x** | | A100 (80G) | Llama-3-8B | Llama-2-7B | Mistral-7B | Llama-2-13B | Llama-30B | Yi-34B | Llama-2-70B | Qwen-1.5-72B | |----------------------|------------| -----------|------------|-------------|-----------|-----------|-------------|--------------| | TRT-LLM-FP16 | 2503 | 1549 | 2371 | 488 | 80 | 145 | OOM | OOM | | TRT-LLM-W4A16 | 2370 | 1549 | 2403 | 871 | 352 | 569 | 358 | 143 | | TRT-LLM-W8A8 | 2396 | 2334 | 2427 | 1277 | 361 | 649 | 235 | 53 | | Atom-W4A4 | -- | 1160 | -- | -- | -- | -- | -- | -- | | QuaRot-W4A4 | -- | 1370 | -- | 289 | 267 | -- | -- | 68 | | QServe-W4A8KV4 | **3005** | **2908** | **2970** | **1741** | **749** | **803** | **419** | **340** | | Throughput Increase* | **1.20x** | **1.25x** | **1.22x** | **1.36x** | **2.07x** | **1.23x** | 
**1.17x** | **2.38x** | The absolute token generation throughputs of QServe and baseline systems (Unit: tokens/second. `--` means unsupported). All experiments were conducted under the same device memory budget. Throughput increase of QServe is calculated with regard to the best baseline in each column. ## Reference If you find `deepcompressor` useful or relevant to your research, please kindly cite our paper: ``` @article{lin2024qserve, title={QServe: W4A8KV4 Quantization and System Co-design for Efficient LLM Serving}, author={Lin*, Yujun and Tang*, Haotian and Yang*, Shang and Zhang, Zhekai and Xiao, Guangxuan and Gan, Chuang and Han, Song}, journal={arXiv preprint arXiv:2405.04532}, year={2024} } ``` ================================================ FILE: examples/llm/configs/__default__.yaml ================================================ seed: 12345 save_model: false cache: root: runs output: root: runs dirname: default model: name: llama-2-7b path: null root: '' local_path: null local_root: ~/models dtype: torch.float16 eval: num_gpus: 8 batch_size: 8 tasks: - wikitext max_seq_length: -4096 evaluators: - gptq quant: calib: data: pileval path: mit-han-lab/pile-val-backup num_samples: 128 seq_length: 1024 min_seq_length: 0 max_seq_length: 0 develop_dtype: torch.float32 wgts: dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null intermediate_dtypes: [] intermediate_levels: [] needs_dequant_saturation: false enable_kernel_gptq: false kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 enable_calib_range: true calib_range: objective: OutputsError strategy: Manual granularity: Group degree: 2 element_batch_size: 64 sample_batch_size: -1 element_size: 512 sample_size: -1 pre_reshape: true outputs_device: cpu ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skips: [] skips: [] ipts: static: false dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null enable_calib_range: false 
calib_range: objective: OutputsError strategy: GridSearch granularity: ChannelGroup degree: 2 element_batch_size: 64 sample_batch_size: -1 element_size: 512 sample_size: -1 pre_reshape: true outputs_device: cpu ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skips: [] skips: [] opts: static: false dtype: null zero_point: null group_shapes: - - 1 - -1 scale_dtypes: - null enable_calib_range: false calib_range: objective: OutputsError strategy: GridSearch granularity: ChannelGroup degree: 2 element_batch_size: 64 sample_batch_size: -1 element_size: 512 sample_size: -1 pre_reshape: true outputs_device: cpu ratio: 1.0 max_shrink: 0.2 max_expand: 1.0 num_grids: 80 skips: [] skips: [] enable_rotation: false rotation: random: false transforms: [] enable_reorder: false reorder: strategy: Manual degree: 2 sample_batch_size: -1 sample_size: -1 outputs_device: cpu channel_metric: InputsAbsMax channel_index: Sequential dynamic: false skips: - residual enable_smooth: false smooth: enable_proj: false proj: objective: OutputsError strategy: GridSearch granularity: Layer degree: 2 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu spans: - - AbsMax - AbsMax alpha: -3 beta: -3 num_grids: 20 skips: [] enable_attn: false attn: strategy: Manual degree: 2 sample_batch_size: -1 sample_size: -1 outputs_device: cpu spans: - - AbsMax - AbsMax alpha: 0.5 beta: 0 num_grids: 20 ================================================ FILE: examples/llm/configs/awq.yaml ================================================ quant: calib: num_samples: 128 seq_length: 512 min_seq_length: 0 max_seq_length: 512 wgts: dtype: uint4 zero_point: PostScale group_shapes: - - 1 - 128 scale_dtypes: - torch.float16 enable_calib_range: true calib_range: objective: ProductsError strategy: GridSearch granularity: Group degree: 2 max_shrink: 0.8 max_expand: 1.0 num_grids: 20 skips: - qkv_proj ipts: static: false dtype: null group_shapes: - - 1 - -1 
scale_dtypes: - torch.float16 opts: static: false dtype: null group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: GridSearch granularity: Layer spans: - - AbsMax - AbsMax alpha: 0.5 beta: 0 num_grids: 20 enable_attn: false ================================================ FILE: examples/llm/configs/gptq.yaml ================================================ quant: calib: num_samples: 128 seq_length: 2048 min_seq_length: 2048 max_seq_length: 0 wgts: dtype: uint4 zero_point: PostScale group_shapes: - - 1 - 128 scale_dtypes: - torch.float16 enable_kernel_gptq: true kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 enable_calib_range: false calib_range: objective: TensorError strategy: GridSearch granularity: Group degree: 2.4 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu max_shrink: 0.2 max_expand: 1.0 num_grids: 100 ipts: static: false dtype: null group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 opts: static: false dtype: null group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 ================================================ FILE: examples/llm/configs/ooo.yaml ================================================ quant: calib: num_samples: 128 seq_length: 1024 min_seq_length: 0 max_seq_length: 0 wgts: dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 enable_kernel_gptq: true kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 enable_calib_range: true calib_range: objective: OutputsError strategy: GridSearch granularity: Group max_shrink: 0.2 max_expand: 1.0 num_grids: 80 ipts: static: false dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 opts: static: true dtype: sint8 group_shapes: - - -1 - -1 scale_dtypes: - torch.float16 enable_calib_range: true calib_range: objective: OutputsError 
strategy: Manual granularity: Layer degree: 2 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu enable_rotation: true rotation: transforms: - out_proj enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: Manual granularity: Layer degree: 2 spans: - - AbsMax - AbsMax alpha: 0.1 beta: 0.9 num_grids: 20 skips: - qkv_proj - up_proj - out_proj enable_attn: true attn: strategy: GridSearch degree: 2 spans: - - AbsMax - AbsMax alpha: 0.5 beta: -2 num_grids: 20 ================================================ FILE: examples/llm/configs/qoq-g128.yaml ================================================ quant: calib: num_samples: 128 seq_length: 1024 min_seq_length: 0 max_seq_length: 0 wgts: dtype: uint4 zero_point: PostScale group_shapes: - - 1 - -1 - - 1 - 128 scale_dtypes: - torch.float16 - sint8 intermediate_dtypes: - sint8 intermediate_levels: - 0 needs_dequant_saturation: false enable_kernel_gptq: true kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 ipts: static: false dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 opts: static: false dtype: uint4 zero_point: PostScale group_shapes: - - 1 - 128 scale_dtypes: - torch.float16 skips: - attn_q enable_rotation: true enable_reorder: true reorder: strategy: Manual channel_metric: InputsAbsMax channel_index: Sequential skips: - residual enable_smooth: true rotation: transforms: - out_proj smooth: enable_proj: true proj: objective: OutputsError strategy: Manual granularity: Layer degree: 2 spans: - - AbsMax - AbsMax alpha: 0.3 beta: 0.7 num_grids: 20 skips: - qkv_proj - up_proj - out_proj enable_attn: true attn: strategy: Manual degree: 2 spans: - - AbsMax - AbsMax alpha: 0.5 beta: 0 num_grids: 20 ================================================ FILE: examples/llm/configs/qoq-gchn.yaml ================================================ quant: calib: num_samples: 128 
seq_length: 1024 min_seq_length: 0 max_seq_length: 0 wgts: dtype: uint4 zero_point: PostScale group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 enable_kernel_gptq: true kernel_gptq: damp_percentage: 0.01 block_size: 128 num_inv_tries: 250 hessian_block_size: 512 enable_calib_range: true calib_range: objective: OutputsError strategy: GridSearch granularity: Group max_shrink: 0.2 max_expand: 1.0 num_grids: 80 ipts: static: false dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - torch.float16 opts: static: false dtype: uint4 zero_point: PostScale group_shapes: - - 1 - 128 scale_dtypes: - torch.float16 skips: - attn_q enable_rotation: true rotation: transforms: - out_proj enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: Manual granularity: Layer degree: 2 spans: - - AbsMax - AbsMax alpha: 0.1 beta: 0.9 num_grids: 20 skips: - qkv_proj - up_proj - out_proj enable_attn: true attn: strategy: GridSearch degree: 2 spans: - - AbsMax - AbsMax alpha: 0.5 beta: -2 num_grids: 20 ================================================ FILE: examples/llm/configs/smoothquant-dynamic.yaml ================================================ quant: calib: num_samples: 128 seq_length: 512 min_seq_length: 0 max_seq_length: 0 wgts: dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - null ipts: static: false dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - null opts: static: false dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - null enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: Manual granularity: Layer spans: - - AbsMax - AbsMax alpha: 0.85 beta: 0.15 skips: - out_proj - down_proj enable_attn: false ================================================ FILE: examples/llm/configs/smoothquant-static.yaml ================================================ quant: calib: num_samples: 128 seq_length: 512 min_seq_length: 0 max_seq_length: 0 wgts: dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - null ipts: 
static: false dtype: sint8 group_shapes: - - 1 - -1 scale_dtypes: - null opts: static: true dtype: sint8 group_shapes: - - -1 - -1 scale_dtypes: - null enable_calib_range: true calib_range: objective: OutputsError strategy: Manual granularity: Layer degree: 2 element_batch_size: -1 sample_batch_size: -1 element_size: -1 sample_size: -1 pre_reshape: true outputs_device: cpu enable_smooth: true smooth: enable_proj: true proj: objective: OutputsError strategy: Manual granularity: Layer spans: - - AbsMax - AbsMax alpha: 0.85 beta: 0.15 skips: - out_proj - down_proj enable_attn: false ================================================ FILE: examples/llm/scripts/awq.sh ================================================ # AWQ (W4A16) on Llama2-7B python -m deepcompressor.app.llm.ptq configs/awq.yaml --model-name llama-2-7b # AWQ (W4A16) on Llama2-13B python -m deepcompressor.app.llm.ptq configs/awq.yaml --model-name llama-2-13b # AWQ (W4A16) on Llama2-70B python -m deepcompressor.app.llm.ptq configs/awq.yaml --model-name llama-2-70b # AWQ (W4A16) on Llama3-8B python -m deepcompressor.app.llm.ptq configs/awq.yaml --model-name llama-3-8b # AWQ (W4A16) on Llama3-70B python -m deepcompressor.app.llm.ptq configs/awq.yaml --model-name llama-3-70b ================================================ FILE: examples/llm/scripts/gptq.sh ================================================ # GPTQ-R (W4A16) on Llama2-7B python -m deepcompressor.app.llm.ptq configs/gptq.yaml --model-name llama-2-7b # GPTQ-R (W4A16) on Llama2-13B python -m deepcompressor.app.llm.ptq configs/gptq.yaml --model-name llama-2-13b # GPTQ-R (W4A16) on Llama2-70B python -m deepcompressor.app.llm.ptq configs/gptq.yaml --model-name llama-2-70b # GPTQ-R (W4A16) on Llama3-8B python -m deepcompressor.app.llm.ptq configs/gptq.yaml --model-name llama-3-8b # GPTQ-R (W4A16) on Llama3-70B python -m deepcompressor.app.llm.ptq configs/gptq.yaml --model-name llama-3-70b ================================================ FILE: 
examples/llm/scripts/qoq.sh ================================================ # ========== QoQ with Post-Scale Zero Point =============================== # ========== QoQ (W4A8KV4 with per-channel weight quantization) ========== # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-2-7B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-2-7b --smooth-proj-alpha 0.05 --smooth-proj-beta 0.95 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-2-13B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-2-13b --smooth-proj-alpha 0.05 --smooth-proj-beta 0.95 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-2-70B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-2-70b --smooth-proj-alpha 0.1 --smooth-proj-beta 0.9 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-7B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-7b --smooth-proj-alpha 0.1 --smooth-proj-beta 0.9 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-13B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-13b --smooth-proj-alpha 0 --smooth-proj-beta 1 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-30B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-30b --smooth-proj-alpha 0.1 --smooth-proj-beta 0.9 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-3-8B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-3-8b --smooth-proj-alpha 0.1 --smooth-proj-beta 0.9 --smooth-attn-strategy Manual --smooth-attn-beta 0 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-3-70B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-3-70b --smooth-proj-alpha 0.1 --smooth-proj-beta 0.9 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-3.1-8B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name 
llama-3.1-8b --smooth-proj-alpha 0.05 --smooth-proj-beta 0.95 # QoQ (W4A8KV4 with per-channel weight quantization) on Llama-3.1-70B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name llama-3.1-70b --smooth-proj-alpha 0.05 --smooth-proj-beta 0.95 # QoQ (W4A8KV4 with per-channel weight quantization) on Mistral-7B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name mistral-7b --smooth-proj-alpha 0 --smooth-proj-beta 1 # QoQ (W4A8KV4 with per-channel weight quantization) on Yi-34B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name yi-34b --smooth-proj-alpha 0.15 --smooth-proj-beta 0.85 # QoQ (W4A8KV4 with per-channel weight quantization) on Mixtral-8x7B python -m deepcompressor.app.llm.ptq configs/qoq-gchn.yaml --model-name mixtral-8x7b --smooth-proj-alpha 0.05 --smooth-proj-beta 0.95 # ======================================================================== # ========== QoQ (W4A8KV4 with progressive weight quantization) ========== # QoQ (W4A8KV4 with progressive weight quantization) on Llama-2-7B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-2-7b --smooth-proj-alpha 0.2 --smooth-proj-beta 0.8 # QoQ (W4A8KV4 with progressive weight quantization) on Llama-2-13B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-2-13b --smooth-proj-alpha 0.35 --smooth-proj-beta 0.65 # QoQ (W4A8KV4 with progressive weight quantization) on Llama-2-70B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-2-70b --smooth-proj-alpha 0.35 --smooth-proj-beta 0.65 # QoQ (W4A8KV4 with progressive weight quantization) on Llama-7B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-7b --smooth-proj-alpha 0.3 --smooth-proj-beta 0.7 # QoQ (W4A8KV4 with progressive weight quantization) on Llama-13B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-13b --smooth-proj-alpha 0.2 
--smooth-proj-beta 0.8 --smooth-attn-strategy GridSearch --smooth-attn-beta " -2" # QoQ (W4A8KV4 with progressive weight quantization) on Llama-30B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-30b --smooth-proj-alpha 0.25 --smooth-proj-beta 0.75 # QoQ (W4A8KV4 with progressive weight quantization) on Llama-3-8B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-3-8b --smooth-proj-alpha 0.3 --smooth-proj-beta 0.7 --smooth-attn-strategy GridSearch --smooth-attn-beta " -2" # QoQ (W4A8KV4 with progressive weight quantization) on Llama-3-70B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-3-70b --smooth-proj-alpha 0.35 --smooth-proj-beta 0.65 --smooth-attn-strategy GridSearch --smooth-attn-beta " -2" # QoQ (W4A8KV4 with progressive weight quantization) on Llama-3.1-8B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-3.1-8b --smooth-proj-alpha 0.35 --smooth-proj-beta 0.65 --smooth-attn-strategy GridSearch --smooth-attn-beta " -2" # QoQ (W4A8KV4 with progressive weight quantization) on Llama-3.1-70B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name llama-3.1-70b --smooth-proj-alpha 0.3 --smooth-proj-beta 0.7 --smooth-attn-strategy GridSearch --smooth-attn-beta " -2" # QoQ (W4A8KV4 with progressive weight quantization) on Mistral-7B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name mistral-7b --smooth-proj-alpha 0.15 --smooth-proj-beta 0.85 # QoQ (W4A8KV4 with progressive weight quantization) on Yi-34B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name yi-34b --smooth-proj-alpha 0.35 --smooth-proj-beta 0.65 # QoQ (W4A8KV4 with progressive weight quantization) on Mixtral-8x7B python -m deepcompressor.app.llm.ptq configs/qoq-g128.yaml --model-name mixtral-8x7b --smooth-proj-alpha 0.25 --smooth-proj-beta 0.75 --smooth-attn-strategy GridSearch --smooth-attn-beta " -2" # 
======================================================================== # ======================================================================== ================================================ FILE: examples/llm/scripts/smoothquant.sh ================================================ # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Llama2-7B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name llama-2-7b --smooth-proj-alpha 0.85 --smooth-proj-beta 0.15 # SmoothQuant (W8A8 with per-tensor static KV quantization) on Llama2-7B python -m deepcompressor.app.llm.ptq configs/smoothquant-static.yaml --model-name llama-2-7b --smooth-proj-alpha 0.85 --smooth-proj-beta 0.15 # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Llama2-13B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name llama-2-13b --smooth-proj-alpha 0.85 --smooth-proj-beta 0.15 # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Llama2-70B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name llama-2-70b --smooth-proj-alpha 0.9 --smooth-proj-beta 0.1 # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Llama3-8B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name llama-3-8b --smooth-proj-alpha 0.85 --smooth-proj-beta 0.15 # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Llama3-70B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name llama-3-70b --smooth-proj-alpha 0.85 --smooth-proj-beta 0.15 # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Mistral-7B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name mistral-7b --smooth-proj-alpha 0.8 --smooth-proj-beta 0.2 # SmoothQuant (W8A8 with per-token dynamic KV quantization) on Mixtral-8x7B python -m deepcompressor.app.llm.ptq configs/smoothquant-dynamic.yaml --model-name mixtral-8x7b --smooth-proj-alpha 0.8 
--smooth-proj-beta 0.2 ================================================ FILE: pyproject.toml ================================================ [tool.poetry] name = "deepcompressor-toolkit" version = "0.0.2" description = "This package is model compression toolkit for large language models and diffusion models." authors = [ "Yujun Lin", "Muyang Li", "Shang Yang", "Zhekai Zhang", "Haotian Tang", "Song Han", ] packages = [ { include = "deepcompressor" } ] license = "Apache-2.0" readme = "README.md" [tool.poetry.dependencies] python = ">= 3.10 < 4.0" tqdm = ">= 4.66.0" torch = ">= 2.5.0" torchvision = ">= 0.18.1" torchmetrics = ">= 1.4.0" ninja = ">= 1.11.1" bitsandbytes = ">= 0.42.0" transformers = ">= 4.46.0" lm_eval = ">= 0.4.2" accelerate = ">= 0.26.0" datasets = ">= 2.16.0" sentencepiece = ">= 0.1.99" omniconfig = ">= 0.1.10" jieba = ">= 0.42.1" fuzzywuzzy = ">= 0.18.0" rouge = ">= 1.0.1" python-Levenshtein = ">=0.26.1" protobuf = ">= 5.26.0" diffusers = ">= 0.32.0" clean-fid = ">= 0.1.35" dominate = ">= 2.9.1" opencv-python = ">= 4.10.0" einops = ">= 0.8.0" timm = ">= 1.0.7" rotary-embedding-torch = ">= 0.6.4" bs4 = ">= 0.0.2" ftfy = ">= 6.2.0" cd-fvd = ">= 0.1.1" xformers = ">= 0.0.26" pyav = ">= 13.0.0" clip = ">= 0.2.0" image_reward = { git = "https://github.com/THUDM/ImageReward.git", branch = "main" } [tool.poetry.group.dev.dependencies] matplotlib = ">= 3.8.4" ipython = ">= 8.0.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.ruff] line-length = 120 indent-width = 4 target-version = "py310" [tool.ruff.lint] select = ["B", "C", "E", "F", "I", "W"] ignore = [] [tool.ruff.lint.mccabe] max-complexity = 15 [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401", "F403"] [tool.ruff.format] quote-style = "double" indent-style = "space" skip-magic-trailing-comma = false line-ending = "auto"