Repository: sphincs/sphincsplus Branch: master Commit: 7ec789ace687 Files: 161 Total size: 526.4 KB Directory structure: gitextract_8vizip4a/ ├── .github/ │ └── workflows/ │ ├── test-haraka-aesni.yml │ ├── test-ref.yml │ ├── test-sha2-avx2.yml │ └── test-shake-avx2.yml ├── .reuse/ │ └── dep5 ├── LICENSE ├── LICENSES/ │ ├── 0BSD.txt │ ├── CC0-1.0.txt │ ├── LicenseRef-SPHINCS-PLUS-Public-Domain.txt │ ├── MIT-0.txt │ └── MIT.txt ├── README.md ├── SHA256SUMS ├── benchmark.py ├── haraka-aesni/ │ ├── .gitignore │ ├── Makefile │ ├── context.h │ ├── haraka.c │ ├── harakax4.h │ ├── hash_harakax4.c │ ├── test/ │ │ └── benchmark.c │ ├── thash_haraka_robustx4.c │ └── thash_haraka_simplex4.c ├── ref/ │ ├── .gitignore │ ├── Makefile │ ├── PQCgenKAT_sign.c │ ├── address.c │ ├── address.h │ ├── api.h │ ├── context.h │ ├── fips202.c │ ├── fips202.h │ ├── fors.c │ ├── fors.h │ ├── haraka.c │ ├── haraka.h │ ├── haraka_offsets.h │ ├── hash.h │ ├── hash_haraka.c │ ├── hash_sha2.c │ ├── hash_shake.c │ ├── merkle.c │ ├── merkle.h │ ├── params/ │ │ ├── params-sphincs-haraka-128f.h │ │ ├── params-sphincs-haraka-128s.h │ │ ├── params-sphincs-haraka-192f.h │ │ ├── params-sphincs-haraka-192s.h │ │ ├── params-sphincs-haraka-256f.h │ │ ├── params-sphincs-haraka-256s.h │ │ ├── params-sphincs-sha2-128f.h │ │ ├── params-sphincs-sha2-128s.h │ │ ├── params-sphincs-sha2-192f.h │ │ ├── params-sphincs-sha2-192s.h │ │ ├── params-sphincs-sha2-256f.h │ │ ├── params-sphincs-sha2-256s.h │ │ ├── params-sphincs-shake-128f.h │ │ ├── params-sphincs-shake-128s.h │ │ ├── params-sphincs-shake-192f.h │ │ ├── params-sphincs-shake-192s.h │ │ ├── params-sphincs-shake-256f.h │ │ └── params-sphincs-shake-256s.h │ ├── params.h │ ├── randombytes.c │ ├── randombytes.h │ ├── rng.c │ ├── rng.h │ ├── sha2.c │ ├── sha2.h │ ├── sha2_offsets.h │ ├── shake_offsets.h │ ├── sign.c │ ├── test/ │ │ ├── benchmark.c │ │ ├── cycles.c │ │ ├── cycles.h │ │ ├── fors.c │ │ ├── haraka.c │ │ └── spx.c │ ├── thash.h │ ├── thash_haraka_robust.c 
│ ├── thash_haraka_simple.c │ ├── thash_sha2_robust.c │ ├── thash_sha2_simple.c │ ├── thash_shake_robust.c │ ├── thash_shake_simple.c │ ├── utils.c │ ├── utils.h │ ├── utilsx1.c │ ├── utilsx1.h │ ├── wots.c │ ├── wots.h │ ├── wotsx1.c │ └── wotsx1.h ├── sha2-avx2/ │ ├── .gitignore │ ├── Makefile │ ├── context.h │ ├── fors.c │ ├── hash_sha2x8.c │ ├── hashx8.h │ ├── merkle.c │ ├── sha256avx.c │ ├── sha256avx.h │ ├── sha256x8.c │ ├── sha256x8.h │ ├── sha512x4.c │ ├── sha512x4.h │ ├── test/ │ │ ├── benchmark.c │ │ └── thashx8.c │ ├── thash_sha2_robustx8.c │ ├── thash_sha2_simplex8.c │ ├── thashx8.h │ ├── utilsx8.c │ ├── utilsx8.h │ ├── wots.c │ └── wotsx8.h ├── shake-a64/ │ ├── .gitignore │ ├── Makefile │ ├── context.h │ ├── f1600x2.h │ ├── f1600x2.s │ ├── f1600x2_const.c │ ├── fips202x2.c │ ├── fips202x2.h │ ├── fors.c │ ├── hash_shakex2.c │ ├── hashx2.h │ ├── merkle.c │ ├── test/ │ │ ├── benchmark.c │ │ └── thashx2.c │ ├── thash.h │ ├── thash_shake_robustx2.c │ ├── thash_shake_simplex2.c │ ├── thashx2.h │ ├── utilsx2.c │ ├── utilsx2.h │ ├── wots.c │ └── wotsx2.h ├── shake-avx2/ │ ├── .gitignore │ ├── Makefile │ ├── context.h │ ├── fips202x4.c │ ├── fips202x4.h │ ├── fors.c │ ├── hash_shakex4.c │ ├── hashx4.h │ ├── keccak4x/ │ │ ├── KeccakP-1600-times4-SIMD256.c │ │ ├── KeccakP-1600-times4-SnP.h │ │ ├── KeccakP-1600-unrolling.macros │ │ ├── SIMD256-config.h │ │ ├── align.h │ │ └── brg_endian.h │ ├── merkle.c │ ├── test/ │ │ ├── benchmark.c │ │ └── thashx4.c │ ├── thash_shake_robustx4.c │ ├── thash_shake_simplex4.c │ ├── thashx4.h │ ├── utilsx4.c │ ├── utilsx4.h │ ├── wots.c │ └── wotsx4.h └── vectors.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/test-haraka-aesni.yml ================================================ name: Tests for haraka-aesni implementation on: - push - pull_request jobs: build: runs-on: ubuntu-latest strategy: 
matrix: size: - 128 - 192 - 256 option: - s - f thash: - simple - robust steps: - uses: actions/checkout@v1 - name: Run make run: | make -C haraka-aesni THASH=${{ matrix.thash }} clean make -C haraka-aesni THASH=${{ matrix.thash }} PARAMS=sphincs-haraka-${{ matrix.size }}${{ matrix.option }} tests make -C haraka-aesni THASH=${{ matrix.thash }} PARAMS=sphincs-haraka-${{ matrix.size }}${{ matrix.option }} test make -C haraka-aesni THASH=${{ matrix.thash }} PARAMS=sphincs-haraka-${{ matrix.size }}${{ matrix.option }} PQCgenKAT_sign - name: Run PQCgenKAT_sign run: python3 vectors.py sphincs-haraka-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} haraka-aesni # vim: set ft=yaml ts=2 sw=2 et : ================================================ FILE: .github/workflows/test-ref.yml ================================================ name: Tests for ref implementation on: - push - pull_request jobs: build: runs-on: ubuntu-latest strategy: matrix: hash: - sha2 - shake - haraka size: - 128 - 192 - 256 option: - s - f thash: - simple - robust steps: - uses: actions/checkout@v1 - name: Run make run: | make -C ref HASH=${{ matrix.hash }} THASH=${{ matrix.thash }} clean make -C ref HASH=${{ matrix.hash }} THASH=${{ matrix.thash }} PARAMS=sphincs-${{ matrix.hash }}-${{ matrix.size }}${{ matrix.option }} tests make -C ref HASH=${{ matrix.hash }} THASH=${{ matrix.thash }} PARAMS=sphincs-${{ matrix.hash }}-${{ matrix.size }}${{ matrix.option }} test make -C ref THASH=${{ matrix.thash }} PQCgenKAT_sign - name: Run PQCgenKAT_sign run: python3 vectors.py sphincs-${{ matrix.hash }}-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} ref # vim: set ft=yaml ts=2 sw=2 et : ================================================ FILE: .github/workflows/test-sha2-avx2.yml ================================================ name: Tests for sha2-avx2 implementation on: - push - pull_request jobs: build: runs-on: ubuntu-latest strategy: matrix: size: - 128 - 192 - 256 option: - s - f thash: - 
simple - robust steps: - uses: actions/checkout@v1 - name: Run make run: | make -C sha2-avx2 THASH=${{ matrix.thash }} clean make -C sha2-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-sha2-${{ matrix.size }}${{ matrix.option }} tests make -C sha2-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-sha2-${{ matrix.size }}${{ matrix.option }} test make -C sha2-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-sha2-${{ matrix.size }}${{ matrix.option }} PQCgenKAT_sign - name: Run PQCgenKAT_sign run: python3 vectors.py sphincs-sha2-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} sha2-avx2 # vim: set ft=yaml ts=2 sw=2 et : ================================================ FILE: .github/workflows/test-shake-avx2.yml ================================================ name: Tests for shake-avx2 implementation on: - push - pull_request jobs: build: runs-on: ubuntu-latest strategy: matrix: size: - 128 - 192 - 256 option: - s - f thash: - simple - robust steps: - uses: actions/checkout@v1 - name: Run make run: | make -C shake-avx2 THASH=${{ matrix.thash }} clean make -C shake-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-shake-${{ matrix.size }}${{ matrix.option }} tests make -C shake-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-shake-${{ matrix.size }}${{ matrix.option }} test make -C shake-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-shake-${{ matrix.size }}${{ matrix.option }} PQCgenKAT_sign - name: Run PQCgenKAT_sign run: python3 vectors.py sphincs-shake-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} shake-avx2 # vim: set ft=yaml ts=2 sw=2 et : ================================================ FILE: .reuse/dep5 ================================================ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: SPHINCS+ Upstream-Contact: contact@sphincs.org Source: https://github.com/sphincs/sphincsplus Files: * License: LicenseRef-SPHINCS-PLUS-Public-Domain OR CC0-1.0 OR 0BSD OR MIT-0 Files: ref/haraka.c Copyright: 2016
Thomas Pornin and SPHINCS+ team License: (LicenseRef-SPHINCS-PLUS-Public-Domain OR CC0-1.0 OR 0BSD OR MIT-0) AND MIT Files: ref/PQCgenKAT_sign.c ref/rng.c ref/rng.h Copyright: 2017 Bassham, Lawrence E (Fed). License: All rights reserved. ================================================ FILE: LICENSE ================================================ SPDX-License-Identifier: (LicenseRef-SPHINCS-PLUS-Public-Domain OR CC0-1.0 OR 0BSD OR MIT-0) AND MIT ================================================ FILE: LICENSES/0BSD.txt ================================================ BSD Zero Clause License Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ================================================ FILE: LICENSES/CC0-1.0.txt ================================================ Creative Commons Legal Code CC0 1.0 Universal CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. 
Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. 
rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. 
Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 4. Limitations and Disclaimers. a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. b. 
Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. ================================================ FILE: LICENSES/LicenseRef-SPHINCS-PLUS-Public-Domain.txt ================================================ This work is hereby placed into the public domain. ================================================ FILE: LICENSES/MIT-0.txt ================================================ MIT No Attribution Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: LICENSES/MIT.txt ================================================ MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ ## SPHINCS+ This repository contains the software that accompanies the [SPHINCS+ submission](https://sphincs.org/) to [NIST's Post-Quantum Cryptography](https://csrc.nist.gov/Projects/Post-Quantum-Cryptography) project. 
![][test-ref] ![][test-sha256-avx2] ![][test-shake256-avx2] ![][test-haraka-aesni] ### Parameters The [SPHINCS+ specification](https://sphincs.org/data/sphincs+-specification.pdf) proposed a set of 36 named instances, specifying hash functions and concrete parameters for the security level, tree dimensions, WOTS+ and FORS. This reference implementation allows for more flexibility, as parameters can be specified in a `params.h` file. The proposed parameter sets have been predefined in `ref/params/params-*.h`, and the hash function can be varied by linking with the different implementations of `hash.h`, i.e., `hash_haraka.c`, `hash_sha2.c` and `hash_shake.c`, as well as different implementations of `thash.h`, i.e., `*_robust.c` and `*_simple.c`. This is demonstrated in the `Makefile`. See the table below for a summary of the parameter sets. These parameters target the NIST security categories 1, 3 and 5; for each category, there is a parameter set geared towards either small signatures or fast signature generation. 
| | n | h | d | log(t) | k | w | bit security | pk bytes | sk bytes | sig bytes | | :------------ | -: | -: | -: | -----: | -: | --: | -----------: | -------: | -------: | --------: | | SPHINCS+-128s | 16 | 63 | 7 | 12 | 14 | 16 | 133 | 32 | 64 | 7,856 | | SPHINCS+-128f | 16 | 66 | 22 | 6 | 33 | 16 | 128 | 32 | 64 | 17,088 | | SPHINCS+-192s | 24 | 63 | 7 | 14 | 17 | 16 | 193 | 48 | 96 | 16,224 | | SPHINCS+-192f | 24 | 66 | 22 | 8 | 33 | 16 | 194 | 48 | 96 | 35,664 | | SPHINCS+-256s | 32 | 64 | 8 | 14 | 22 | 16 | 255 | 64 | 128 | 29,792 | | SPHINCS+-256f | 32 | 68 | 17 | 9 | 35 | 16 | 255 | 64 | 128 | 49,856 | ### License All included code has been placed into [Public Domain](LICENSES/LicenseRef-SPHINCS-PLUS-Public-Domain.txt) and is available under various open source licenses ([Creative Commons Zero v1.0 Universal (CC0-1.0)](LICENSES/CC0-1.0.txt), [BSD Zero Clause License (0BSD)](LICENSES/0BSD.txt), and [MIT No Attribution (MIT-0)](LICENSES/MIT-0.txt), see the [LICENSE file](LICENSE) and the licenses in the [LICENSES folder](LICENSES)), with the exception of `rng.c`, `rng.h` and `PQCgenKAT_sign.c`, which were provided by NIST, and parts of `ref/haraka.c`, which are under [MIT license (MIT)](LICENSES/MIT.txt). 
[test-ref]: https://github.com/sphincs/sphincsplus/actions/workflows/test-ref.yml/badge.svg [test-sha256-avx2]: https://github.com/sphincs/sphincsplus/actions/workflows/test-sha2-avx2.yml/badge.svg [test-shake256-avx2]: https://github.com/sphincs/sphincsplus/actions/workflows/test-shake-avx2.yml/badge.svg [test-haraka-aesni]: https://github.com/sphincs/sphincsplus/actions/workflows/test-haraka-aesni.yml/badge.svg ================================================ FILE: SHA256SUMS ================================================ 9e1b3168520c917b6de676caa7a5799ec972e55caa150090e8452c80c299545e sphincs-haraka-128f-robust c6a28dcf0667bd91c7bb46814ac7408c0375727fe5fec7d41332149006d3f9d7 sphincs-haraka-128f-simple f93f4a554322080545a70f85ce936a12acc2fe928a243e3d13546ffe87872a9e sphincs-haraka-128s-robust 3c9b181d3d96c066039b77e9accd926745fe1ecb010039d3579140b877da6f33 sphincs-haraka-128s-simple 8876bfae8924983db27acfeaee6252d37cea86f05fcc4b16ea2c902d717e6a6e sphincs-haraka-192f-robust df26bd02796f5ad9d6ff412793960e79ec911cbf4521656814895e6ef5a1db83 sphincs-haraka-192f-simple 6cfde6cb5f9ce93eb3f7b0845e1149f661f92000f54e9d340c0bff504920ec7e sphincs-haraka-192s-robust 64037177e1524f2b2d3ea4a79fdaf9352eb39a3aa6e68bc9d3316b7c2b835820 sphincs-haraka-192s-simple a838509fa6ec49ade2638efc35d9e29fdb56bd9b843d5e1f48210a2cab218332 sphincs-haraka-256f-robust e1e3258be6b4467bcea81392363f657a58278a5b99fe240f29e388b0fe72f5da sphincs-haraka-256f-simple b5c5cc535f03789c25c018c009615ac62ba5b64188e4db5e3ede5513e3704dcc sphincs-haraka-256s-robust 9428a566a2c2ee03665fc0eb2dbf208deb1b28716dc8c2d5e7c036a9f83d31da sphincs-haraka-256s-simple b6c82007bbce794f9fd67de708cd4d959319c744b918ddb28795fd491b713aa9 sphincs-sha2-128f-robust 708f6ab77f8026361e975f7be7b9b5d1cd8aca56e4a3604c85ef3f9fe6618549 sphincs-sha2-128f-simple f4c2f31082fc8ad15419edc4f24c34a83d909f75eb37ea5ffe53df0fb5ef5306 sphincs-sha2-128s-robust 65942fac8e225fde77dd277d297e68c94c2e25a2a4089f88be4b56fa92b18a84
sphincs-sha2-128s-simple b8e617db2099e617dfc372ff732eead88872aea791e2fe82628568d75dd03c78 sphincs-sha2-192f-robust 84b1a342683bcad658efb6c65f7367c6b30623e74e3a24c2238d19eaf74722ab sphincs-sha2-192f-simple 50c4b94dc788446077b48af1d8fa0170dc2114b4cb72a19f1d8c7628f9dadfd6 sphincs-sha2-192s-robust 13efa67b9297afa051b9b30e2686266350c8b4000caa49aa432516e2a86d0b68 sphincs-sha2-192s-simple dc3330f8f19c816f45ee9a1127bf2b8a8c900e05df9a964bb760f0adf8f9b1b3 sphincs-sha2-256f-robust 46e286dc1a20012789c1bf4793a8eb2043dd0c11df729fa36d9f96b0aeffdac6 sphincs-sha2-256f-simple 1f42b407e1e351861ba23e520b1974f399e349fcb66c614d727a38fb4e646634 sphincs-sha2-256s-robust c816ca365a667e4d6564a95ac576bc9d7be0de7e66eff93e6f05dd4f134a183f sphincs-sha2-256s-simple 4be71430814589ce7c861030c7cdce0aa73f75885b693b41fdb7c34d8f32fa79 sphincs-shake-128f-robust 5167df2ce46f33b76ccf0688f7769217d91878bd7d9b431080a3032eba51da10 sphincs-shake-128f-simple fbe6c99d6ccc42fc9af5babbac532f28288d4164b182515dffeb1cd47f351d12 sphincs-shake-128s-robust e7d5caee1941be99b6dfe46a95fc4535a34792f429e61d1cdc7fd3bbafe9ff02 sphincs-shake-128s-simple 243d0e25de08fea547b0beae5f778a48bd55e56066435f9cdb9afc60a722699e sphincs-shake-192f-robust f204fd1cd5dce187441d104ae7159b64322b6a4afae708d48dc9966fe418ec4e sphincs-shake-192f-simple cb13eaa2b1c074f53c87f1025e6bb1b356ad8de3bea9388b90a058a6460766bb sphincs-shake-192s-robust 4cc01c4a562d738ac54f5abfead35ecc4f46a1e2531fa12b4bc2819f4560c351 sphincs-shake-192s-simple 5a736aeba47f8d84e3ca47126715affcb4ce6cef13e3c9f6af220827973aa383 sphincs-shake-256f-robust 127f7ab83c740344546fe30777b221e8cb39f30fc4242d07d7608dc31a9835d4 sphincs-shake-256f-simple 4d2ca7d10f2206c3cb9a26c6b00a0361601a1fe2dddf102fbfd6d3dac0be10fe sphincs-shake-256s-robust 4ce4552e2e9b009a9016eb6dbcbefae3da2de151d61e2f392d4b9517eaeab91d sphincs-shake-256s-simple ================================================ FILE: benchmark.py ================================================ #! 
/usr/bin/env python3 import fileinput import itertools import os import sys from subprocess import DEVNULL, run implementations = [ ('ref', ['shake', 'sha2', 'haraka']), ('haraka-aesni', ['haraka']), ('shake-avx2', ['shake']), ('sha2-avx2', ['sha2']), ] options = ["f", "s"] sizes = [128, 192, 256] thashes = ['robust', 'simple'] for impl, fns in implementations: params = os.path.join(impl, "params.h") for fn in fns: for opt, size, thash in itertools.product(options, sizes, thashes): paramset = "sphincs-{}-{}{}".format(fn, size, opt) paramfile = "params-{}.h".format(paramset) print("Benchmarking", paramset, thash, "using", impl, flush=True) params = 'PARAMS={}'.format(paramset) # overrides Makefile var thash = 'THASH={}'.format(thash) # overrides Makefile var run(["make", "-C", impl, "clean", thash, params], stdout=DEVNULL, stderr=sys.stderr) run(["make", "-C", impl, "benchmarks", thash, params], stdout=DEVNULL, stderr=sys.stderr) run(["make", "-C", impl, "benchmark", thash, params], stdout=sys.stdout, stderr=sys.stderr) print(flush=True) ================================================ FILE: haraka-aesni/.gitignore ================================================ test/* !test/*.c PQCsignKAT_*.rsp PQCsignKAT_*.req PQCgenKAT_sign ================================================ FILE: haraka-aesni/Makefile ================================================ PARAMS = sphincs-haraka-128f THASH = robust CC = /usr/bin/gcc CFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -march=native -fomit-frame-pointer -flto -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS) SOURCES = hash_haraka.c hash_harakax4.c thash_haraka_$(THASH).c thash_haraka_$(THASH)x4.c address.c randombytes.c merkle.c wots.c utils.c utilsx4.c fors.c sign.c haraka.c HEADERS = params.h hash.h hashx4.h thash.h thashx4.h address.h randombytes.h merkle.c wots.h utils.h utilsx4.h fors.h api.h haraka.h harakax4.h DET_SOURCES = $(SOURCES:randombytes.%=rng.%) DET_HEADERS = $(HEADERS:randombytes.%=rng.%) TESTS = 
test/fors \
		test/spx \

BENCHMARK = test/benchmark

.PHONY: clean test benchmark

default: PQCgenKAT_sign

all: PQCgenKAT_sign tests benchmarks

tests: $(TESTS)

test: $(TESTS:=.exec)

benchmarks: $(BENCHMARK)

benchmark: $(BENCHMARK:=.exec)

PQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)
	$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto

test/%: test/%.c $(SOURCES) $(HEADERS)
	$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)

test/%.exec: test/%
	@$<

clean:
	-$(RM) $(TESTS)
	-$(RM) $(BENCHMARK)
	-$(RM) PQCgenKAT_sign
	-$(RM) PQCsignKAT_*.rsp
	-$(RM) PQCsignKAT_*.req


================================================
FILE: haraka-aesni/context.h
================================================
#ifndef SPX_CONTEXT_H
#define SPX_CONTEXT_H

/* NOTE(review): the header name of the next #include is missing in this copy
 * of the file. The struct below uses uint8_t, so it is presumably <stdint.h>
 * -- confirm against the repository. */
#include

#include "params.h"
#include "immintrin.h"

/*
 * Per-key context for the AES-NI Haraka implementation.
 */
typedef struct {
    /* SPX_N-byte seed; tweak_constants() in haraka.c feeds it to haraka_S()
     * to derive the contents of rc. */
    uint8_t pub_seed[SPX_N];
    /* SPX_N-byte seed; presumably the secret-key seed -- it is not read by
     * the code visible alongside this header, confirm against callers. */
    uint8_t sk_seed[SPX_N];

    /* 40 128-bit round constants. tweak_constants() first fills them via
     * load_haraka_constants() and then overwrites them with haraka_S()
     * output. */
    __m128i rc[40];
} spx_ctx;

#endif


================================================
FILE: haraka-aesni/haraka.c
================================================
/*
Plain C implementation of the Haraka256 and Haraka512 permutations.
*/
/* NOTE(review): the header names of the next five #include directives are
 * missing in this copy of the file (angle-bracket includes were stripped).
 * Restore them from the repository before compiling. */
#include
#include
#include
#include
#include

#include "haraka.h"
#include "harakax4.h"
#include "utils.h"

/* Not referenced in the part of the file visible here; presumably the rate r
 * (in bytes) that callers pass to the haraka_S_* helpers below -- confirm. */
#define HARAKAS_RATE 32

#define u64 unsigned long
#define u128 __m128i

/* 128-bit load / store through the unaligned-access intrinsics, and 128-bit
 * XOR. */
#define LOAD(src) _mm_loadu_si128((u128 *)(src))
#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src)

#define XOR128(a, b) _mm_xor_si128(a, b)

/*
 * NOTE: the multi-statement macros below expand to several bare statements
 * (no do { } while (0) wrapper), so they must not be used as the sole body of
 * an unbraced if/else/loop.
 */

/* Two _mm_aesenc_si128 rounds on each of s0 and s1, consuming the four
 * consecutive round constants rci[0..3] (s0: rci[0], rci[2]; s1: rci[1],
 * rci[3]). */
#define AES2(s0, s1, rci) \
  s0 = _mm_aesenc_si128(s0, *(rci)); \
  s1 = _mm_aesenc_si128(s1, *(rci + 1)); \
  s0 = _mm_aesenc_si128(s0, *(rci + 2)); \
  s1 = _mm_aesenc_si128(s1, *(rci + 3));

/* AES2 applied to elements [0] and [1] of four separate state arrays, all
 * with the same round constants. */
#define AES2_4x(s0, s1, s2, s3, rci) \
  AES2(s0[0], s0[1], rci); \
  AES2(s1[0], s1[1], rci); \
  AES2(s2[0], s2[1], rci); \
  AES2(s3[0], s3[1], rci);

/* Two _mm_aesenc_si128 rounds on each of s0..s3, consuming the eight
 * consecutive round constants rci[0..7]. */
#define AES4(s0, s1, s2, s3, rci) \
  s0 = _mm_aesenc_si128(s0, *(rci)); \
  s1 = _mm_aesenc_si128(s1, *(rci + 1)); \
  s2 = _mm_aesenc_si128(s2, *(rci + 2)); \
  s3 = _mm_aesenc_si128(s3, *(rci + 3)); \
  s0 = _mm_aesenc_si128(s0, *(rci + 4)); \
  s1 = _mm_aesenc_si128(s1, *(rci + 5)); \
  s2 = _mm_aesenc_si128(s2, *(rci + 6)); \
  s3 = _mm_aesenc_si128(s3, *(rci + 7));

/* AES4 applied to elements [0..3] of four separate state arrays, all with the
 * same round constants. */
#define AES4_4x(s0, s1, s2, s3, rci) \
  AES4(s0[0], s0[1], s0[2], s0[3], rci); \
  AES4(s1[0], s1[1], s1[2], s1[3], rci); \
  AES4(s2[0], s2[1], s2[2], s2[3], rci); \
  AES4(s3[0], s3[1], s3[2], s3[3], rci);

/* Interleave the 32-bit lanes of s0 and s1 (unpacklo -> s0, unpackhi -> s1).
 * Requires a variable named `tmp` in the enclosing scope. */
#define MIX2(s0, s1) \
  tmp = _mm_unpacklo_epi32(s0, s1); \
  s1 = _mm_unpackhi_epi32(s0, s1); \
  s0 = tmp;

/* 32-bit lane mixing across four 128-bit words via unpacklo/unpackhi. The
 * statement order matters: each line reads values written by earlier lines.
 * Requires a variable named `tmp` in the enclosing scope. */
#define MIX4(s0, s1, s2, s3) \
  tmp = _mm_unpacklo_epi32(s0, s1); \
  s0 = _mm_unpackhi_epi32(s0, s1); \
  s1 = _mm_unpacklo_epi32(s2, s3); \
  s2 = _mm_unpackhi_epi32(s2, s3); \
  s3 = _mm_unpacklo_epi32(s0, s2); \
  s0 = _mm_unpackhi_epi32(s0, s2); \
  s2 = _mm_unpackhi_epi32(s1, tmp); \
  s1 = _mm_unpacklo_epi32(s1, tmp);

/* Store 32 bytes to out: bytes 0..15 are the upper 64-bit halves of s0 and s1
 * (shuffle immediate 3), bytes 16..31 are the lower 64-bit halves of s2 and
 * s3 (shuffle immediate 0). */
#define TRUNCSTORE(out, s0, s1, s2, s3) \
  _mm_storeu_si128((u128 *)out, \
                   _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(s0), _mm_castsi128_pd(s1), 3))); \
  _mm_storeu_si128((u128 *)(out + 16), \
                   _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(s2), _mm_castsi128_pd(s3), 0)));

/*
 * Fill rc[0..39] with 40 fixed 128-bit constants.
 * tweak_constants() refers to these as the "standard constants".
 * rc must have room for 40 elements.
 */
static void load_haraka_constants(u128 *rc)
{
    rc[0] = _mm_set_epi32(0x0684704c,0xe620c00a,0xb2c5fef0,0x75817b9d);
    rc[1] = _mm_set_epi32(0x8b66b4e1,0x88f3a06b,0x640f6ba4,0x2f08f717);
    rc[2] = _mm_set_epi32(0x3402de2d,0x53f28498,0xcf029d60,0x9f029114);
    rc[3] = _mm_set_epi32(0x0ed6eae6,0x2e7b4f08,0xbbf3bcaf,0xfd5b4f79);
    rc[4] = _mm_set_epi32(0xcbcfb0cb,0x4872448b,0x79eecd1c,0xbe397044);
    rc[5] = _mm_set_epi32(0x7eeacdee,0x6e9032b7,0x8d5335ed,0x2b8a057b);
    rc[6] = _mm_set_epi32(0x67c28f43,0x5e2e7cd0,0xe2412761,0xda4fef1b);
    rc[7] = _mm_set_epi32(0x2924d9b0,0xafcacc07,0x675ffde2,0x1fc70b3b);
    rc[8] = _mm_set_epi32(0xab4d63f1,0xe6867fe9,0xecdb8fca,0xb9d465ee);
    rc[9] = _mm_set_epi32(0x1c30bf84,0xd4b7cd64,0x5b2a404f,0xad037e33);
    rc[10] = _mm_set_epi32(0xb2cc0bb9,0x941723bf,0x69028b2e,0x8df69800);
    rc[11] = _mm_set_epi32(0xfa0478a6,0xde6f5572,0x4aaa9ec8,0x5c9d2d8a);
    rc[12] = _mm_set_epi32(0xdfb49f2b,0x6b772a12,0x0efa4f2e,0x29129fd4);
    rc[13] = _mm_set_epi32(0x1ea10344,0xf449a236,0x32d611ae,0xbb6a12ee);
    rc[14] = _mm_set_epi32(0xaf044988,0x4b050084,0x5f9600c9,0x9ca8eca6);
    rc[15] = _mm_set_epi32(0x21025ed8,0x9d199c4f,0x78a2c7e3,0x27e593ec);
    rc[16] = _mm_set_epi32(0xbf3aaaf8,0xa759c9b7,0xb9282ecd,0x82d40173);
    rc[17] = _mm_set_epi32(0x6260700d,0x6186b017,0x37f2efd9,0x10307d6b);
    rc[18] = _mm_set_epi32(0x5aca45c2,0x21300443,0x81c29153,0xf6fc9ac6);
    rc[19] = _mm_set_epi32(0x9223973c,0x226b68bb,0x2caf92e8,0x36d1943a);
    rc[20] = _mm_set_epi32(0xd3bf9238,0x225886eb,0x6cbab958,0xe51071b4);
    rc[21] = _mm_set_epi32(0xdb863ce5,0xaef0c677,0x933dfddd,0x24e1128d);
    rc[22] = _mm_set_epi32(0xbb606268,0xffeba09c,0x83e48de3,0xcb2212b1);
    rc[23] = _mm_set_epi32(0x734bd3dc,0xe2e4d19c,0x2db91a4e,0xc72bf77d);
    rc[24] = _mm_set_epi32(0x43bb47c3,0x61301b43,0x4b1415c4,0x2cb3924e);
    rc[25] = _mm_set_epi32(0xdba775a8,0xe707eff6,0x03b231dd,0x16eb6899);
    rc[26] = _mm_set_epi32(0x6df3614b,0x3c755977,0x8e5e2302,0x7eca472c);
    rc[27] = _mm_set_epi32(0xcda75a17,0xd6de7d77,0x6d1be5b9,0xb88617f9);
    rc[28] = _mm_set_epi32(0xec6b43f0,0x6ba8e9aa,0x9d6c069d,0xa946ee5d);
    rc[29] = _mm_set_epi32(0xcb1e6950,0xf957332b,0xa2531159,0x3bf327c1);
    rc[30] = _mm_set_epi32(0x2cee0c75,0x00da619c,0xe4ed0353,0x600ed0d9);
    rc[31] = _mm_set_epi32(0xf0b1a5a1,0x96e90cab,0x80bbbabc,0x63a4a350);
    rc[32] = _mm_set_epi32(0xae3db102,0x5e962988,0xab0dde30,0x938dca39);
    rc[33] = _mm_set_epi32(0x17bb8f38,0xd554a40b,0x8814f3a8,0x2e75b442);
    rc[34] = _mm_set_epi32(0x34bb8a5b,0x5f427fd7,0xaeb6b779,0x360a16f6);
    rc[35] = _mm_set_epi32(0x26f65241,0xcbe55438,0x43ce5918,0xffbaafde);
    rc[36] = _mm_set_epi32(0x4ce99a54,0xb9f3026a,0xa2ca9cf7,0x839ec978);
    rc[37] = _mm_set_epi32(0xae51a51a,0x1bdff7be,0x40c06e28,0x22901235);
    rc[38] = _mm_set_epi32(0xa0c1613c,0xba7ed22b,0xc173bc0f,0x48a659cf);
    rc[39] = _mm_set_epi32(0x756acc03,0x02288288,0x4ad6bdfd,0xe9c59da1);
}

/*
 * Replace the round constants in ctx->rc with values derived from
 * ctx->pub_seed.
 *
 * Steps: load the standard constants into ctx->rc, call haraka_S() to produce
 * 40*16 bytes from the SPX_N-byte pub_seed, then reload all 40 constants from
 * that output. haraka_S() is declared outside this view; it receives ctx, so
 * presumably it uses ctx->rc, which is why the standard constants are loaded
 * first -- confirm.
 */
void tweak_constants(spx_ctx *ctx)
{
    int i;
    unsigned char buf[40*16];

    /* Use the standard constants to generate tweaked ones. */
    load_haraka_constants(ctx->rc);

    /* Constants for pk.seed */
    haraka_S(buf, 40*16, ctx->pub_seed, SPX_N, ctx);

    /* Tweak constants with the pub_seed */
    for (i = 0; i < 40; i++) {
        ctx->rc[i] = LOAD(buf + i*16);
    }
}

/*
 * Absorb mlen bytes of m into the state s, r bytes at a time.
 *
 * Every full r-byte block is XORed into s and followed by haraka512_perm().
 * The remaining bytes (fewer than r) are copied into a zeroed r-byte buffer,
 * the byte p is written directly after them, bit 7 of the last buffer byte is
 * set, and the buffer is XORed into s without a further permutation.
 *
 * NOTE(review): exactly 32 bytes (two 128-bit words) are XORed per block
 * regardless of r, so this appears to assume r == 32 -- confirm against
 * callers. SPX_VLA is defined outside this view; it is used here to declare
 * the r-element buffer t.
 */
static void haraka_S_absorb(unsigned char *s, unsigned int r,
                            const unsigned char *m, unsigned long long mlen,
                            unsigned char p, const spx_ctx *ctx)
{
    unsigned long long i;
    SPX_VLA(unsigned char, t, r);

    while (mlen >= r) {
        // XOR block to state
        STORE(s, XOR128(LOAD(s), LOAD(m)));
        STORE(s + 16, XOR128(LOAD(s + 16), LOAD(m + 16)));
        haraka512_perm(s, s, ctx);
        mlen -= r;
        m += r;
    }

    /* Build the final padded block. */
    for (i = 0; i < r; ++i) {
        t[i] = 0;
    }
    for (i = 0; i < mlen; ++i) {
        t[i] = m[i];
    }
    /* i == mlen here: p goes directly after the leftover message bytes. */
    t[i] = p;
    t[r - 1] |= 128;
    STORE(s, XOR128(LOAD(s), LOAD(t)));
    STORE(s + 16, XOR128(LOAD(s + 16), LOAD(t + 16)));
}

/*
 * Four-way variant of haraka_S_absorb: absorbs m0..m3 (all of length mlen,
 * all padded with the same byte p) into four states stored in s at byte
 * offsets 0, 64, 128 and 192. As in the single-lane version, only the first
 * 32 bytes of each lane are XORed per block, and full blocks are followed by
 * haraka512_perm_x4().
 */
static void haraka_S_absorb4x(unsigned char *s,
                              unsigned int r,
                              const unsigned char *m0,
                              const unsigned char *m1,
                              const unsigned char *m2,
                              const unsigned char *m3,
                              unsigned long long int mlen,
                              unsigned char p,
                              const spx_ctx *ctx)
{
    unsigned long long i;
    SPX_VLA(unsigned char, t0, r);
    SPX_VLA(unsigned char, t1, r);
    SPX_VLA(unsigned char, t2, r);
    SPX_VLA(unsigned char, t3, r);

    while (mlen >= r) {
        // XOR block to state
        STORE(s, XOR128(LOAD(s), LOAD(m0)));
        STORE(s + 16, XOR128(LOAD(s + 16), LOAD(m0 + 16)));
        STORE(s + 64, XOR128(LOAD(s + 64), LOAD(m1)));
        STORE(s + 80, XOR128(LOAD(s + 80), LOAD(m1 + 16)));
        STORE(s + 128, XOR128(LOAD(s + 128), LOAD(m2)));
        STORE(s + 144, XOR128(LOAD(s + 144), LOAD(m2 + 16)));
        STORE(s + 192, XOR128(LOAD(s + 192), LOAD(m3)));
        STORE(s + 208, XOR128(LOAD(s + 208), LOAD(m3 + 16)));
        haraka512_perm_x4(s, s, ctx);
        mlen -= r;
        m0 += r;
        m1 += r;
        m2 += r;
        m3 += r;
    }

    /* Build the four final padded blocks. */
    for (i = 0; i < r; ++i) {
        t0[i] = 0;
        t1[i] = 0;
        t2[i] = 0;
        t3[i] = 0;
    }
    for (i = 0; i < mlen; ++i) {
        t0[i] = m0[i];
        t1[i] = m1[i];
        t2[i] = m2[i];
        t3[i] = m3[i];
    }

    /* i == mlen here: p goes directly after the leftover message bytes. */
    t0[i] = p;
    t1[i] = p;
    t2[i] = p;
    t3[i] = p;

    t0[r - 1] |= 128;
    t1[r - 1] |= 128;
    t2[r - 1] |= 128;
    t3[r - 1] |= 128;

    STORE(s, XOR128(LOAD(s), LOAD(t0)));
    STORE(s + 16, XOR128(LOAD(s + 16), LOAD(t0 + 16)));
    STORE(s + 64, XOR128(LOAD(s + 64), LOAD(t1)));
    STORE(s + 80, XOR128(LOAD(s + 80), LOAD(t1 + 16)));
    STORE(s + 128, XOR128(LOAD(s + 128), LOAD(t2)));
    STORE(s + 144, XOR128(LOAD(s + 144), LOAD(t2 + 16)));
    STORE(s + 192, XOR128(LOAD(s + 192), LOAD(t3)));
    STORE(s + 208, XOR128(LOAD(s + 208), LOAD(t3 + 16)));
}

/*
 * Produce nblocks output blocks from the state s.
 *
 * For each block: apply haraka512_perm() to s, copy the first 32 bytes of s
 * to h, and advance h by r. Only 32 bytes are written per block regardless
 * of r.
 */
static void haraka_S_squeezeblocks(unsigned char *h, unsigned long long nblocks,
                                   unsigned char *s, unsigned int r,
                                   const spx_ctx *ctx)
{
    while (nblocks > 0) {
        haraka512_perm(s, s, ctx);
        STORE(h, LOAD(s));
        STORE(h + 16, LOAD(s + 16));
        h += r;
        nblocks--;
    }
}

static void haraka_S_squeezeblocks4x(unsigned char *h0,
                                     unsigned char *h1,
                                     unsigned char *h2,
                                     unsigned char *h3,
                                     unsigned long long nblocks,
                                     unsigned char *s,
                                     unsigned int r,
                                     const spx_ctx *ctx)
{
    while (nblocks > 0) {
        haraka512_perm_x4(s, s, ctx);
        STORE(h0, LOAD(s));
        STORE(h0 + 16, LOAD(s + 16));
        STORE(h1, LOAD(s + 64));
        STORE(h1 + 16, LOAD(s + 80));
        STORE(h2, LOAD(s + 128));
        STORE(h2 + 16, LOAD(s + 144));
        STORE(h3, LOAD(s + 192));
        STORE(h3 + 16, LOAD(s + 208));
        h0 += r;
        h1 += r;
        h2
+= r; h3 += r; nblocks--; } } void haraka_S_inc_init(uint8_t *s_inc) { size_t i; for (i = 0; i < 64; i++) { s_inc[i] = 0; } s_inc[64] = 0; } void haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen, const spx_ctx *ctx) { size_t i; /* Recall that s_inc[64] is the non-absorbed bytes xored into the state */ while (mlen + s_inc[64] >= HARAKAS_RATE) { for (i = 0; i < (size_t)(HARAKAS_RATE - s_inc[64]); i++) { /* Take the i'th byte from message xor with the s_inc[64] + i'th byte of the state */ s_inc[s_inc[64] + i] ^= m[i]; } mlen -= (size_t)(HARAKAS_RATE - s_inc[64]); m += HARAKAS_RATE - s_inc[64]; s_inc[64] = 0; haraka512_perm(s_inc, s_inc, ctx); } for (i = 0; i < mlen; i++) { s_inc[s_inc[64] + i] ^= m[i]; } s_inc[64] += mlen; } void haraka_S_inc_finalize(uint8_t *s_inc) { /* After haraka_S_inc_absorb, we are guaranteed that s_inc[64] < HARAKAS_RATE, so we can always use one more byte for p in the current state. */ s_inc[s_inc[64]] ^= 0x1F; s_inc[HARAKAS_RATE - 1] ^= 128; s_inc[64] = 0; } void haraka_S_inc_squeeze(uint8_t *out, size_t outlen, uint8_t *s_inc, const spx_ctx *ctx) { size_t i; /* First consume any bytes we still have sitting around */ for (i = 0; i < outlen && i < s_inc[64]; i++) { /* There are s_inc[64] bytes left, so r - s_inc[64] is the first available byte. We consume from there, i.e., up to r. 
*/ out[i] = (uint8_t)s_inc[(HARAKAS_RATE - s_inc[64] + i)]; } out += i; outlen -= i; s_inc[64] -= i; /* Then squeeze the remaining necessary blocks */ while (outlen > 0) { haraka512_perm(s_inc, s_inc, ctx); for (i = 0; i < outlen && i < HARAKAS_RATE; i++) { out[i] = s_inc[i]; } out += i; outlen -= i; s_inc[64] = HARAKAS_RATE - i; } } void haraka_S(unsigned char *out, unsigned long long outlen, const unsigned char *in, unsigned long long inlen, const spx_ctx *ctx) { unsigned long long i; unsigned char s[64]; unsigned char d[32]; for (i = 0; i < 64; i++) { s[i] = 0; } haraka_S_absorb(s, HARAKAS_RATE, in, inlen, 0x1F, ctx); haraka_S_squeezeblocks(out, outlen / HARAKAS_RATE, s, HARAKAS_RATE, ctx); out += (outlen / HARAKAS_RATE) * HARAKAS_RATE; if (outlen % HARAKAS_RATE) { haraka_S_squeezeblocks(d, 1, s, HARAKAS_RATE, ctx); for (i = 0; i < outlen % HARAKAS_RATE; i++) { out[i] = d[i]; } } } void haraka_Sx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned long long outlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long long inlen, const spx_ctx *ctx) { unsigned long long i; unsigned char s[64 * 4]; unsigned char d0[32]; unsigned char d1[32]; unsigned char d2[32]; unsigned char d3[32]; for (i = 0; i < 64 * 4; i++) { s[i] = 0; } haraka_S_absorb4x(s, HARAKAS_RATE, in0, in1, in2, in3, inlen, 0x1F, ctx); haraka_S_squeezeblocks4x(out0, out1, out2, out3, outlen / HARAKAS_RATE, s, HARAKAS_RATE, ctx); out0 += (outlen / HARAKAS_RATE) * HARAKAS_RATE; out1 += (outlen / HARAKAS_RATE) * HARAKAS_RATE; out2 += (outlen / HARAKAS_RATE) * HARAKAS_RATE; out3 += (outlen / HARAKAS_RATE) * HARAKAS_RATE; if (outlen % HARAKAS_RATE) { haraka_S_squeezeblocks4x(d0, d1, d2, d3, 1, s, HARAKAS_RATE, ctx); for (i = 0; i < outlen % HARAKAS_RATE; i++) { out0[i] = d0[i]; out1[i] = d1[i]; out2[i] = d2[i]; out3[i] = d3[i]; } } } void haraka512_perm(unsigned char *out, const unsigned char *in, 
const spx_ctx *ctx) { u128 s[4], tmp; s[0] = LOAD(in); s[1] = LOAD(in + 16); s[2] = LOAD(in + 32); s[3] = LOAD(in + 48); AES4(s[0], s[1], s[2], s[3], ctx->rc); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 8); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 16); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 24); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 32); MIX4(s[0], s[1], s[2], s[3]); STORE(out, s[0]); STORE(out + 16, s[1]); STORE(out + 32, s[2]); STORE(out + 48, s[3]); } void haraka512_perm_x4(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { u128 s[4][4], tmp; s[0][0] = LOAD(in); s[0][1] = LOAD(in + 16); s[0][2] = LOAD(in + 32); s[0][3] = LOAD(in + 48); s[1][0] = LOAD(in + 64); s[1][1] = LOAD(in + 80); s[1][2] = LOAD(in + 96); s[1][3] = LOAD(in + 112); s[2][0] = LOAD(in + 128); s[2][1] = LOAD(in + 144); s[2][2] = LOAD(in + 160); s[2][3] = LOAD(in + 176); s[3][0] = LOAD(in + 192); s[3][1] = LOAD(in + 208); s[3][2] = LOAD(in + 224); s[3][3] = LOAD(in + 240); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 8); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 16); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 24); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 32); 
MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); STORE(out, s[0][0]); STORE(out + 16, s[0][1]); STORE(out + 32, s[0][2]); STORE(out + 48, s[0][3]); STORE(out + 64, s[1][0]); STORE(out + 80, s[1][1]); STORE(out + 96, s[1][2]); STORE(out + 112, s[1][3]); STORE(out + 128, s[2][0]); STORE(out + 144, s[2][1]); STORE(out + 160, s[2][2]); STORE(out + 176, s[2][3]); STORE(out + 192, s[3][0]); STORE(out + 208, s[3][1]); STORE(out + 224, s[3][2]); STORE(out + 240, s[3][3]); } void haraka512(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { u128 s[4], tmp; s[0] = LOAD(in); s[1] = LOAD(in + 16); s[2] = LOAD(in + 32); s[3] = LOAD(in + 48); AES4(s[0], s[1], s[2], s[3], ctx->rc); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 8); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 16); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 24); MIX4(s[0], s[1], s[2], s[3]); AES4(s[0], s[1], s[2], s[3], ctx->rc + 32); MIX4(s[0], s[1], s[2], s[3]); s[0] = XOR128(s[0], LOAD(in)); s[1] = XOR128(s[1], LOAD(in + 16)); s[2] = XOR128(s[2], LOAD(in + 32)); s[3] = XOR128(s[3], LOAD(in + 48)); // truncate and store result TRUNCSTORE(out, s[0], s[1], s[2], s[3]); } void haraka512x4(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { u128 s[4][4], tmp; s[0][0] = LOAD(in); s[0][1] = LOAD(in + 16); s[0][2] = LOAD(in + 32); s[0][3] = LOAD(in + 48); s[1][0] = LOAD(in + 64); s[1][1] = LOAD(in + 80); s[1][2] = LOAD(in + 96); s[1][3] = LOAD(in + 112); s[2][0] = LOAD(in + 128); s[2][1] = LOAD(in + 144); s[2][2] = LOAD(in + 160); s[2][3] = LOAD(in + 176); s[3][0] = LOAD(in + 192); s[3][1] = LOAD(in + 208); s[3][2] = LOAD(in + 224); s[3][3] = LOAD(in + 240); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], 
s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 8); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 16); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 24); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 32); MIX4(s[0][0], s[0][1], s[0][2], s[0][3]); MIX4(s[1][0], s[1][1], s[1][2], s[1][3]); MIX4(s[2][0], s[2][1], s[2][2], s[2][3]); MIX4(s[3][0], s[3][1], s[3][2], s[3][3]); s[0][0] = XOR128(s[0][0], LOAD(in)); s[0][1] = XOR128(s[0][1], LOAD(in + 16)); s[0][2] = XOR128(s[0][2], LOAD(in + 32)); s[0][3] = XOR128(s[0][3], LOAD(in + 48)); s[1][0] = XOR128(s[1][0], LOAD(in + 64)); s[1][1] = XOR128(s[1][1], LOAD(in + 80)); s[1][2] = XOR128(s[1][2], LOAD(in + 96)); s[1][3] = XOR128(s[1][3], LOAD(in + 112)); s[2][0] = XOR128(s[2][0], LOAD(in + 128)); s[2][1] = XOR128(s[2][1], LOAD(in + 144)); s[2][2] = XOR128(s[2][2], LOAD(in + 160)); s[2][3] = XOR128(s[2][3], LOAD(in + 176)); s[3][0] = XOR128(s[3][0], LOAD(in + 192)); s[3][1] = XOR128(s[3][1], LOAD(in + 208)); s[3][2] = XOR128(s[3][2], LOAD(in + 224)); s[3][3] = XOR128(s[3][3], LOAD(in + 240)); TRUNCSTORE(out, s[0][0], s[0][1], s[0][2], s[0][3]); TRUNCSTORE((out + 32), s[1][0], s[1][1], s[1][2], s[1][3]); TRUNCSTORE((out + 64), s[2][0], s[2][1], s[2][2], s[2][3]); TRUNCSTORE((out + 96), s[3][0], s[3][1], s[3][2], s[3][3]); } void haraka256(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { u128 s[2], tmp; s[0] = LOAD(in); s[1] = LOAD(in + 16); AES2(s[0], s[1], 
ctx->rc); MIX2(s[0], s[1]); AES2(s[0], s[1], ctx->rc + 4); MIX2(s[0], s[1]); AES2(s[0], s[1], ctx->rc + 8); MIX2(s[0], s[1]); AES2(s[0], s[1], ctx->rc + 12); MIX2(s[0], s[1]); AES2(s[0], s[1], ctx->rc + 16); MIX2(s[0], s[1]); s[0] = XOR128(s[0], LOAD(in)); s[1] = XOR128(s[1], LOAD(in + 16)); STORE(out, s[0]); STORE(out + 16, s[1]); } void haraka256x4(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { u128 s[4][2], tmp; s[0][0] = LOAD(in); s[0][1] = LOAD(in + 16); s[1][0] = LOAD(in + 32); s[1][1] = LOAD(in + 48); s[2][0] = LOAD(in + 64); s[2][1] = LOAD(in + 80); s[3][0] = LOAD(in + 96); s[3][1] = LOAD(in + 112); // Round 1 AES2_4x(s[0], s[1], s[2], s[3], ctx->rc); MIX2(s[0][0], s[0][1]); MIX2(s[1][0], s[1][1]); MIX2(s[2][0], s[2][1]); MIX2(s[3][0], s[3][1]); // Round 2 AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 4); MIX2(s[0][0], s[0][1]); MIX2(s[1][0], s[1][1]); MIX2(s[2][0], s[2][1]); MIX2(s[3][0], s[3][1]); // Round 3 AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 8); MIX2(s[0][0], s[0][1]); MIX2(s[1][0], s[1][1]); MIX2(s[2][0], s[2][1]); MIX2(s[3][0], s[3][1]); // Round 4 AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 12); MIX2(s[0][0], s[0][1]); MIX2(s[1][0], s[1][1]); MIX2(s[2][0], s[2][1]); MIX2(s[3][0], s[3][1]); // Round 5 AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 16); MIX2(s[0][0], s[0][1]); MIX2(s[1][0], s[1][1]); MIX2(s[2][0], s[2][1]); MIX2(s[3][0], s[3][1]); // Feed Forward s[0][0] = _mm_xor_si128(s[0][0], LOAD(in)); s[0][1] = _mm_xor_si128(s[0][1], LOAD(in + 16)); s[1][0] = _mm_xor_si128(s[1][0], LOAD(in + 32)); s[1][1] = _mm_xor_si128(s[1][1], LOAD(in + 48)); s[2][0] = _mm_xor_si128(s[2][0], LOAD(in + 64)); s[2][1] = _mm_xor_si128(s[2][1], LOAD(in + 80)); s[3][0] = _mm_xor_si128(s[3][0], LOAD(in + 96)); s[3][1] = _mm_xor_si128(s[3][1], LOAD(in + 112)); STORE(out, s[0][0]); STORE(out + 16, s[0][1]); STORE(out + 32, s[1][0]); STORE(out + 48, s[1][1]); STORE(out + 64, s[2][0]); STORE(out + 80, s[2][1]); STORE(out + 96, s[3][0]); STORE(out + 112, 
s[3][1]); } ================================================ FILE: haraka-aesni/harakax4.h ================================================ #ifndef SPX_HARAKAX4_H #define SPX_HARAKAX4_H #include "context.h" #include "params.h" /* Haraka Sponge */ #define haraka_Sx4 SPX_NAMESPACE(haraka_Sx4) void haraka_Sx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned long long outlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long long inlen, const spx_ctx *ctx); /* Applies the 512-bit Haraka permutation x4 to in. */ #define haraka512_perm_x4 SPX_NAMESPACE(haraka512_perm_x4) void haraka512_perm_x4(unsigned char *out, const unsigned char *in, const spx_ctx *ctx); /* Implementation of Haraka-512 x4*/ #define haraka512x4 SPX_NAMESPACE(haraka512x4) void haraka512x4(unsigned char *out, const unsigned char *in, const spx_ctx *ctx); /* Implementation of Haraka-256 x4 */ #define haraka256x4 SPX_NAMESPACE(haraka256x4) void haraka256x4(unsigned char *out, const unsigned char *in, const spx_ctx *ctx); #endif ================================================ FILE: haraka-aesni/hash_harakax4.c ================================================ #include #include #include "address.h" #include "params.h" #include "harakax4.h" #include "hashx4.h" /* * 4-way parallel version of prf_addr; takes 4x as much input and output */ #define prf_addrx4 SPX_NAMESPACE(prf_addrx4) void prf_addrx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const spx_ctx *ctx, const uint32_t addrx4[4*8]) { unsigned char bufx4[4 * 64] = {0}; /* Since SPX_N may be smaller than 32, we need temporary buffers. 
*/ unsigned char outbuf[4 * 32]; unsigned int i; for (i = 0; i < 4; i++) { memcpy(bufx4 + i*64, addrx4 + i*8, SPX_ADDR_BYTES); memcpy(bufx4 + i*64 + SPX_ADDR_BYTES, ctx->sk_seed, SPX_N); } haraka512x4(outbuf, bufx4, ctx); memcpy(out0, outbuf, SPX_N); memcpy(out1, outbuf + 32, SPX_N); memcpy(out2, outbuf + 64, SPX_N); memcpy(out3, outbuf + 96, SPX_N); } ================================================ FILE: haraka-aesni/test/benchmark.c ================================================ #define _POSIX_C_SOURCE 199309L #include #include #include #include "../api.h" #include "../fors.h" #include "../wotsx4.h" #include "../params.h" #include "../randombytes.h" #define SPX_MLEN 32 #define NTESTS 10 static void wots_gen_pkx4(unsigned char* pk, const spx_ctx *ctx, uint32_t addr[8]); static int cmp_llu(const void *a, const void*b) { if(*(unsigned long long *)a < *(unsigned long long *)b) return -1; if(*(unsigned long long *)a > *(unsigned long long *)b) return 1; return 0; } static unsigned long long median(unsigned long long *l, size_t llen) { qsort(l,llen,sizeof(unsigned long long),cmp_llu); if(llen%2) return l[llen/2]; else return (l[llen/2-1]+l[llen/2])/2; } static void delta(unsigned long long *l, size_t llen) { unsigned int i; for(i = 0; i < llen - 1; i++) { l[i] = l[i+1] - l[i]; } } static unsigned long long cpucycles(void) { unsigned long long result; __asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (result) :: "%rdx"); return result; } static void printfcomma (unsigned long long n) { if (n < 1000) { printf("%llu", n); return; } printfcomma(n / 1000); printf (",%03llu", n % 1000); } static void printfalignedcomma (unsigned long long n, int len) { unsigned long long ncopy = n; int i = 0; while (ncopy > 9) { len -= 1; ncopy /= 10; i += 1; // to account for commas } i = i/3 - 1; // to account for commas for (; i < len; i++) { printf(" "); } printfcomma(n); } static void display_result(double result, unsigned long long *l, size_t llen, unsigned 
long long mul) { unsigned long long med; result /= NTESTS; delta(l, NTESTS + 1); med = median(l, llen); printf("avg. %11.2lf us (%2.2lf sec); median ", result, result / 1e6); printfalignedcomma(med, 12); printf(" cycles, %5llux: ", mul); printfalignedcomma(mul*med, 12); printf(" cycles\n"); } #define MEASURE(TEXT, MUL, FNCALL)\ printf(TEXT);\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\ for(i = 0; i < NTESTS; i++) {\ t[i] = cpucycles();\ FNCALL;\ }\ t[NTESTS] = cpucycles();\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\ result = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) / 1e3;\ display_result(result, t, NTESTS, MUL); int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); spx_ctx ctx; unsigned char pk[SPX_PK_BYTES]; unsigned char sk[SPX_SK_BYTES]; unsigned char *m = malloc(SPX_MLEN); unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN); unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN); unsigned char fors_pk[SPX_FORS_PK_BYTES]; unsigned char fors_m[SPX_FORS_MSG_BYTES]; unsigned char fors_sig[SPX_FORS_BYTES]; unsigned char addr[SPX_ADDR_BYTES]; unsigned char wots_pk[SPX_WOTS_PK_BYTES]; unsigned long long smlen; unsigned long long mlen; unsigned long long t[NTESTS+1]; struct timespec start, stop; double result; int i; randombytes(m, SPX_MLEN); randombytes(addr, SPX_ADDR_BYTES); printf("Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\n", SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES, SPX_WOTS_W); printf("Running %d iterations.\n", NTESTS); MEASURE("Generating keypair.. ", 1, crypto_sign_keypair(pk, sk)); MEASURE(" - WOTS pk gen.. ", (1 << SPX_TREE_HEIGHT), wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Signing.. ", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk)); MEASURE(" - FORS signing.. ", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr)); MEASURE(" - WOTS pk gen.. 
", SPX_D * (1 << SPX_TREE_HEIGHT), wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Verifying.. ", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk)); printf("Signature size: %d (%.2f KiB)\n", SPX_BYTES, SPX_BYTES / 1024.0); printf("Public key size: %d (%.2f KiB)\n", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0); printf("Secret key size: %d (%.2f KiB)\n", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0); free(m); free(sm); free(mout); return 0; } static void wots_gen_pkx4(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) { struct leaf_info_x4 leaf; unsigned steps[ SPX_WOTS_LEN ] = { 0 }; INITIALIZE_LEAF_INFO_X4(leaf, addr, steps); wots_gen_leafx4(pk, ctx, 0, &leaf); } ================================================ FILE: haraka-aesni/thash_haraka_robustx4.c ================================================ #include #include #include "thashx4.h" #include "address.h" #include "params.h" #include "utils.h" #include "harakax4.h" /** * 4-way parallel version of thash; takes 4x as much input and output */ #define thashx4 SPX_NAMESPACE(thashx4) void thashx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx4[4*8]) { SPX_VLA(unsigned char, buf0, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, buf1, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, buf2, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, buf3, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, bitmask0, inblocks * SPX_N); SPX_VLA(unsigned char, bitmask1, inblocks * SPX_N); SPX_VLA(unsigned char, bitmask2, inblocks * SPX_N); SPX_VLA(unsigned char, bitmask3, inblocks * SPX_N); unsigned char outbuf[32 * 4]; unsigned char buf_tmp[64 * 4]; unsigned int i; if (inblocks == 1) { memset(buf_tmp, 0, 64 * 4); // Generate masks first in buffer memcpy(buf_tmp, addrx4 + 0*8, 32); memcpy(buf_tmp + 32, addrx4 
+ 1*8, 32); memcpy(buf_tmp + 64, addrx4 + 2*8, 32); memcpy(buf_tmp + 96, addrx4 + 3*8, 32); haraka256x4(outbuf, buf_tmp, ctx); /* move addresses to make room for inputs; zero old values */ memcpy(buf_tmp + 192, buf_tmp + 96, SPX_ADDR_BYTES); memcpy(buf_tmp + 128, buf_tmp + 64, SPX_ADDR_BYTES); memcpy(buf_tmp + 64, buf_tmp + 32, SPX_ADDR_BYTES); /* skip memcpy(buf_tmp, buf_tmp, SPX_ADDR_BYTES); already in place */ /* skip memset(buf_tmp, 0, SPX_ADDR_BYTES); remained untouched */ memset(buf_tmp + 32, 0, SPX_ADDR_BYTES); /* skip memset(buf_tmp + 64, 0, SPX_ADDR_BYTES); contains addr1 */ memset(buf_tmp + 96, 0, SPX_ADDR_BYTES); for (i = 0; i < SPX_N; i++) { buf_tmp[SPX_ADDR_BYTES + i] = in0[i] ^ outbuf[i]; buf_tmp[SPX_ADDR_BYTES + i + 64] = in1[i] ^ outbuf[i + 32]; buf_tmp[SPX_ADDR_BYTES + i + 128] = in2[i] ^ outbuf[i + 64]; buf_tmp[SPX_ADDR_BYTES + i + 192] = in3[i] ^ outbuf[i + 96]; } haraka512x4(outbuf, buf_tmp, ctx); memcpy(out0, outbuf, SPX_N); memcpy(out1, outbuf + 32, SPX_N); memcpy(out2, outbuf + 64, SPX_N); memcpy(out3, outbuf + 96, SPX_N); } else { /* All other tweakable hashes*/ memcpy(buf0, addrx4 + 0*8, 32); memcpy(buf1, addrx4 + 1*8, 32); memcpy(buf2, addrx4 + 2*8, 32); memcpy(buf3, addrx4 + 3*8, 32); haraka_Sx4(bitmask0, bitmask1, bitmask2, bitmask3, inblocks * SPX_N, buf0, buf1, buf2, buf3, SPX_ADDR_BYTES, ctx); for (i = 0; i < inblocks * SPX_N; i++) { buf0[SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; buf1[SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; buf2[SPX_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; buf3[SPX_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; } haraka_Sx4(out0, out1, out2, out3, SPX_N, buf0, buf1, buf2, buf3, SPX_ADDR_BYTES + inblocks*SPX_N, ctx); } } ================================================ FILE: haraka-aesni/thash_haraka_simplex4.c ================================================ #include #include #include "thashx4.h" #include "address.h" #include "params.h" #include "utils.h" #include "harakax4.h" /** * 4-way parallel version of thash; 
takes 4x as much input and output */ #define thashx4 SPX_NAMESPACE(thashx4) void thashx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx4[4*8]) { SPX_VLA(unsigned char, buf0, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, buf1, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, buf2, SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(unsigned char, buf3, SPX_ADDR_BYTES + inblocks*SPX_N); unsigned char outbuf[32 * 4]; unsigned char buf_tmp[64 * 4]; if (inblocks == 1) { memset(buf_tmp, 0, 64 * 4); memcpy(buf_tmp, addrx4 + 0*8, 32); memcpy(buf_tmp + 64, addrx4 + 1*8, 32); memcpy(buf_tmp + 128, addrx4 + 2*8, 32); memcpy(buf_tmp + 192, addrx4 + 3*8, 32); memcpy(buf_tmp + SPX_ADDR_BYTES, in0, SPX_N); memcpy(buf_tmp + SPX_ADDR_BYTES + 64, in1, SPX_N); memcpy(buf_tmp + SPX_ADDR_BYTES + 128, in2, SPX_N); memcpy(buf_tmp + SPX_ADDR_BYTES + 192, in3, SPX_N); haraka512x4(outbuf, buf_tmp, ctx); memcpy(out0, outbuf, SPX_N); memcpy(out1, outbuf + 32, SPX_N); memcpy(out2, outbuf + 64, SPX_N); memcpy(out3, outbuf + 96, SPX_N); } else { /* All other tweakable hashes*/ memcpy(buf0, addrx4 + 0*8, 32); memcpy(buf1, addrx4 + 1*8, 32); memcpy(buf2, addrx4 + 2*8, 32); memcpy(buf3, addrx4 + 3*8, 32); memcpy(buf0 + SPX_ADDR_BYTES, in0, inblocks * SPX_N); memcpy(buf1 + SPX_ADDR_BYTES, in1, inblocks * SPX_N); memcpy(buf2 + SPX_ADDR_BYTES, in2, inblocks * SPX_N); memcpy(buf3 + SPX_ADDR_BYTES, in3, inblocks * SPX_N); haraka_Sx4(out0, out1, out2, out3, SPX_N, buf0, buf1, buf2, buf3, SPX_ADDR_BYTES + inblocks*SPX_N, ctx); } } ================================================ FILE: ref/.gitignore ================================================ test/* !test/*.c PQCsignKAT_*.rsp PQCsignKAT_*.req PQCgenKAT_sign ================================================ FILE: ref/Makefile 
================================================ PARAMS = sphincs-haraka-128f THASH = robust CC=/usr/bin/gcc CFLAGS=-Wall -Wextra -Wpedantic -O3 -std=c99 -Wconversion -Wmissing-prototypes -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS) SOURCES = address.c randombytes.c merkle.c wots.c wotsx1.c utils.c utilsx1.c fors.c sign.c HEADERS = params.h address.h randombytes.h merkle.h wots.h wotsx1.h utils.h utilsx1.h fors.h api.h hash.h thash.h ifneq (,$(findstring shake,$(PARAMS))) SOURCES += fips202.c hash_shake.c thash_shake_$(THASH).c HEADERS += fips202.h endif ifneq (,$(findstring haraka,$(PARAMS))) SOURCES += haraka.c hash_haraka.c thash_haraka_$(THASH).c HEADERS += haraka.h endif ifneq (,$(findstring sha2,$(PARAMS))) SOURCES += sha2.c hash_sha2.c thash_sha2_$(THASH).c HEADERS += sha2.h endif DET_SOURCES = $(SOURCES:randombytes.%=rng.%) DET_HEADERS = $(HEADERS:randombytes.%=rng.%) TESTS = test/fors \ test/spx \ BENCHMARK = test/benchmark .PHONY: clean test benchmark default: PQCgenKAT_sign all: PQCgenKAT_sign tests benchmarks tests: $(TESTS) test: $(TESTS:=.exec) benchmarks: $(BENCHMARK) benchmark: $(BENCHMARK:=.exec) PQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS) $(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto test/benchmark: test/benchmark.c test/cycles.c $(SOURCES) $(HEADERS) $(CC) $(CFLAGS) -o $@ test/cycles.c $(SOURCES) $< $(LDLIBS) test/%: test/%.c $(SOURCES) $(HEADERS) $(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS) test/haraka: test/haraka.c $(filter-out haraka.c,$(SOURCES)) $(HEADERS) $(CC) $(CFLAGS) -o $@ $(filter-out haraka.c,$(SOURCES)) $< $(LDLIBS) test/%.exec: test/% @$< clean: -$(RM) $(TESTS) -$(RM) $(BENCHMARK) -$(RM) PQCgenKAT_sign -$(RM) PQCsignKAT_*.rsp -$(RM) PQCsignKAT_*.req ================================================ FILE: ref/PQCgenKAT_sign.c ================================================ // // PQCgenKAT_sign.c // // Created by Bassham, Lawrence E (Fed) on 8/29/17. // Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. 
// #include #include #include #include #include "rng.h" #include "api.h" #define MAX_MARKER_LEN 50 #define KAT_SUCCESS 0 #define KAT_FILE_OPEN_ERROR -1 #define KAT_DATA_ERROR -3 #define KAT_CRYPTO_FAILURE -4 int FindMarker(FILE *infile, const char *marker); int ReadHex(FILE *infile, unsigned char *A, int Length, char *str); void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L); char AlgName[] = "My Alg Name"; int main(void) { char fn_req[32], fn_rsp[32]; FILE *fp_req, *fp_rsp; unsigned char seed[48]; unsigned char msg[3300]; unsigned char entropy_input[48]; unsigned char *m, *sm, *m1; unsigned long long mlen, smlen, mlen1; int count; int done; unsigned char pk[CRYPTO_PUBLICKEYBYTES], sk[CRYPTO_SECRETKEYBYTES]; int ret_val; // Create the REQUEST file sprintf(fn_req, "PQCsignKAT_%d.req", CRYPTO_SECRETKEYBYTES); if ( (fp_req = fopen(fn_req, "w")) == NULL ) { printf("Couldn't open <%s> for write\n", fn_req); return KAT_FILE_OPEN_ERROR; } sprintf(fn_rsp, "PQCsignKAT_%d.rsp", CRYPTO_SECRETKEYBYTES); if ( (fp_rsp = fopen(fn_rsp, "w")) == NULL ) { printf("Couldn't open <%s> for write\n", fn_rsp); return KAT_FILE_OPEN_ERROR; } for (int i=0; i<48; i++) entropy_input[i] = (unsigned char)i; randombytes_init(entropy_input, NULL); for (int i=0; i<100; i++) { fprintf(fp_req, "count = %d\n", i); randombytes(seed, 48); fprintBstr(fp_req, "seed = ", seed, 48); mlen = (unsigned long long int)(33*(i+1)); fprintf(fp_req, "mlen = %llu\n", mlen); randombytes(msg, mlen); fprintBstr(fp_req, "msg = ", msg, mlen); fprintf(fp_req, "pk =\n"); fprintf(fp_req, "sk =\n"); fprintf(fp_req, "smlen =\n"); fprintf(fp_req, "sm =\n\n"); } fclose(fp_req); //Create the RESPONSE file based on what's in the REQUEST file if ( (fp_req = fopen(fn_req, "r")) == NULL ) { printf("Couldn't open <%s> for read\n", fn_req); return KAT_FILE_OPEN_ERROR; } fprintf(fp_rsp, "# %s\n\n", CRYPTO_ALGNAME); done = 0; do { if ( FindMarker(fp_req, "count = ") ) ret_val = fscanf(fp_req, "%d", &count); else { 
done = 1; break; } fprintf(fp_rsp, "count = %d\n", count); if ( !ReadHex(fp_req, seed, 48, "seed = ") ) { printf("ERROR: unable to read 'seed' from <%s>\n", fn_req); return KAT_DATA_ERROR; } fprintBstr(fp_rsp, "seed = ", seed, 48); randombytes_init(seed, NULL); if ( FindMarker(fp_req, "mlen = ") ) ret_val = fscanf(fp_req, "%llu", &mlen); else { printf("ERROR: unable to read 'mlen' from <%s>\n", fn_req); return KAT_DATA_ERROR; } fprintf(fp_rsp, "mlen = %llu\n", mlen); m = (unsigned char *)calloc(mlen, sizeof(unsigned char)); m1 = (unsigned char *)calloc(mlen+CRYPTO_BYTES, sizeof(unsigned char)); sm = (unsigned char *)calloc(mlen+CRYPTO_BYTES, sizeof(unsigned char)); if ( !ReadHex(fp_req, m, (int)mlen, "msg = ") ) { printf("ERROR: unable to read 'msg' from <%s>\n", fn_req); return KAT_DATA_ERROR; } fprintBstr(fp_rsp, "msg = ", m, mlen); // Generate the public/private keypair if ( (ret_val = crypto_sign_keypair(pk, sk)) != 0) { printf("crypto_sign_keypair returned <%d>\n", ret_val); return KAT_CRYPTO_FAILURE; } fprintBstr(fp_rsp, "pk = ", pk, CRYPTO_PUBLICKEYBYTES); fprintBstr(fp_rsp, "sk = ", sk, CRYPTO_SECRETKEYBYTES); if ( (ret_val = crypto_sign(sm, &smlen, m, mlen, sk)) != 0) { printf("crypto_sign returned <%d>\n", ret_val); return KAT_CRYPTO_FAILURE; } fprintf(fp_rsp, "smlen = %llu\n", smlen); fprintBstr(fp_rsp, "sm = ", sm, smlen); fprintf(fp_rsp, "\n"); if ( (ret_val = crypto_sign_open(m1, &mlen1, sm, smlen, pk)) != 0) { printf("crypto_sign_open returned <%d>\n", ret_val); return KAT_CRYPTO_FAILURE; } if ( mlen != mlen1 ) { printf("crypto_sign_open returned bad 'mlen': Got <%llu>, expected <%llu>\n", mlen1, mlen); return KAT_CRYPTO_FAILURE; } if ( memcmp(m, m1, mlen) ) { printf("crypto_sign_open returned bad 'm' value\n"); return KAT_CRYPTO_FAILURE; } free(m); free(m1); free(sm); } while ( !done ); fclose(fp_req); fclose(fp_rsp); return KAT_SUCCESS; } // // ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.) 
// int FindMarker(FILE *infile, const char *marker) { char line[MAX_MARKER_LEN]; size_t i, len; int curr_line; len = strlen(marker); if ( len > MAX_MARKER_LEN-1 ) len = MAX_MARKER_LEN-1; for ( i=0; i= '0') && (ch <= '9') ) ich = (unsigned char)(ch - '0'); else if ( (ch >= 'A') && (ch <= 'F') ) ich = (unsigned char)(ch - 'A' + 10); else if ( (ch >= 'a') && (ch <= 'f') ) ich = (unsigned char)(ch - 'a' + 10); else // shouldn't ever get here ich = 0; for ( i=0; i> 4)); A[Length-1] = (unsigned char)((A[Length-1] << 4) | ich); } else return 0; return 1; } void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L) { unsigned long long i; fprintf(fp, "%s", S); for ( i=0; i #include #include "address.h" #include "params.h" #include "utils.h" /* * Specify which level of Merkle tree (the "layer") we're working on */ void set_layer_addr(uint32_t addr[8], uint32_t layer) { ((unsigned char *)addr)[SPX_OFFSET_LAYER] = (unsigned char)layer; } /* * Specify which Merkle tree within the level (the "tree address") we're working on */ void set_tree_addr(uint32_t addr[8], uint64_t tree) { #if (SPX_TREE_HEIGHT * (SPX_D - 1)) > 64 #error Subtree addressing is currently limited to at most 2^64 trees #endif ull_to_bytes(&((unsigned char *)addr)[SPX_OFFSET_TREE], 8, tree ); } /* * Specify the reason we'll use this address structure for, that is, what * hash will we compute with it. This is used so that unrelated types of * hashes don't accidentally get the same address structure. The type will be * one of the SPX_ADDR_TYPE constants */ void set_type(uint32_t addr[8], uint32_t type) { ((unsigned char *)addr)[SPX_OFFSET_TYPE] = (unsigned char)type; } /* * Copy the layer and tree fields of the address structure. This is used * when we're doing multiple types of hashes within the same Merkle tree */ void copy_subtree_addr(uint32_t out[8], const uint32_t in[8]) { memcpy( out, in, SPX_OFFSET_TREE+8 ); } /* These functions are used for OTS addresses. 
*/ /* * Specify which Merkle leaf we're working on; that is, which OTS keypair * we're talking about. */ void set_keypair_addr(uint32_t addr[8], uint32_t keypair) { u32_to_bytes(&((unsigned char *)addr)[SPX_OFFSET_KP_ADDR], keypair); } /* * Copy the layer, tree and keypair fields of the address structure. This is * used when we're doing multiple things within the same OTS keypair */ void copy_keypair_addr(uint32_t out[8], const uint32_t in[8]) { memcpy( out, in, SPX_OFFSET_TREE+8 ); memcpy( (unsigned char *)out + SPX_OFFSET_KP_ADDR, (unsigned char *)in + SPX_OFFSET_KP_ADDR, 4); } /* * Specify which Merkle chain within the OTS we're working with * (the chain address) */ void set_chain_addr(uint32_t addr[8], uint32_t chain) { ((unsigned char *)addr)[SPX_OFFSET_CHAIN_ADDR] = (unsigned char)chain; } /* * Specify where in the Merkle chain we are * (the hash address) */ void set_hash_addr(uint32_t addr[8], uint32_t hash) { ((unsigned char *)addr)[SPX_OFFSET_HASH_ADDR] = (unsigned char)hash; } /* These functions are used for all hash tree addresses (including FORS). 
*/ /* * Specify the height of the node in the Merkle/FORS tree we are in * (the tree height) */ void set_tree_height(uint32_t addr[8], uint32_t tree_height) { ((unsigned char *)addr)[SPX_OFFSET_TREE_HGT] = (unsigned char)tree_height; } /* * Specify the distance from the left edge of the node in the Merkle/FORS tree * (the tree index) */ void set_tree_index(uint32_t addr[8], uint32_t tree_index) { u32_to_bytes(&((unsigned char *)addr)[SPX_OFFSET_TREE_INDEX], tree_index ); } ================================================ FILE: ref/address.h ================================================ #ifndef SPX_ADDRESS_H #define SPX_ADDRESS_H #include #include "params.h" /* The hash types that are passed to set_type */ #define SPX_ADDR_TYPE_WOTS 0 #define SPX_ADDR_TYPE_WOTSPK 1 #define SPX_ADDR_TYPE_HASHTREE 2 #define SPX_ADDR_TYPE_FORSTREE 3 #define SPX_ADDR_TYPE_FORSPK 4 #define SPX_ADDR_TYPE_WOTSPRF 5 #define SPX_ADDR_TYPE_FORSPRF 6 #define set_layer_addr SPX_NAMESPACE(set_layer_addr) void set_layer_addr(uint32_t addr[8], uint32_t layer); #define set_tree_addr SPX_NAMESPACE(set_tree_addr) void set_tree_addr(uint32_t addr[8], uint64_t tree); #define set_type SPX_NAMESPACE(set_type) void set_type(uint32_t addr[8], uint32_t type); /* Copies the layer and tree part of one address into the other */ #define copy_subtree_addr SPX_NAMESPACE(copy_subtree_addr) void copy_subtree_addr(uint32_t out[8], const uint32_t in[8]); /* These functions are used for WOTS and FORS addresses. 
*/ #define set_keypair_addr SPX_NAMESPACE(set_keypair_addr) void set_keypair_addr(uint32_t addr[8], uint32_t keypair); #define set_chain_addr SPX_NAMESPACE(set_chain_addr) void set_chain_addr(uint32_t addr[8], uint32_t chain); #define set_hash_addr SPX_NAMESPACE(set_hash_addr) void set_hash_addr(uint32_t addr[8], uint32_t hash); #define copy_keypair_addr SPX_NAMESPACE(copy_keypair_addr) void copy_keypair_addr(uint32_t out[8], const uint32_t in[8]); /* These functions are used for all hash tree addresses (including FORS). */ #define set_tree_height SPX_NAMESPACE(set_tree_height) void set_tree_height(uint32_t addr[8], uint32_t tree_height); #define set_tree_index SPX_NAMESPACE(set_tree_index) void set_tree_index(uint32_t addr[8], uint32_t tree_index); #endif ================================================ FILE: ref/api.h ================================================ #ifndef SPX_API_H #define SPX_API_H #include #include #include "params.h" #define CRYPTO_ALGNAME "SPHINCS+" #define CRYPTO_SECRETKEYBYTES SPX_SK_BYTES #define CRYPTO_PUBLICKEYBYTES SPX_PK_BYTES #define CRYPTO_BYTES SPX_BYTES #define CRYPTO_SEEDBYTES 3*SPX_N /* * Returns the length of a secret key, in bytes */ unsigned long long crypto_sign_secretkeybytes(void); /* * Returns the length of a public key, in bytes */ unsigned long long crypto_sign_publickeybytes(void); /* * Returns the length of a signature, in bytes */ unsigned long long crypto_sign_bytes(void); /* * Returns the length of the seed required to generate a key pair, in bytes */ unsigned long long crypto_sign_seedbytes(void); /* * Generates a SPHINCS+ key pair given a seed. * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] * Format pk: [root || PUB_SEED] */ int crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk, const unsigned char *seed); /* * Generates a SPHINCS+ key pair. 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] * Format pk: [root || PUB_SEED] */ int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); /** * Returns an array containing a detached signature. */ int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk); /** * Verifies a detached signature and message under a given public key. */ int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk); /** * Returns an array containing the signature followed by the message. */ int crypto_sign(unsigned char *sm, unsigned long long *smlen, const unsigned char *m, unsigned long long mlen, const unsigned char *sk); /** * Verifies a given signature-message pair under a given public key. */ int crypto_sign_open(unsigned char *m, unsigned long long *mlen, const unsigned char *sm, unsigned long long smlen, const unsigned char *pk); #endif ================================================ FILE: ref/context.h ================================================ #ifndef SPX_CONTEXT_H #define SPX_CONTEXT_H #include #include "params.h" typedef struct { uint8_t pub_seed[SPX_N]; uint8_t sk_seed[SPX_N]; #ifdef SPX_SHA2 // sha256 state that absorbed pub_seed uint8_t state_seeded[40]; # if SPX_SHA512 // sha512 state that absorbed pub_seed uint8_t state_seeded_512[72]; # endif #endif #ifdef SPX_HARAKA uint64_t tweaked512_rc64[10][8]; uint32_t tweaked256_rc32[10][8]; #endif } spx_ctx; #endif ================================================ FILE: ref/fips202.c ================================================ /* Based on the public domain implementation in * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html * by Ronny Van Keer * and the public domain "TweetFips202" implementation * from https://twitter.com/tweetfips202 * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe */ #include #include #include "fips202.h" #define NROUNDS 24 #define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) /************************************************* * Name: load64 * * Description: Load 8 bytes into uint64_t in little-endian order * * Arguments: - const uint8_t *x: pointer to input byte array * * Returns the loaded 64-bit unsigned integer **************************************************/ static uint64_t load64(const uint8_t *x) { uint64_t r = 0; for (size_t i = 0; i < 8; ++i) { r |= (uint64_t)x[i] << 8 * i; } return r; } /************************************************* * Name: store64 * * Description: Store a 64-bit integer to a byte array in little-endian order * * Arguments: - uint8_t *x: pointer to the output byte array * - uint64_t u: input 64-bit unsigned integer **************************************************/ static void store64(uint8_t *x, uint64_t u) { for (size_t i = 0; i < 8; ++i) { x[i] = (uint8_t) (u >> 8 * i); } } /* Keccak round constants */ static const uint64_t KeccakF_RoundConstants[NROUNDS] = { 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL }; /************************************************* * Name: KeccakF1600_StatePermute * * Description: The Keccak F1600 Permutation * * Arguments: - uint64_t *state: pointer to input/output Keccak state **************************************************/ static void KeccakF1600_StatePermute(uint64_t *state) { int round; uint64_t Aba, Abe, Abi, 
Abo, Abu; uint64_t Aga, Age, Agi, Ago, Agu; uint64_t Aka, Ake, Aki, Ako, Aku; uint64_t Ama, Ame, Ami, Amo, Amu; uint64_t Asa, Ase, Asi, Aso, Asu; uint64_t BCa, BCe, BCi, BCo, BCu; uint64_t Da, De, Di, Do, Du; uint64_t Eba, Ebe, Ebi, Ebo, Ebu; uint64_t Ega, Ege, Egi, Ego, Egu; uint64_t Eka, Eke, Eki, Eko, Eku; uint64_t Ema, Eme, Emi, Emo, Emu; uint64_t Esa, Ese, Esi, Eso, Esu; // copyFromState(A, state) Aba = state[0]; Abe = state[1]; Abi = state[2]; Abo = state[3]; Abu = state[4]; Aga = state[5]; Age = state[6]; Agi = state[7]; Ago = state[8]; Agu = state[9]; Aka = state[10]; Ake = state[11]; Aki = state[12]; Ako = state[13]; Aku = state[14]; Ama = state[15]; Ame = state[16]; Ami = state[17]; Amo = state[18]; Amu = state[19]; Asa = state[20]; Ase = state[21]; Asi = state[22]; Aso = state[23]; Asu = state[24]; for (round = 0; round < NROUNDS; round += 2) { // prepareTheta BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; // thetaRhoPiChiIotaPrepareTheta(round , A, E) Da = BCu ^ ROL(BCe, 1); De = BCa ^ ROL(BCi, 1); Di = BCe ^ ROL(BCo, 1); Do = BCi ^ ROL(BCu, 1); Du = BCo ^ ROL(BCa, 1); Aba ^= Da; BCa = Aba; Age ^= De; BCe = ROL(Age, 44); Aki ^= Di; BCi = ROL(Aki, 43); Amo ^= Do; BCo = ROL(Amo, 21); Asu ^= Du; BCu = ROL(Asu, 14); Eba = BCa ^ ((~BCe) & BCi); Eba ^= KeccakF_RoundConstants[round]; Ebe = BCe ^ ((~BCi) & BCo); Ebi = BCi ^ ((~BCo) & BCu); Ebo = BCo ^ ((~BCu) & BCa); Ebu = BCu ^ ((~BCa) & BCe); Abo ^= Do; BCa = ROL(Abo, 28); Agu ^= Du; BCe = ROL(Agu, 20); Aka ^= Da; BCi = ROL(Aka, 3); Ame ^= De; BCo = ROL(Ame, 45); Asi ^= Di; BCu = ROL(Asi, 61); Ega = BCa ^ ((~BCe) & BCi); Ege = BCe ^ ((~BCi) & BCo); Egi = BCi ^ ((~BCo) & BCu); Ego = BCo ^ ((~BCu) & BCa); Egu = BCu ^ ((~BCa) & BCe); Abe ^= De; BCa = ROL(Abe, 1); Agi ^= Di; BCe = ROL(Agi, 6); Ako ^= Do; BCi = ROL(Ako, 25); Amu ^= Du; BCo = ROL(Amu, 8); Asa ^= Da; BCu = ROL(Asa, 18); Eka 
= BCa ^ ((~BCe) & BCi); Eke = BCe ^ ((~BCi) & BCo); Eki = BCi ^ ((~BCo) & BCu); Eko = BCo ^ ((~BCu) & BCa); Eku = BCu ^ ((~BCa) & BCe); Abu ^= Du; BCa = ROL(Abu, 27); Aga ^= Da; BCe = ROL(Aga, 36); Ake ^= De; BCi = ROL(Ake, 10); Ami ^= Di; BCo = ROL(Ami, 15); Aso ^= Do; BCu = ROL(Aso, 56); Ema = BCa ^ ((~BCe) & BCi); Eme = BCe ^ ((~BCi) & BCo); Emi = BCi ^ ((~BCo) & BCu); Emo = BCo ^ ((~BCu) & BCa); Emu = BCu ^ ((~BCa) & BCe); Abi ^= Di; BCa = ROL(Abi, 62); Ago ^= Do; BCe = ROL(Ago, 55); Aku ^= Du; BCi = ROL(Aku, 39); Ama ^= Da; BCo = ROL(Ama, 41); Ase ^= De; BCu = ROL(Ase, 2); Esa = BCa ^ ((~BCe) & BCi); Ese = BCe ^ ((~BCi) & BCo); Esi = BCi ^ ((~BCo) & BCu); Eso = BCo ^ ((~BCu) & BCa); Esu = BCu ^ ((~BCa) & BCe); // prepareTheta BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; // thetaRhoPiChiIotaPrepareTheta(round+1, E, A) Da = BCu ^ ROL(BCe, 1); De = BCa ^ ROL(BCi, 1); Di = BCe ^ ROL(BCo, 1); Do = BCi ^ ROL(BCu, 1); Du = BCo ^ ROL(BCa, 1); Eba ^= Da; BCa = Eba; Ege ^= De; BCe = ROL(Ege, 44); Eki ^= Di; BCi = ROL(Eki, 43); Emo ^= Do; BCo = ROL(Emo, 21); Esu ^= Du; BCu = ROL(Esu, 14); Aba = BCa ^ ((~BCe) & BCi); Aba ^= KeccakF_RoundConstants[round + 1]; Abe = BCe ^ ((~BCi) & BCo); Abi = BCi ^ ((~BCo) & BCu); Abo = BCo ^ ((~BCu) & BCa); Abu = BCu ^ ((~BCa) & BCe); Ebo ^= Do; BCa = ROL(Ebo, 28); Egu ^= Du; BCe = ROL(Egu, 20); Eka ^= Da; BCi = ROL(Eka, 3); Eme ^= De; BCo = ROL(Eme, 45); Esi ^= Di; BCu = ROL(Esi, 61); Aga = BCa ^ ((~BCe) & BCi); Age = BCe ^ ((~BCi) & BCo); Agi = BCi ^ ((~BCo) & BCu); Ago = BCo ^ ((~BCu) & BCa); Agu = BCu ^ ((~BCa) & BCe); Ebe ^= De; BCa = ROL(Ebe, 1); Egi ^= Di; BCe = ROL(Egi, 6); Eko ^= Do; BCi = ROL(Eko, 25); Emu ^= Du; BCo = ROL(Emu, 8); Esa ^= Da; BCu = ROL(Esa, 18); Aka = BCa ^ ((~BCe) & BCi); Ake = BCe ^ ((~BCi) & BCo); Aki = BCi ^ ((~BCo) & BCu); Ako = BCo ^ ((~BCu) & BCa); Aku = BCu ^ ((~BCa) & BCe); 
Ebu ^= Du; BCa = ROL(Ebu, 27); Ega ^= Da; BCe = ROL(Ega, 36); Eke ^= De; BCi = ROL(Eke, 10); Emi ^= Di; BCo = ROL(Emi, 15); Eso ^= Do; BCu = ROL(Eso, 56); Ama = BCa ^ ((~BCe) & BCi); Ame = BCe ^ ((~BCi) & BCo); Ami = BCi ^ ((~BCo) & BCu); Amo = BCo ^ ((~BCu) & BCa); Amu = BCu ^ ((~BCa) & BCe); Ebi ^= Di; BCa = ROL(Ebi, 62); Ego ^= Do; BCe = ROL(Ego, 55); Eku ^= Du; BCi = ROL(Eku, 39); Ema ^= Da; BCo = ROL(Ema, 41); Ese ^= De; BCu = ROL(Ese, 2); Asa = BCa ^ ((~BCe) & BCi); Ase = BCe ^ ((~BCi) & BCo); Asi = BCi ^ ((~BCo) & BCu); Aso = BCo ^ ((~BCu) & BCa); Asu = BCu ^ ((~BCa) & BCe); } // copyToState(state, A) state[0] = Aba; state[1] = Abe; state[2] = Abi; state[3] = Abo; state[4] = Abu; state[5] = Aga; state[6] = Age; state[7] = Agi; state[8] = Ago; state[9] = Agu; state[10] = Aka; state[11] = Ake; state[12] = Aki; state[13] = Ako; state[14] = Aku; state[15] = Ama; state[16] = Ame; state[17] = Ami; state[18] = Amo; state[19] = Amu; state[20] = Asa; state[21] = Ase; state[22] = Asi; state[23] = Aso; state[24] = Asu; } /************************************************* * Name: keccak_absorb * * Description: Absorb step of Keccak; * non-incremental, starts by zeroeing the state. 
* * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) * - const uint8_t *m: pointer to input to be absorbed into s * - size_t mlen: length of input in bytes * - uint8_t p: domain-separation byte for different * Keccak-derived functions **************************************************/ static void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, size_t mlen, uint8_t p) { size_t i; uint8_t t[200]; /* Zero state */ for (i = 0; i < 25; ++i) { s[i] = 0; } while (mlen >= r) { for (i = 0; i < r / 8; ++i) { s[i] ^= load64(m + 8 * i); } KeccakF1600_StatePermute(s); mlen -= r; m += r; } for (i = 0; i < r; ++i) { t[i] = 0; } for (i = 0; i < mlen; ++i) { t[i] = m[i]; } t[i] = p; t[r - 1] |= 128; for (i = 0; i < r / 8; ++i) { s[i] ^= load64(t + 8 * i); } } /************************************************* * Name: keccak_squeezeblocks * * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. * Modifies the state. Can be called multiple times to keep * squeezing, i.e., is incremental. * * Arguments: - uint8_t *h: pointer to output blocks * - size_t nblocks: number of blocks to be * squeezed (written to h) * - uint64_t *s: pointer to input/output Keccak state * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) **************************************************/ static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, uint64_t *s, uint32_t r) { while (nblocks > 0) { KeccakF1600_StatePermute(s); for (size_t i = 0; i < (r >> 3); i++) { store64(h + 8 * i, s[i]); } h += r; nblocks--; } } /************************************************* * Name: keccak_inc_init * * Description: Initializes the incremental Keccak state to zero. * * Arguments: - uint64_t *s_inc: pointer to input/output incremental state * First 25 values represent Keccak state. * 26th value represents either the number of absorbed bytes * that have not been permuted, or not-yet-squeezed bytes. 
**************************************************/
static void keccak_inc_init(uint64_t *s_inc)
{
    /* Clear the 25 state lanes and the byte counter kept in slot 25. */
    for (size_t lane = 0; lane <= 25; ++lane) {
        s_inc[lane] = 0;
    }
}

/*************************************************
 * Name:        keccak_inc_absorb
 *
 * Description: Incremental keccak absorb
 *              Call after keccak_inc_init and before keccak_inc_finalize;
 *              may be called any number of times.
 *
 * Arguments:   - uint64_t *s_inc: pointer to input/output incremental state
 *                First 25 values represent Keccak state.
 *                26th value represents either the number of absorbed bytes
 *                that have not been permuted, or not-yet-squeezed bytes.
 *              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)
 *              - const uint8_t *m: pointer to input to be absorbed into s
 *              - size_t mlen: length of input in bytes
 **************************************************/
static void keccak_inc_absorb(uint64_t *s_inc, uint32_t r, const uint8_t *m,
                              size_t mlen)
{
    size_t k;

    /* s_inc[25] counts bytes already XORed into the state but not yet permuted. */
    while (mlen + s_inc[25] >= r) {
        /* Bytes still needed to complete the current r-byte block. */
        const size_t room = (size_t)(r - s_inc[25]);

        for (k = 0; k < room; k++) {
            /* Byte position within the block; lanes are little-endian. */
            const uint64_t pos = s_inc[25] + k;

            s_inc[pos >> 3] ^= (uint64_t)m[k] << (8 * (pos & 0x07));
        }
        m += room;
        mlen -= room;
        s_inc[25] = 0;

        KeccakF1600_StatePermute(s_inc);
    }

    /* Fewer than r bytes remain: XOR them in and remember how many. */
    for (k = 0; k < mlen; k++) {
        const uint64_t pos = s_inc[25] + k;

        s_inc[pos >> 3] ^= (uint64_t)m[k] << (8 * (pos & 0x07));
    }
    s_inc[25] += mlen;
}

/*************************************************
 * Name:        keccak_inc_finalize
 *
 * Description: Finalizes Keccak absorb phase, prepares for squeezing
 *
 * Arguments:   - uint64_t *s_inc: pointer to input/output incremental state
 *                First 25 values represent Keccak state.
 *                26th value represents either the number of absorbed bytes
 *                that have not been permuted, or not-yet-squeezed bytes.
* - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) * - uint8_t p: domain-separation byte for different * Keccak-derived functions **************************************************/ static void keccak_inc_finalize(uint64_t *s_inc, uint32_t r, uint8_t p) { /* After keccak_inc_absorb, we are guaranteed that s_inc[25] < r, so we can always use one more byte for p in the current state. */ s_inc[s_inc[25] >> 3] ^= (uint64_t)p << (8 * (s_inc[25] & 0x07)); s_inc[(r - 1) >> 3] ^= (uint64_t)128 << (8 * ((r - 1) & 0x07)); s_inc[25] = 0; } /************************************************* * Name: keccak_inc_squeeze * * Description: Incremental Keccak squeeze; can be called on byte-level * * Arguments: - uint8_t *h: pointer to output bytes * - size_t outlen: number of bytes to be squeezed * - uint64_t *s_inc: pointer to input/output incremental state * First 25 values represent Keccak state. * 26th value represents either the number of absorbed bytes * that have not been permuted, or not-yet-squeezed bytes. * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) **************************************************/ static void keccak_inc_squeeze(uint8_t *h, size_t outlen, uint64_t *s_inc, uint32_t r) { size_t i; /* First consume any bytes we still have sitting around */ for (i = 0; i < outlen && i < s_inc[25]; i++) { /* There are s_inc[25] bytes left, so r - s_inc[25] is the first available byte. We consume from there, i.e., up to r. 
*/ h[i] = (uint8_t)(s_inc[(r - s_inc[25] + i) >> 3] >> (8 * ((r - s_inc[25] + i) & 0x07))); } h += i; outlen -= i; s_inc[25] -= i; /* Then squeeze the remaining necessary blocks */ while (outlen > 0) { KeccakF1600_StatePermute(s_inc); for (i = 0; i < outlen && i < r; i++) { h[i] = (uint8_t)(s_inc[i >> 3] >> (8 * (i & 0x07))); } h += i; outlen -= i; s_inc[25] = r - i; } } void shake256_inc_init(uint64_t *s_inc) { keccak_inc_init(s_inc); } void shake256_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen) { keccak_inc_absorb(s_inc, SHAKE256_RATE, input, inlen); } void shake256_inc_finalize(uint64_t *s_inc) { keccak_inc_finalize(s_inc, SHAKE256_RATE, 0x1F); } void shake256_inc_squeeze(uint8_t *output, size_t outlen, uint64_t *s_inc) { keccak_inc_squeeze(output, outlen, s_inc, SHAKE256_RATE); } /************************************************* * Name: shake256_absorb * * Description: Absorb step of the SHAKE256 XOF. * non-incremental, starts by zeroeing the state. * * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state * - const uint8_t *input: pointer to input to be absorbed * into s * - size_t inlen: length of input in bytes **************************************************/ void shake256_absorb(uint64_t *s, const uint8_t *input, size_t inlen) { keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F); } /************************************************* * Name: shake256_squeezeblocks * * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of * SHAKE256_RATE bytes each. Modifies the state. Can be called * multiple times to keep squeezing, i.e., is incremental. 
* * Arguments: - uint8_t *output: pointer to output blocks * - size_t nblocks: number of blocks to be squeezed * (written to output) * - uint64_t *s: pointer to input/output Keccak state **************************************************/ void shake256_squeezeblocks(uint8_t *output, size_t nblocks, uint64_t *s) { keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE); } /************************************************* * Name: shake256 * * Description: SHAKE256 XOF with non-incremental API * * Arguments: - uint8_t *output: pointer to output * - size_t outlen: requested output length in bytes * - const uint8_t *input: pointer to input * - size_t inlen: length of input in bytes **************************************************/ void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen) { size_t nblocks = outlen / SHAKE256_RATE; uint8_t t[SHAKE256_RATE]; uint64_t s[25]; shake256_absorb(s, input, inlen); shake256_squeezeblocks(output, nblocks, s); output += nblocks * SHAKE256_RATE; outlen -= nblocks * SHAKE256_RATE; if (outlen) { shake256_squeezeblocks(t, 1, s); for (size_t i = 0; i < outlen; ++i) { output[i] = t[i]; } } } ================================================ FILE: ref/fips202.h ================================================ #ifndef SPX_FIPS202_H #define SPX_FIPS202_H #include #include #define SHAKE128_RATE 168 #define SHAKE256_RATE 136 #define SHA3_256_RATE 136 #define SHA3_512_RATE 72 void shake128_absorb(uint64_t *s, const uint8_t *input, size_t inlen); void shake128_squeezeblocks(uint8_t *output, size_t nblocks, uint64_t *s); void shake128_inc_init(uint64_t *s_inc); void shake128_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen); void shake128_inc_finalize(uint64_t *s_inc); void shake128_inc_squeeze(uint8_t *output, size_t outlen, uint64_t *s_inc); void shake256_absorb(uint64_t *s, const uint8_t *input, size_t inlen); void shake256_squeezeblocks(uint8_t *output, size_t nblocks, uint64_t *s); void 
shake256_inc_init(uint64_t *s_inc); void shake256_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen); void shake256_inc_finalize(uint64_t *s_inc); void shake256_inc_squeeze(uint8_t *output, size_t outlen, uint64_t *s_inc); void shake128(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); void sha3_256_inc_init(uint64_t *s_inc); void sha3_256_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen); void sha3_256_inc_finalize(uint8_t *output, uint64_t *s_inc); void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen); void sha3_512_inc_init(uint64_t *s_inc); void sha3_512_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen); void sha3_512_inc_finalize(uint8_t *output, uint64_t *s_inc); void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen); #endif ================================================ FILE: ref/fors.c ================================================ #include #include #include #include "fors.h" #include "utils.h" #include "utilsx1.h" #include "hash.h" #include "thash.h" #include "address.h" static void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx, uint32_t fors_leaf_addr[8]) { prf_addr(sk, ctx, fors_leaf_addr); } static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, const spx_ctx *ctx, uint32_t fors_leaf_addr[8]) { thash(leaf, sk, 1, ctx, fors_leaf_addr); } struct fors_gen_leaf_info { uint32_t leaf_addrx[8]; }; static void fors_gen_leafx1(unsigned char *leaf, const spx_ctx *ctx, uint32_t addr_idx, void *info) { struct fors_gen_leaf_info *fors_info = info; uint32_t *fors_leaf_addr = fors_info->leaf_addrx; /* Only set the parts that the caller doesn't set */ set_tree_index(fors_leaf_addr, addr_idx); set_type(fors_leaf_addr, SPX_ADDR_TYPE_FORSPRF); fors_gen_sk(leaf, ctx, fors_leaf_addr); set_type(fors_leaf_addr, SPX_ADDR_TYPE_FORSTREE); fors_sk_to_leaf(leaf, leaf, ctx, fors_leaf_addr); 
} /** * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. * Assumes indices has space for SPX_FORS_TREES integers. */ static void message_to_indices(uint32_t *indices, const unsigned char *m) { unsigned int i, j; unsigned int offset = 0; for (i = 0; i < SPX_FORS_TREES; i++) { indices[i] = 0; for (j = 0; j < SPX_FORS_HEIGHT; j++) { indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 1u) << j; offset++; } } } /** * Signs a message m, deriving the secret key from sk_seed and the FTS address. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ void fors_sign(unsigned char *sig, unsigned char *pk, const unsigned char *m, const spx_ctx *ctx, const uint32_t fors_addr[8]) { uint32_t indices[SPX_FORS_TREES]; unsigned char roots[SPX_FORS_TREES * SPX_N]; uint32_t fors_tree_addr[8] = {0}; struct fors_gen_leaf_info fors_info = {0}; uint32_t *fors_leaf_addr = fors_info.leaf_addrx; uint32_t fors_pk_addr[8] = {0}; uint32_t idx_offset; unsigned int i; copy_keypair_addr(fors_tree_addr, fors_addr); copy_keypair_addr(fors_leaf_addr, fors_addr); copy_keypair_addr(fors_pk_addr, fors_addr); set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK); message_to_indices(indices, m); for (i = 0; i < SPX_FORS_TREES; i++) { idx_offset = i * (1 << SPX_FORS_HEIGHT); set_tree_height(fors_tree_addr, 0); set_tree_index(fors_tree_addr, indices[i] + idx_offset); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF); /* Include the secret key part that produces the selected leaf node. */ fors_gen_sk(sig, ctx, fors_tree_addr); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE); sig += SPX_N; /* Compute the authentication path for this leaf node. */ treehashx1(roots + i*SPX_N, sig, ctx, indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx1, fors_tree_addr, &fors_info); sig += SPX_N * SPX_FORS_HEIGHT; } /* Hash horizontally across all tree roots to derive the public key. 
*/ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } /** * Derives the FORS public key from a signature. * This can be used for verification by comparing to a known public key, or to * subsequently verify a signature on the derived public key. The latter is the * typical use-case when used as an FTS below an OTS in a hypertree. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ void fors_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *m, const spx_ctx* ctx, const uint32_t fors_addr[8]) { uint32_t indices[SPX_FORS_TREES]; unsigned char roots[SPX_FORS_TREES * SPX_N]; unsigned char leaf[SPX_N]; uint32_t fors_tree_addr[8] = {0}; uint32_t fors_pk_addr[8] = {0}; uint32_t idx_offset; unsigned int i; copy_keypair_addr(fors_tree_addr, fors_addr); copy_keypair_addr(fors_pk_addr, fors_addr); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE); set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK); message_to_indices(indices, m); for (i = 0; i < SPX_FORS_TREES; i++) { idx_offset = i * (1 << SPX_FORS_HEIGHT); set_tree_height(fors_tree_addr, 0); set_tree_index(fors_tree_addr, indices[i] + idx_offset); /* Derive the leaf from the included secret key part. */ fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr); sig += SPX_N; /* Derive the corresponding root node of this tree. */ compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset, sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr); sig += SPX_N * SPX_FORS_HEIGHT; } /* Hash horizontally across all tree roots to derive the public key. */ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } ================================================ FILE: ref/fors.h ================================================ #ifndef SPX_FORS_H #define SPX_FORS_H #include #include "params.h" #include "context.h" /** * Signs a message m, deriving the secret key from sk_seed and the FTS address. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. 
*/ #define fors_sign SPX_NAMESPACE(fors_sign) void fors_sign(unsigned char *sig, unsigned char *pk, const unsigned char *m, const spx_ctx* ctx, const uint32_t fors_addr[8]); /** * Derives the FORS public key from a signature. * This can be used for verification by comparing to a known public key, or to * subsequently verify a signature on the derived public key. The latter is the * typical use-case when used as an FTS below an OTS in a hypertree. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ #define fors_pk_from_sig SPX_NAMESPACE(fors_pk_from_sig) void fors_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *m, const spx_ctx* ctx, const uint32_t fors_addr[8]); #endif ================================================ FILE: ref/haraka.c ================================================ /* * Constant time implementation of the Haraka hash function. * * The bit-sliced implementation of the AES round functions are * based on the AES implementation in BearSSL written * by Thomas Pornin , licensed as follows: * * Copyright (c) 2016 Thomas Pornin * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include "haraka.h" #include "utils.h" #define HARAKAS_RATE 32 static const uint64_t haraka512_rc64[10][8] = { {0x24cf0ab9086f628b, 0xbdd6eeecc83b8382, 0xd96fb0306cdad0a7, 0xaace082ac8f95f89, 0x449d8e8870d7041f, 0x49bb2f80b2b3e2f8, 0x0569ae98d93bb258, 0x23dc9691e7d6a4b1}, {0xd8ba10ede0fe5b6e, 0x7ecf7dbe424c7b8e, 0x6ea9949c6df62a31, 0xbf3f3c97ec9c313e, 0x241d03a196a1861e, 0xead3a51116e5a2ea, 0x77d479fcad9574e3, 0x18657a1af894b7a0}, {0x10671e1a7f595522, 0xd9a00ff675d28c7b, 0x2f1edf0d2b9ba661, 0xb8ff58b8e3de45f9, 0xee29261da9865c02, 0xd1532aa4b50bdf43, 0x8bf858159b231bb1, 0xdf17439d22d4f599}, {0xdd4b2f0870b918c0, 0x757a81f3b39b1bb6, 0x7a5c556898952e3f, 0x7dd70a16d915d87a, 0x3ae61971982b8301, 0xc3ab319e030412be, 0x17c0033ac094a8cb, 0x5a0630fc1a8dc4ef}, {0x17708988c1632f73, 0xf92ddae090b44f4f, 0x11ac0285c43aa314, 0x509059941936b8ba, 0xd03e152fa2ce9b69, 0x3fbcbcb63a32998b, 0x6204696d692254f7, 0x915542ed93ec59b4}, {0xf4ed94aa8879236e, 0xff6cb41cd38e03c0, 0x069b38602368aeab, 0x669495b820f0ddba, 0xf42013b1b8bf9e3d, 0xcf935efe6439734d, 0xbc1dcf42ca29e3f8, 0x7e6d3ed29f78ad67}, {0xf3b0f6837ffcddaa, 0x3a76faef934ddf41, 0xcec7ae583a9c8e35, 0xe4dd18c68f0260af, 0x2c0e5df1ad398eaa, 0x478df5236ae22e8c, 0xfb944c46fe865f39, 0xaa48f82f028132ba}, {0x231b9ae2b76aca77, 0x292a76a712db0b40, 0x5850625dc8134491, 0x73137dd469810fb5, 0x8a12a6a202a474fd, 0xd36fd9daa78bdb80, 0xb34c5e733505706f, 0xbaf1cdca818d9d96}, {0x2e99781335e8c641, 0xbddfe5cce47d560e, 0xf74e9bf32e5e040c, 0x1d7a709d65996be9, 0x670df36a9cf66cdd, 0xd05ef84a176a2875, 0x0f888e828cb1c44e, 0x1a79e9c9727b052c}, {0x83497348628d84de, 0x2e9387d51f22a754, 0xb000068da2f852d6, 0x378c9e1190fd6fe5, 0x870027c316de7293, 0xe51a9d4462e047bb, 
0x90ecf7f8c6251195, 0x655953bfbed90a9c}, }; static inline uint32_t br_dec32le(const unsigned char *src) { return (uint32_t)src[0] | ((uint32_t)src[1] << 8) | ((uint32_t)src[2] << 16) | ((uint32_t)src[3] << 24); } static void br_range_dec32le(uint32_t *v, size_t num, const unsigned char *src) { while (num-- > 0) { *v ++ = br_dec32le(src); src += 4; } } static inline void br_enc32le(unsigned char *dst, uint32_t x) { dst[0] = (unsigned char)x; dst[1] = (unsigned char)(x >> 8); dst[2] = (unsigned char)(x >> 16); dst[3] = (unsigned char)(x >> 24); } static void br_range_enc32le(unsigned char *dst, const uint32_t *v, size_t num) { while (num-- > 0) { br_enc32le(dst, *v ++); dst += 4; } } static void br_aes_ct64_bitslice_Sbox(uint64_t *q) { /* * This S-box implementation is a straightforward translation of * the circuit described by Boyar and Peralta in "A new * combinational logic minimization technique with applications * to cryptology" (https://eprint.iacr.org/2009/191.pdf). * * Note that variables x* (input) and s* (output) are numbered * in "reverse" order (x0 is the high bit, x7 is the low bit). */ uint64_t x0, x1, x2, x3, x4, x5, x6, x7; uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9; uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; uint64_t y20, y21; uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9; uint64_t z10, z11, z12, z13, z14, z15, z16, z17; uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19; uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29; uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39; uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49; uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59; uint64_t t60, t61, t62, t63, t64, t65, t66, t67; uint64_t s0, s1, s2, s3, s4, s5, s6, s7; x0 = q[7]; x1 = q[6]; x2 = q[5]; x3 = q[4]; x4 = q[3]; x5 = q[2]; x6 = q[1]; x7 = q[0]; /* * Top linear transformation. 
*/ y14 = x3 ^ x5; y13 = x0 ^ x6; y9 = x0 ^ x3; y8 = x0 ^ x5; t0 = x1 ^ x2; y1 = t0 ^ x7; y4 = y1 ^ x3; y12 = y13 ^ y14; y2 = y1 ^ x0; y5 = y1 ^ x6; y3 = y5 ^ y8; t1 = x4 ^ y12; y15 = t1 ^ x5; y20 = t1 ^ x1; y6 = y15 ^ x7; y10 = y15 ^ t0; y11 = y20 ^ y9; y7 = x7 ^ y11; y17 = y10 ^ y11; y19 = y10 ^ y8; y16 = t0 ^ y11; y21 = y13 ^ y16; y18 = x0 ^ y16; /* * Non-linear section. */ t2 = y12 & y15; t3 = y3 & y6; t4 = t3 ^ t2; t5 = y4 & x7; t6 = t5 ^ t2; t7 = y13 & y16; t8 = y5 & y1; t9 = t8 ^ t7; t10 = y2 & y7; t11 = t10 ^ t7; t12 = y9 & y11; t13 = y14 & y17; t14 = t13 ^ t12; t15 = y8 & y10; t16 = t15 ^ t12; t17 = t4 ^ t14; t18 = t6 ^ t16; t19 = t9 ^ t14; t20 = t11 ^ t16; t21 = t17 ^ y20; t22 = t18 ^ y19; t23 = t19 ^ y21; t24 = t20 ^ y18; t25 = t21 ^ t22; t26 = t21 & t23; t27 = t24 ^ t26; t28 = t25 & t27; t29 = t28 ^ t22; t30 = t23 ^ t24; t31 = t22 ^ t26; t32 = t31 & t30; t33 = t32 ^ t24; t34 = t23 ^ t33; t35 = t27 ^ t33; t36 = t24 & t35; t37 = t36 ^ t34; t38 = t27 ^ t36; t39 = t29 & t38; t40 = t25 ^ t39; t41 = t40 ^ t37; t42 = t29 ^ t33; t43 = t29 ^ t40; t44 = t33 ^ t37; t45 = t42 ^ t41; z0 = t44 & y15; z1 = t37 & y6; z2 = t33 & x7; z3 = t43 & y16; z4 = t40 & y1; z5 = t29 & y7; z6 = t42 & y11; z7 = t45 & y17; z8 = t41 & y10; z9 = t44 & y12; z10 = t37 & y3; z11 = t33 & y4; z12 = t43 & y13; z13 = t40 & y5; z14 = t29 & y2; z15 = t42 & y9; z16 = t45 & y14; z17 = t41 & y8; /* * Bottom linear transformation. 
*/ t46 = z15 ^ z16; t47 = z10 ^ z11; t48 = z5 ^ z13; t49 = z9 ^ z10; t50 = z2 ^ z12; t51 = z2 ^ z5; t52 = z7 ^ z8; t53 = z0 ^ z3; t54 = z6 ^ z7; t55 = z16 ^ z17; t56 = z12 ^ t48; t57 = t50 ^ t53; t58 = z4 ^ t46; t59 = z3 ^ t54; t60 = t46 ^ t57; t61 = z14 ^ t57; t62 = t52 ^ t58; t63 = t49 ^ t58; t64 = z4 ^ t59; t65 = t61 ^ t62; t66 = z1 ^ t63; s0 = t59 ^ t63; s6 = t56 ^ ~t62; s7 = t48 ^ ~t60; t67 = t64 ^ t65; s3 = t53 ^ t66; s4 = t51 ^ t66; s5 = t47 ^ t65; s1 = t64 ^ ~s3; s2 = t55 ^ ~t67; q[7] = s0; q[6] = s1; q[5] = s2; q[4] = s3; q[3] = s4; q[2] = s5; q[1] = s6; q[0] = s7; } static void br_aes_ct_bitslice_Sbox(uint32_t *q) { /* * This S-box implementation is a straightforward translation of * the circuit described by Boyar and Peralta in "A new * combinational logic minimization technique with applications * to cryptology" (https://eprint.iacr.org/2009/191.pdf). * * Note that variables x* (input) and s* (output) are numbered * in "reverse" order (x0 is the high bit, x7 is the low bit). */ uint32_t x0, x1, x2, x3, x4, x5, x6, x7; uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9; uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; uint32_t y20, y21; uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9; uint32_t z10, z11, z12, z13, z14, z15, z16, z17; uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19; uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29; uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39; uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49; uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59; uint32_t t60, t61, t62, t63, t64, t65, t66, t67; uint32_t s0, s1, s2, s3, s4, s5, s6, s7; x0 = q[7]; x1 = q[6]; x2 = q[5]; x3 = q[4]; x4 = q[3]; x5 = q[2]; x6 = q[1]; x7 = q[0]; /* * Top linear transformation. 
*/ y14 = x3 ^ x5; y13 = x0 ^ x6; y9 = x0 ^ x3; y8 = x0 ^ x5; t0 = x1 ^ x2; y1 = t0 ^ x7; y4 = y1 ^ x3; y12 = y13 ^ y14; y2 = y1 ^ x0; y5 = y1 ^ x6; y3 = y5 ^ y8; t1 = x4 ^ y12; y15 = t1 ^ x5; y20 = t1 ^ x1; y6 = y15 ^ x7; y10 = y15 ^ t0; y11 = y20 ^ y9; y7 = x7 ^ y11; y17 = y10 ^ y11; y19 = y10 ^ y8; y16 = t0 ^ y11; y21 = y13 ^ y16; y18 = x0 ^ y16; /* * Non-linear section. */ t2 = y12 & y15; t3 = y3 & y6; t4 = t3 ^ t2; t5 = y4 & x7; t6 = t5 ^ t2; t7 = y13 & y16; t8 = y5 & y1; t9 = t8 ^ t7; t10 = y2 & y7; t11 = t10 ^ t7; t12 = y9 & y11; t13 = y14 & y17; t14 = t13 ^ t12; t15 = y8 & y10; t16 = t15 ^ t12; t17 = t4 ^ t14; t18 = t6 ^ t16; t19 = t9 ^ t14; t20 = t11 ^ t16; t21 = t17 ^ y20; t22 = t18 ^ y19; t23 = t19 ^ y21; t24 = t20 ^ y18; t25 = t21 ^ t22; t26 = t21 & t23; t27 = t24 ^ t26; t28 = t25 & t27; t29 = t28 ^ t22; t30 = t23 ^ t24; t31 = t22 ^ t26; t32 = t31 & t30; t33 = t32 ^ t24; t34 = t23 ^ t33; t35 = t27 ^ t33; t36 = t24 & t35; t37 = t36 ^ t34; t38 = t27 ^ t36; t39 = t29 & t38; t40 = t25 ^ t39; t41 = t40 ^ t37; t42 = t29 ^ t33; t43 = t29 ^ t40; t44 = t33 ^ t37; t45 = t42 ^ t41; z0 = t44 & y15; z1 = t37 & y6; z2 = t33 & x7; z3 = t43 & y16; z4 = t40 & y1; z5 = t29 & y7; z6 = t42 & y11; z7 = t45 & y17; z8 = t41 & y10; z9 = t44 & y12; z10 = t37 & y3; z11 = t33 & y4; z12 = t43 & y13; z13 = t40 & y5; z14 = t29 & y2; z15 = t42 & y9; z16 = t45 & y14; z17 = t41 & y8; /* * Bottom linear transformation. 
*/ t46 = z15 ^ z16; t47 = z10 ^ z11; t48 = z5 ^ z13; t49 = z9 ^ z10; t50 = z2 ^ z12; t51 = z2 ^ z5; t52 = z7 ^ z8; t53 = z0 ^ z3; t54 = z6 ^ z7; t55 = z16 ^ z17; t56 = z12 ^ t48; t57 = t50 ^ t53; t58 = z4 ^ t46; t59 = z3 ^ t54; t60 = t46 ^ t57; t61 = z14 ^ t57; t62 = t52 ^ t58; t63 = t49 ^ t58; t64 = z4 ^ t59; t65 = t61 ^ t62; t66 = z1 ^ t63; s0 = t59 ^ t63; s6 = t56 ^ ~t62; s7 = t48 ^ ~t60; t67 = t64 ^ t65; s3 = t53 ^ t66; s4 = t51 ^ t66; s5 = t47 ^ t65; s1 = t64 ^ ~s3; s2 = t55 ^ ~t67; q[7] = s0; q[6] = s1; q[5] = s2; q[4] = s3; q[3] = s4; q[2] = s5; q[1] = s6; q[0] = s7; } static void br_aes_ct_ortho(uint32_t *q) { #define SWAPN_32(cl, ch, s, x, y) do { \ uint32_t a, b; \ a = (x); \ b = (y); \ (x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \ (y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \ } while (0) #define SWAP2_32(x, y) SWAPN_32(0x55555555, 0xAAAAAAAA, 1, x, y) #define SWAP4_32(x, y) SWAPN_32(0x33333333, 0xCCCCCCCC, 2, x, y) #define SWAP8_32(x, y) SWAPN_32(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y) SWAP2_32(q[0], q[1]); SWAP2_32(q[2], q[3]); SWAP2_32(q[4], q[5]); SWAP2_32(q[6], q[7]); SWAP4_32(q[0], q[2]); SWAP4_32(q[1], q[3]); SWAP4_32(q[4], q[6]); SWAP4_32(q[5], q[7]); SWAP8_32(q[0], q[4]); SWAP8_32(q[1], q[5]); SWAP8_32(q[2], q[6]); SWAP8_32(q[3], q[7]); } static inline void add_round_key32(uint32_t *q, const uint32_t *sk) { q[0] ^= sk[0]; q[1] ^= sk[1]; q[2] ^= sk[2]; q[3] ^= sk[3]; q[4] ^= sk[4]; q[5] ^= sk[5]; q[6] ^= sk[6]; q[7] ^= sk[7]; } static inline void shift_rows32(uint32_t *q) { int i; for (i = 0; i < 8; i++) { uint32_t x; x = q[i]; q[i] = (x & 0x000000FF) | ((x & 0x0000FC00) >> 2) | ((x & 0x00000300) << 6) | ((x & 0x00F00000) >> 4) | ((x & 0x000F0000) << 4) | ((x & 0xC0000000) >> 6) | ((x & 0x3F000000) << 2); } } static inline uint32_t rotr16(uint32_t x) { return (x << 16) | (x >> 16); } static inline void mix_columns32(uint32_t *q) { uint32_t q0, q1, q2, q3, q4, q5, q6, q7; uint32_t r0, r1, r2, r3, r4, r5, r6, r7; q0 = q[0]; q1 = 
q[1]; q2 = q[2]; q3 = q[3]; q4 = q[4]; q5 = q[5]; q6 = q[6]; q7 = q[7]; r0 = (q0 >> 8) | (q0 << 24); r1 = (q1 >> 8) | (q1 << 24); r2 = (q2 >> 8) | (q2 << 24); r3 = (q3 >> 8) | (q3 << 24); r4 = (q4 >> 8) | (q4 << 24); r5 = (q5 >> 8) | (q5 << 24); r6 = (q6 >> 8) | (q6 << 24); r7 = (q7 >> 8) | (q7 << 24); q[0] = q7 ^ r7 ^ r0 ^ rotr16(q0 ^ r0); q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr16(q1 ^ r1); q[2] = q1 ^ r1 ^ r2 ^ rotr16(q2 ^ r2); q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr16(q3 ^ r3); q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr16(q4 ^ r4); q[5] = q4 ^ r4 ^ r5 ^ rotr16(q5 ^ r5); q[6] = q5 ^ r5 ^ r6 ^ rotr16(q6 ^ r6); q[7] = q6 ^ r6 ^ r7 ^ rotr16(q7 ^ r7); } static void br_aes_ct64_ortho(uint64_t *q) { #define SWAPN(cl, ch, s, x, y) do { \ uint64_t a, b; \ a = (x); \ b = (y); \ (x) = (a & (uint64_t)(cl)) | ((b & (uint64_t)(cl)) << (s)); \ (y) = ((a & (uint64_t)(ch)) >> (s)) | (b & (uint64_t)(ch)); \ } while (0) #define SWAP2(x, y) SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1, x, y) #define SWAP4(x, y) SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2, x, y) #define SWAP8(x, y) SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4, x, y) SWAP2(q[0], q[1]); SWAP2(q[2], q[3]); SWAP2(q[4], q[5]); SWAP2(q[6], q[7]); SWAP4(q[0], q[2]); SWAP4(q[1], q[3]); SWAP4(q[4], q[6]); SWAP4(q[5], q[7]); SWAP8(q[0], q[4]); SWAP8(q[1], q[5]); SWAP8(q[2], q[6]); SWAP8(q[3], q[7]); } static void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w) { uint64_t x0, x1, x2, x3; x0 = w[0]; x1 = w[1]; x2 = w[2]; x3 = w[3]; x0 |= (x0 << 16); x1 |= (x1 << 16); x2 |= (x2 << 16); x3 |= (x3 << 16); x0 &= (uint64_t)0x0000FFFF0000FFFF; x1 &= (uint64_t)0x0000FFFF0000FFFF; x2 &= (uint64_t)0x0000FFFF0000FFFF; x3 &= (uint64_t)0x0000FFFF0000FFFF; x0 |= (x0 << 8); x1 |= (x1 << 8); x2 |= (x2 << 8); x3 |= (x3 << 8); x0 &= (uint64_t)0x00FF00FF00FF00FF; x1 &= (uint64_t)0x00FF00FF00FF00FF; x2 &= (uint64_t)0x00FF00FF00FF00FF; x3 &= (uint64_t)0x00FF00FF00FF00FF; *q0 = x0 | (x2 << 8); *q1 = x1 | (x3 << 8); } static 
void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1) { uint64_t x0, x1, x2, x3; x0 = q0 & (uint64_t)0x00FF00FF00FF00FF; x1 = q1 & (uint64_t)0x00FF00FF00FF00FF; x2 = (q0 >> 8) & (uint64_t)0x00FF00FF00FF00FF; x3 = (q1 >> 8) & (uint64_t)0x00FF00FF00FF00FF; x0 |= (x0 >> 8); x1 |= (x1 >> 8); x2 |= (x2 >> 8); x3 |= (x3 >> 8); x0 &= (uint64_t)0x0000FFFF0000FFFF; x1 &= (uint64_t)0x0000FFFF0000FFFF; x2 &= (uint64_t)0x0000FFFF0000FFFF; x3 &= (uint64_t)0x0000FFFF0000FFFF; w[0] = (uint32_t)x0 | (uint32_t)(x0 >> 16); w[1] = (uint32_t)x1 | (uint32_t)(x1 >> 16); w[2] = (uint32_t)x2 | (uint32_t)(x2 >> 16); w[3] = (uint32_t)x3 | (uint32_t)(x3 >> 16); } static inline void add_round_key(uint64_t *q, const uint64_t *sk) { q[0] ^= sk[0]; q[1] ^= sk[1]; q[2] ^= sk[2]; q[3] ^= sk[3]; q[4] ^= sk[4]; q[5] ^= sk[5]; q[6] ^= sk[6]; q[7] ^= sk[7]; } static inline void shift_rows(uint64_t *q) { int i; for (i = 0; i < 8; i++) { uint64_t x; x = q[i]; q[i] = (x & (uint64_t)0x000000000000FFFF) | ((x & (uint64_t)0x00000000FFF00000) >> 4) | ((x & (uint64_t)0x00000000000F0000) << 12) | ((x & (uint64_t)0x0000FF0000000000) >> 8) | ((x & (uint64_t)0x000000FF00000000) << 8) | ((x & (uint64_t)0xF000000000000000) >> 12) | ((x & (uint64_t)0x0FFF000000000000) << 4); } } static inline uint64_t rotr32(uint64_t x) { return (x << 32) | (x >> 32); } static inline void mix_columns(uint64_t *q) { uint64_t q0, q1, q2, q3, q4, q5, q6, q7; uint64_t r0, r1, r2, r3, r4, r5, r6, r7; q0 = q[0]; q1 = q[1]; q2 = q[2]; q3 = q[3]; q4 = q[4]; q5 = q[5]; q6 = q[6]; q7 = q[7]; r0 = (q0 >> 16) | (q0 << 48); r1 = (q1 >> 16) | (q1 << 48); r2 = (q2 >> 16) | (q2 << 48); r3 = (q3 >> 16) | (q3 << 48); r4 = (q4 >> 16) | (q4 << 48); r5 = (q5 >> 16) | (q5 << 48); r6 = (q6 >> 16) | (q6 << 48); r7 = (q7 >> 16) | (q7 << 48); q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0); q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1); q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2); q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3); q[4] = q3 ^ r3 ^ q7 ^ r7 ^ 
r4 ^ rotr32(q4 ^ r4); q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5); q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6); q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7); } static void interleave_constant(uint64_t *out, const unsigned char *in) { uint32_t tmp_32_constant[16]; int i; br_range_dec32le(tmp_32_constant, 16, in); for (i = 0; i < 4; i++) { br_aes_ct64_interleave_in(&out[i], &out[i + 4], tmp_32_constant + (i << 2)); } br_aes_ct64_ortho(out); } static void interleave_constant32(uint32_t *out, const unsigned char *in) { int i; for (i = 0; i < 4; i++) { out[2*i] = br_dec32le(in + 4*i); out[2*i + 1] = br_dec32le(in + 4*i + 16); } br_aes_ct_ortho(out); } void tweak_constants(spx_ctx *ctx) { unsigned char buf[40*16]; int i; /* Use the standard constants to generate tweaked ones. */ memcpy((uint8_t *)ctx->tweaked512_rc64, (uint8_t *)haraka512_rc64, 40*16); /* Constants for pk.seed */ haraka_S(buf, 40*16, ctx->pub_seed, SPX_N, ctx); for (i = 0; i < 10; i++) { interleave_constant32(ctx->tweaked256_rc32[i], buf + 32*i); interleave_constant(ctx->tweaked512_rc64[i], buf + 64*i); } } static void haraka_S_absorb(unsigned char *s, unsigned int r, const unsigned char *m, unsigned long long mlen, unsigned char p, const spx_ctx *ctx) { unsigned long long i; SPX_VLA(uint8_t, t, r); while (mlen >= r) { /* XOR block to state */ for (i = 0; i < r; ++i) { s[i] ^= m[i]; } haraka512_perm(s, s, ctx); mlen -= r; m += r; } for (i = 0; i < r; ++i) { t[i] = 0; } for (i = 0; i < mlen; ++i) { t[i] = m[i]; } t[i] = p; t[r - 1] |= 128; for (i = 0; i < r; ++i) { s[i] ^= t[i]; } } static void haraka_S_squeezeblocks(unsigned char *h, unsigned long long nblocks, unsigned char *s, unsigned int r, const spx_ctx *ctx) { while (nblocks > 0) { haraka512_perm(s, s, ctx); memcpy(h, s, HARAKAS_RATE); h += r; nblocks--; } } void haraka_S_inc_init(uint8_t *s_inc) { size_t i; for (i = 0; i < 64; i++) { s_inc[i] = 0; } s_inc[64] = 0; } void haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen, const spx_ctx *ctx) { size_t 
i;

    /* Incrementally absorbs mlen bytes of m. s_inc[0..63] is the sponge
       state; s_inc[64] is the number of bytes already XORed into the current
       rate block without a permutation having been applied yet. */
    /* Recall that s_inc[64] is the non-absorbed bytes xored into the state */
    while (mlen + s_inc[64] >= HARAKAS_RATE) {
        /* Enough input to complete the current rate block: fill it up ... */
        for (i = 0; i < (size_t)(HARAKAS_RATE - s_inc[64]); i++) {
            /* Take the i'th byte from message
               xor with the s_inc[64] + i'th byte of the state */
            s_inc[s_inc[64] + i] ^= m[i];
        }
        /* ... account for the bytes just consumed, and permute. */
        mlen -= (size_t)(HARAKAS_RATE - s_inc[64]);
        m += HARAKAS_RATE - (uint8_t)s_inc[64];
        s_inc[64] = 0;

        haraka512_perm(s_inc, s_inc, ctx);
    }

    /* Fewer than a full block left: XOR it in and remember how far we got. */
    for (i = 0; i < mlen; i++) {
        s_inc[s_inc[64] + i] ^= m[i];
    }
    s_inc[64] += (uint8_t)mlen;
}

/*
 * Ends the absorb phase: XORs the domain/padding byte 0x1F in at the current
 * offset and sets the top bit of the last rate byte. The state is not
 * permuted here; s_inc[64] is reset to 0 so that the first squeeze call
 * starts by permuting.
 */
void haraka_S_inc_finalize(uint8_t *s_inc)
{
    /* After haraka_S_inc_absorb, we are guaranteed that s_inc[64] < HARAKAS_RATE,
       so we can always use one more byte for p in the current state. */
    s_inc[s_inc[64]] ^= 0x1F;
    s_inc[HARAKAS_RATE - 1] ^= 128;
    s_inc[64] = 0;
}

/*
 * Writes outlen bytes of sponge output to out. During squeezing, s_inc[64]
 * holds the number of bytes of the current rate block that have been
 * generated but not yet handed out, so output can be requested in pieces.
 */
void haraka_S_inc_squeeze(uint8_t *out, size_t outlen,
                          uint8_t *s_inc, const spx_ctx *ctx)
{
    size_t i;

    /* First consume any bytes we still have sitting around */
    for (i = 0; i < outlen && i < s_inc[64]; i++) {
        /* There are s_inc[64] bytes left, so r - s_inc[64] is the first
           available byte. We consume from there, i.e., up to r. */
        out[i] = (uint8_t)s_inc[(HARAKAS_RATE - s_inc[64] + i)];
    }
    out += i;
    outlen -= i;
    s_inc[64] -= (uint8_t)i;

    /* Then squeeze the remaining necessary blocks */
    while (outlen > 0) {
        haraka512_perm(s_inc, s_inc, ctx);

        for (i = 0; i < outlen && i < HARAKAS_RATE; i++) {
            out[i] = s_inc[i];
        }
        out += i;
        outlen -= i;
        /* Whatever part of this block was not handed out stays available. */
        s_inc[64] = (uint8_t)(HARAKAS_RATE - i);
    }
}

/*
 * One-shot sponge: absorbs inlen bytes of in (rate 32, padding byte 0x1F)
 * and writes outlen bytes of output to out.
 */
void haraka_S(unsigned char *out, unsigned long long outlen,
              const unsigned char *in, unsigned long long inlen,
              const spx_ctx *ctx)
{
    unsigned long long i;
    unsigned char s[64];
    unsigned char d[32];

    for (i = 0; i < 64; i++) {
        s[i] = 0;
    }
    haraka_S_absorb(s, 32, in, inlen, 0x1F, ctx);

    /* Whole 32-byte blocks are squeezed straight into out ... */
    haraka_S_squeezeblocks(out, outlen / 32, s, 32, ctx);
    out += (outlen / 32) * 32;

    /* ... a trailing partial block goes through the temporary d so that no
       more than outlen bytes are written to out. */
    if (outlen % 32) {
        haraka_S_squeezeblocks(d, 1, s, 32, ctx);
        for (i = 0; i < outlen % 32; i++) {
            out[i] = d[i];
        }
    }
}

/*
 * Applies the 512-bit permutation to the 64 bytes at in and writes the
 * 64-byte result to out, using the round constants in ctx->tweaked512_rc64.
 * All of in is decoded into w before out is written, and the callers in this
 * file pass the same buffer for both.
 */
void haraka512_perm(unsigned char *out, const unsigned char *in,
                    const spx_ctx *ctx)
{
    uint32_t w[16];
    uint64_t q[8], tmp_q;
    unsigned int i, j;

    /* Load the input as 16 little-endian words and convert it to the
       bit-sliced representation held in q[0..7]. */
    br_range_dec32le(w, 16, in);
    for (i = 0; i < 4; i++) {
        br_aes_ct64_interleave_in(&q[i], &q[i + 4], w + (i << 2));
    }
    br_aes_ct64_ortho(q);

    /* AES rounds */
    for (i = 0; i < 5; i++) {
        /* Two AES rounds, each with its own round constant 2*i + j. */
        for (j = 0; j < 2; j++) {
            br_aes_ct64_bitslice_Sbox(q);
            shift_rows(q);
            mix_columns(q);
            add_round_key(q, ctx->tweaked512_rc64[2*i + j]);
        }
        /* Mix states */
        /* The same fixed bit permutation is applied to every q[j]. */
        for (j = 0; j < 8; j++) {
            tmp_q = q[j];
            q[j] = (tmp_q & 0x0001000100010001) << 5 |
                   (tmp_q & 0x0002000200020002) << 12 |
                   (tmp_q & 0x0004000400040004) >> 1 |
                   (tmp_q & 0x0008000800080008) << 6 |
                   (tmp_q & 0x0020002000200020) << 9 |
                   (tmp_q & 0x0040004000400040) >> 4 |
                   (tmp_q & 0x0080008000800080) << 3 |
                   (tmp_q & 0x2100210021002100) >> 5 |
                   (tmp_q & 0x0210021002100210) << 2 |
                   (tmp_q & 0x0800080008000800) << 4 |
                   (tmp_q & 0x1000100010001000) >> 12 |
                   (tmp_q & 0x4000400040004000) >> 10 |
                   (tmp_q & 0x8400840084008400) >> 3;
        }
    }

    /* Convert back from the bit-sliced representation and store. */
    br_aes_ct64_ortho(q);
    for (i = 0; i < 4; i ++) {
        br_aes_ct64_interleave_out(w + (i << 2), q[i], q[i + 4]);
    }
    br_range_enc32le(out, w, 16);
}

void 
haraka512(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { int i; unsigned char buf[64]; haraka512_perm(buf, in, ctx); /* Feed-forward */ for (i = 0; i < 64; i++) { buf[i] = buf[i] ^ in[i]; } /* Truncated */ memcpy(out, buf + 8, 8); memcpy(out + 8, buf + 24, 8); memcpy(out + 16, buf + 32, 8); memcpy(out + 24, buf + 48, 8); } void haraka256(unsigned char *out, const unsigned char *in, const spx_ctx *ctx) { uint32_t q[8], tmp_q; int i, j; for (i = 0; i < 4; i++) { q[2*i] = br_dec32le(in + 4*i); q[2*i + 1] = br_dec32le(in + 4*i + 16); } br_aes_ct_ortho(q); /* AES rounds */ for (i = 0; i < 5; i++) { for (j = 0; j < 2; j++) { br_aes_ct_bitslice_Sbox(q); shift_rows32(q); mix_columns32(q); add_round_key32(q, ctx->tweaked256_rc32[2*i + j]); } /* Mix states */ for (j = 0; j < 8; j++) { tmp_q = q[j]; q[j] = (tmp_q & 0x81818181) | (tmp_q & 0x02020202) << 1 | (tmp_q & 0x04040404) << 2 | (tmp_q & 0x08080808) << 3 | (tmp_q & 0x10101010) >> 3 | (tmp_q & 0x20202020) >> 2 | (tmp_q & 0x40404040) >> 1; } } br_aes_ct_ortho(q); for (i = 0; i < 4; i++) { br_enc32le(out + 4*i, q[2*i]); br_enc32le(out + 4*i + 16, q[2*i + 1]); } for (i = 0; i < 32; i++) { out[i] ^= in[i]; } } ================================================ FILE: ref/haraka.h ================================================ #ifndef SPX_HARAKA_H #define SPX_HARAKA_H #include "context.h" /* Tweak constants with seed */ #define tweak_constants SPX_NAMESPACE(tweak_constants) void tweak_constants(spx_ctx *ctx); /* Haraka Sponge */ #define haraka_S_inc_init SPX_NAMESPACE(haraka_S_inc_init) void haraka_S_inc_init(uint8_t *s_inc); #define haraka_S_inc_absorb SPX_NAMESPACE(haraka_S_inc_absorb) void haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen, const spx_ctx *ctx); #define haraka_S_inc_finalize SPX_NAMESPACE(haraka_S_inc_finalize) void haraka_S_inc_finalize(uint8_t *s_inc); #define haraka_S_inc_squeeze SPX_NAMESPACE(haraka_S_inc_squeeze) void haraka_S_inc_squeeze(uint8_t *out, size_t outlen, 
uint8_t *s_inc, const spx_ctx *ctx); #define haraka_S SPX_NAMESPACE(haraka_S) void haraka_S(unsigned char *out, unsigned long long outlen, const unsigned char *in, unsigned long long inlen, const spx_ctx *ctx); /* Applies the 512-bit Haraka permutation to in. */ #define haraka512_perm SPX_NAMESPACE(haraka512_perm) void haraka512_perm(unsigned char *out, const unsigned char *in, const spx_ctx *ctx); /* Implementation of Haraka-512 */ #define haraka512 SPX_NAMESPACE(haraka512) void haraka512(unsigned char *out, const unsigned char *in, const spx_ctx *ctx); /* Implementation of Haraka-256 */ #define haraka256 SPX_NAMESPACE(haraka256) void haraka256(unsigned char *out, const unsigned char *in, const spx_ctx *ctx); #endif ================================================ FILE: ref/haraka_offsets.h ================================================ #if !defined( HARAKA_OFFSETS_H_ ) #define HARAKA_OFFSETS_H_ /* * Offsets of various fields in the address structure when we use Haraka as * the Sphincs+ hash function */ #define SPX_OFFSET_LAYER 3 /* The byte used to specify the Merkle tree layer */ #define SPX_OFFSET_TREE 8 /* The start of the 8 byte field used to specify the tree */ #define SPX_OFFSET_TYPE 19 /* The byte used to specify the hash type (reason) */ #define SPX_OFFSET_KP_ADDR 20 /* The start of the 4 byte field used to specify the key pair address */ #define SPX_OFFSET_CHAIN_ADDR 27 /* The byte used to specify the chain address (which Winternitz chain) */ #define SPX_OFFSET_HASH_ADDR 31 /* The byte used to specify the hash address (where in the Winternitz chain) */ #define SPX_OFFSET_TREE_HGT 27 /* The byte used to specify the height of this node in the FORS or Merkle tree */ #define SPX_OFFSET_TREE_INDEX 28 /* The start of the 4 byte field used to specify the node in the FORS or Merkle tree */ #define SPX_HARAKA 1 #endif /* HARAKA_OFFSETS_H_ */ ================================================ FILE: ref/hash.h ================================================ 
#ifndef SPX_HASH_H #define SPX_HASH_H #include #include "context.h" #include "params.h" #define initialize_hash_function SPX_NAMESPACE(initialize_hash_function) void initialize_hash_function(spx_ctx *ctx); #define prf_addr SPX_NAMESPACE(prf_addr) void prf_addr(unsigned char *out, const spx_ctx *ctx, const uint32_t addr[8]); #define gen_message_random SPX_NAMESPACE(gen_message_random) void gen_message_random(unsigned char *R, const unsigned char *sk_prf, const unsigned char *optrand, const unsigned char *m, unsigned long long mlen, const spx_ctx *ctx); #define hash_message SPX_NAMESPACE(hash_message) void hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, const unsigned char *R, const unsigned char *pk, const unsigned char *m, unsigned long long mlen, const spx_ctx *ctx); #endif ================================================ FILE: ref/hash_haraka.c ================================================ #include #include #include "address.h" #include "utils.h" #include "params.h" #include "haraka.h" #include "hash.h" void initialize_hash_function(spx_ctx* ctx) { tweak_constants(ctx); } /* * Computes PRF(key, addr), given a secret key of SPX_N bytes and an address */ void prf_addr(unsigned char *out, const spx_ctx *ctx, const uint32_t addr[8]) { /* Since SPX_N may be smaller than 32, we need temporary buffers. */ unsigned char outbuf[32]; unsigned char buf[64] = {0}; memcpy(buf, addr, SPX_ADDR_BYTES); memcpy(buf + SPX_ADDR_BYTES, ctx->sk_seed, SPX_N); haraka512(outbuf, (const void *)buf, ctx); memcpy(out, outbuf, SPX_N); } /** * Computes the message-dependent randomness R, using a secret seed and an * optional randomization value as well as the message. 
*/ void gen_message_random(unsigned char *R, const unsigned char* sk_prf, const unsigned char *optrand, const unsigned char *m, unsigned long long mlen, const spx_ctx *ctx) { uint8_t s_inc[65]; haraka_S_inc_init(s_inc); haraka_S_inc_absorb(s_inc, sk_prf, SPX_N, ctx); haraka_S_inc_absorb(s_inc, optrand, SPX_N, ctx); haraka_S_inc_absorb(s_inc, m, mlen, ctx); haraka_S_inc_finalize(s_inc); haraka_S_inc_squeeze(R, SPX_N, s_inc, ctx); } /** * Computes the message hash using R, the public key, and the message. * Outputs the message digest and the index of the leaf. The index is split in * the tree index and the leaf index, for convenient copying to an address. */ void hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, const unsigned char *R, const unsigned char *pk, const unsigned char *m, unsigned long long mlen, const spx_ctx *ctx) { #define SPX_TREE_BITS (SPX_TREE_HEIGHT * (SPX_D - 1)) #define SPX_TREE_BYTES ((SPX_TREE_BITS + 7) / 8) #define SPX_LEAF_BITS SPX_TREE_HEIGHT #define SPX_LEAF_BYTES ((SPX_LEAF_BITS + 7) / 8) #define SPX_DGST_BYTES (SPX_FORS_MSG_BYTES + SPX_TREE_BYTES + SPX_LEAF_BYTES) unsigned char buf[SPX_DGST_BYTES]; unsigned char *bufp = buf; uint8_t s_inc[65]; haraka_S_inc_init(s_inc); haraka_S_inc_absorb(s_inc, R, SPX_N, ctx); haraka_S_inc_absorb(s_inc, pk + SPX_N, SPX_N, ctx); // Only absorb root part of pk haraka_S_inc_absorb(s_inc, m, mlen, ctx); haraka_S_inc_finalize(s_inc); haraka_S_inc_squeeze(buf, SPX_DGST_BYTES, s_inc, ctx); memcpy(digest, bufp, SPX_FORS_MSG_BYTES); bufp += SPX_FORS_MSG_BYTES; #if SPX_TREE_BITS > 64 #error For given height and depth, 64 bits cannot represent all subtrees #endif if (SPX_D == 1) { *tree = 0; } else { *tree = bytes_to_ull(bufp, SPX_TREE_BYTES); *tree &= (~(uint64_t)0) >> (64 - SPX_TREE_BITS); } bufp += SPX_TREE_BYTES; *leaf_idx = (uint32_t)bytes_to_ull(bufp, SPX_LEAF_BYTES); *leaf_idx &= (~(uint32_t)0) >> (32 - SPX_LEAF_BITS); } ================================================ FILE: 
ref/hash_sha2.c ================================================ #include #include #include "address.h" #include "utils.h" #include "params.h" #include "hash.h" #include "sha2.h"

/*
 * Select the SHA-2 variant behind the shaX / SPX_SHAX names used by
 * gen_message_random and hash_message: SHA-512 when SPX_N >= 24, SHA-256
 * otherwise. (prf_addr below always uses the SHA-256 names directly.)
 */
#if SPX_N >= 24
#define SPX_SHAX_OUTPUT_BYTES SPX_SHA512_OUTPUT_BYTES
#define SPX_SHAX_BLOCK_BYTES SPX_SHA512_BLOCK_BYTES
#define shaX_inc_init sha512_inc_init
#define shaX_inc_blocks sha512_inc_blocks
#define shaX_inc_finalize sha512_inc_finalize
#define shaX sha512
#define mgf1_X mgf1_512
#else
#define SPX_SHAX_OUTPUT_BYTES SPX_SHA256_OUTPUT_BYTES
#define SPX_SHAX_BLOCK_BYTES SPX_SHA256_BLOCK_BYTES
#define shaX_inc_init sha256_inc_init
#define shaX_inc_blocks sha256_inc_blocks
#define shaX_inc_finalize sha256_inc_finalize
#define shaX sha256
#define mgf1_X mgf1_256
#endif


/* Initializes the hash context for SHA-2 by calling seed_state().
   prf_addr below reads ctx->state_seeded as a precomputed hash state that
   already contains pub_seed; presumably seed_state() is what fills it in
   (NOTE(review): confirm against sha2.c). */
void initialize_hash_function(spx_ctx *ctx)
{
    seed_state(ctx);
}

/*
 * Computes PRF(pk_seed, sk_seed, addr).
 * pk_seed enters through the pre-seeded state copied from the context; only
 * the address bytes and sk_seed are hashed here. The SHA-256 output is
 * truncated to SPX_N bytes.
 */
void prf_addr(unsigned char *out, const spx_ctx *ctx,
              const uint32_t addr[8])
{
    /* Working copy of the seeded state, so the copy in ctx stays untouched. */
    uint8_t sha2_state[40];
    unsigned char buf[SPX_SHA256_ADDR_BYTES + SPX_N];
    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];

    /* Retrieve precomputed state containing pub_seed */
    memcpy(sha2_state, ctx->state_seeded, 40 * sizeof(uint8_t));

    /* Remainder: ADDR^c ‖ SK.seed */
    memcpy(buf, addr, SPX_SHA256_ADDR_BYTES);
    memcpy(buf + SPX_SHA256_ADDR_BYTES, ctx->sk_seed, SPX_N);

    sha256_inc_finalize(outbuf, sha2_state, buf, SPX_SHA256_ADDR_BYTES + SPX_N);

    memcpy(out, outbuf, SPX_N);
}

/**
 * Computes the message-dependent randomness R, using a secret seed as a key
 * for HMAC, and an optional randomization value prefixed to the message.
 * The prefix and the leading part of the message are copied into a local
 * block buffer, so m is only read and no space has to be reserved in front
 * of it.
*/
void gen_message_random(unsigned char *R, const unsigned char *sk_prf,
                        const unsigned char *optrand,
                        const unsigned char *m, unsigned long long mlen,
                        const spx_ctx *ctx)
{
    (void)ctx;

    /* One hash block, followed by room for the inner hash output. */
    unsigned char buf[SPX_SHAX_BLOCK_BYTES + SPX_SHAX_OUTPUT_BYTES];
    unsigned char *inner_digest = buf + SPX_SHAX_BLOCK_BYTES;
    uint8_t state[8 + SPX_SHAX_OUTPUT_BYTES];
    int k;

#if SPX_N > SPX_SHAX_BLOCK_BYTES
    #error "Currently only supports SPX_N of at most SPX_SHAX_BLOCK_BYTES"
#endif

    /* This implements HMAC-SHA */

    /* Inner key block: the key XOR 0x36, padded with 0x36 to a full block. */
    memset(buf, 0x36, SPX_SHAX_BLOCK_BYTES);
    for (k = 0; k < SPX_N; k++) {
        buf[k] ^= sk_prf[k];
    }

    shaX_inc_init(state);
    shaX_inc_blocks(state, buf, 1);

    /* The inner message is optrand followed by m. */
    memcpy(buf, optrand, SPX_N);

    if (SPX_N + mlen >= SPX_SHAX_BLOCK_BYTES) {
        /* Fill one whole block with optrand and the start of the message,
           so that finalize only has to deal with the rest of the message. */
        memcpy(buf + SPX_N, m, SPX_SHAX_BLOCK_BYTES - SPX_N);
        shaX_inc_blocks(state, buf, 1);

        m += SPX_SHAX_BLOCK_BYTES - SPX_N;
        mlen -= SPX_SHAX_BLOCK_BYTES - SPX_N;
        shaX_inc_finalize(inner_digest, state, m, mlen);
    } else {
        /* optrand + message do not fill a block: finalize from the buffer. */
        memcpy(buf + SPX_N, m, mlen);
        shaX_inc_finalize(inner_digest, state, buf, mlen + SPX_N);
    }

    /* Outer key block: the key XOR 0x5c, padded with 0x5c. The inner digest
       sits right behind it, so one hash call covers both. */
    memset(buf, 0x5c, SPX_SHAX_BLOCK_BYTES);
    for (k = 0; k < SPX_N; k++) {
        buf[k] ^= sk_prf[k];
    }

    shaX(buf, buf, SPX_SHAX_BLOCK_BYTES + SPX_SHAX_OUTPUT_BYTES);
    memcpy(R, buf, SPX_N);
}

/**
 * Computes the message hash using R, the public key, and the message.
 * Outputs the message digest and the index of the leaf. The index is split in
 * the tree index and the leaf index, for convenient copying to an address.
*/
void hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx,
                  const unsigned char *R, const unsigned char *pk,
                  const unsigned char *m, unsigned long long mlen,
                  const spx_ctx *ctx)
{
    (void)ctx;
#define SPX_TREE_BITS (SPX_TREE_HEIGHT * (SPX_D - 1))
#define SPX_TREE_BYTES ((SPX_TREE_BITS + 7) / 8)
#define SPX_LEAF_BITS SPX_TREE_HEIGHT
#define SPX_LEAF_BYTES ((SPX_LEAF_BITS + 7) / 8)
#define SPX_DGST_BYTES (SPX_FORS_MSG_BYTES + SPX_TREE_BYTES + SPX_LEAF_BYTES)

    /* MGF1 input: R ‖ PK.seed ‖ (hash of R ‖ PK ‖ M), assembled below. */
    unsigned char seed[2*SPX_N + SPX_SHAX_OUTPUT_BYTES];

    /* Round up to the next multiple of SPX_SHAX_BLOCK_BYTES: SPX_INBLOCKS is
       the number of whole hash blocks needed to hold R and the public key. */
#if (SPX_SHAX_BLOCK_BYTES & (SPX_SHAX_BLOCK_BYTES-1)) != 0
    #error "Assumes that SPX_SHAX_BLOCK_BYTES is a power of 2"
#endif
#define SPX_INBLOCKS (((SPX_N + SPX_PK_BYTES + SPX_SHAX_BLOCK_BYTES - 1) & \
                        -SPX_SHAX_BLOCK_BYTES) / SPX_SHAX_BLOCK_BYTES)
    unsigned char inbuf[SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES];

    unsigned char buf[SPX_DGST_BYTES];
    unsigned char *bufp = buf;
    uint8_t state[8 + SPX_SHAX_OUTPUT_BYTES];

    shaX_inc_init(state);

    // seed: SHA-X(R ‖ PK.seed ‖ PK.root ‖ M)
    memcpy(inbuf, R, SPX_N);
    memcpy(inbuf + SPX_N, pk, SPX_PK_BYTES);

    /* If R + pk + message cannot fill up all SPX_INBLOCKS blocks, everything
       fits in inbuf and is hashed in one finalize call. */
    if (SPX_N + SPX_PK_BYTES + mlen < SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES) {
        memcpy(inbuf + SPX_N + SPX_PK_BYTES, m, mlen);
        shaX_inc_finalize(seed + 2*SPX_N, state, inbuf, SPX_N + SPX_PK_BYTES + mlen);
    }
    /* Otherwise first fill the blocks, so that finalize only uses the message */
    else {
        memcpy(inbuf + SPX_N + SPX_PK_BYTES, m,
               SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES - SPX_N - SPX_PK_BYTES);
        shaX_inc_blocks(state, inbuf, SPX_INBLOCKS);

        /* Skip the part of the message that was already hashed via inbuf. */
        m += SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES - SPX_N - SPX_PK_BYTES;
        mlen -= SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES - SPX_N - SPX_PK_BYTES;
        shaX_inc_finalize(seed + 2*SPX_N, state, m, mlen);
    }

    // H_msg: MGF1-SHA-X(R ‖ PK.seed ‖ seed)
    memcpy(seed, R, SPX_N);
    memcpy(seed + SPX_N, pk, SPX_N);

    /* By doing this in two steps, we prevent hashing the message twice;
       otherwise each iteration in MGF1 would hash the message again. */
    mgf1_X(bufp, SPX_DGST_BYTES, seed, 2*SPX_N + SPX_SHAX_OUTPUT_BYTES);

    /* buf now holds: digest bytes | tree index bytes | leaf index bytes. */
    memcpy(digest, bufp, SPX_FORS_MSG_BYTES);
    bufp += SPX_FORS_MSG_BYTES;

#if SPX_TREE_BITS > 64
    #error For given height and depth, 64 bits cannot represent all subtrees
#endif

    /* With a single layer there is only one tree; this also keeps the
       64-bit shift by (64 - 0) below from ever being evaluated. */
    if (SPX_D == 1) {
        *tree = 0;
    } else {
        *tree = bytes_to_ull(bufp, SPX_TREE_BYTES);
        /* Keep only the low SPX_TREE_BITS bits. */
        *tree &= (~(uint64_t)0) >> (64 - SPX_TREE_BITS);
    }
    bufp += SPX_TREE_BYTES;

    *leaf_idx = (uint32_t)bytes_to_ull(bufp, SPX_LEAF_BYTES);
    /* Keep only the low SPX_LEAF_BITS bits. */
    *leaf_idx &= (~(uint32_t)0) >> (32 - SPX_LEAF_BITS);
}

================================================ FILE: ref/hash_shake.c ================================================ #include #include #include "address.h" #include "utils.h" #include "params.h" #include "hash.h" #include "fips202.h"

/* For SHAKE256, there is no immediate reason to initialize at the start,
   so this function is an empty operation. */
void initialize_hash_function(spx_ctx* ctx)
{
    (void)ctx; /* Suppress an 'unused parameter' warning. */
}

/*
 * Computes PRF(pk_seed, sk_seed, addr)
 * The SHAKE256 input is pub_seed ‖ addr ‖ sk_seed and SPX_N bytes of output
 * are written to out.
 */
void prf_addr(unsigned char *out, const spx_ctx *ctx,
              const uint32_t addr[8])
{
    unsigned char buf[2*SPX_N + SPX_ADDR_BYTES];

    memcpy(buf, ctx->pub_seed, SPX_N);
    memcpy(buf + SPX_N, addr, SPX_ADDR_BYTES);
    memcpy(buf + SPX_N + SPX_ADDR_BYTES, ctx->sk_seed, SPX_N);

    shake256(out, SPX_N, buf, 2*SPX_N + SPX_ADDR_BYTES);
}

/**
 * Computes the message-dependent randomness R, using a secret seed and an
 * optional randomization value as well as the message.
*/ void gen_message_random(unsigned char *R, const unsigned char *sk_prf, const unsigned char *optrand, const unsigned char *m, unsigned long long mlen, const spx_ctx *ctx) { (void)ctx; uint64_t s_inc[26]; shake256_inc_init(s_inc); shake256_inc_absorb(s_inc, sk_prf, SPX_N); shake256_inc_absorb(s_inc, optrand, SPX_N); shake256_inc_absorb(s_inc, m, mlen); shake256_inc_finalize(s_inc); shake256_inc_squeeze(R, SPX_N, s_inc); } /** * Computes the message hash using R, the public key, and the message. * Outputs the message digest and the index of the leaf. The index is split in * the tree index and the leaf index, for convenient copying to an address. */ void hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, const unsigned char *R, const unsigned char *pk, const unsigned char *m, unsigned long long mlen, const spx_ctx *ctx) { (void)ctx; #define SPX_TREE_BITS (SPX_TREE_HEIGHT * (SPX_D - 1)) #define SPX_TREE_BYTES ((SPX_TREE_BITS + 7) / 8) #define SPX_LEAF_BITS SPX_TREE_HEIGHT #define SPX_LEAF_BYTES ((SPX_LEAF_BITS + 7) / 8) #define SPX_DGST_BYTES (SPX_FORS_MSG_BYTES + SPX_TREE_BYTES + SPX_LEAF_BYTES) unsigned char buf[SPX_DGST_BYTES]; unsigned char *bufp = buf; uint64_t s_inc[26]; shake256_inc_init(s_inc); shake256_inc_absorb(s_inc, R, SPX_N); shake256_inc_absorb(s_inc, pk, SPX_PK_BYTES); shake256_inc_absorb(s_inc, m, mlen); shake256_inc_finalize(s_inc); shake256_inc_squeeze(buf, SPX_DGST_BYTES, s_inc); memcpy(digest, bufp, SPX_FORS_MSG_BYTES); bufp += SPX_FORS_MSG_BYTES; #if SPX_TREE_BITS > 64 #error For given height and depth, 64 bits cannot represent all subtrees #endif if (SPX_D == 1) { *tree = 0; } else { *tree = bytes_to_ull(bufp, SPX_TREE_BYTES); *tree &= (~(uint64_t)0) >> (64 - SPX_TREE_BITS); } bufp += SPX_TREE_BYTES; *leaf_idx = (uint32_t)bytes_to_ull(bufp, SPX_LEAF_BYTES); *leaf_idx &= (~(uint32_t)0) >> (32 - SPX_LEAF_BITS); } ================================================ FILE: ref/merkle.c 
================================================ #include #include #include "utils.h" #include "utilsx1.h" #include "wots.h" #include "wotsx1.h" #include "merkle.h" #include "address.h" #include "params.h" /* * This generates a Merkle signature (WOTS signature followed by the Merkle * authentication path). This is in this file because most of the complexity * is involved with the WOTS signature; the Merkle authentication path logic * is mostly hidden in treehashx4 */ void merkle_sign(uint8_t *sig, unsigned char *root, const spx_ctx *ctx, uint32_t wots_addr[8], uint32_t tree_addr[8], uint32_t idx_leaf) { unsigned char *auth_path = sig + SPX_WOTS_BYTES; struct leaf_info_x1 info = { 0 }; unsigned steps[ SPX_WOTS_LEN ]; info.wots_sig = sig; chain_lengths(steps, root); info.wots_steps = steps; set_type(&tree_addr[0], SPX_ADDR_TYPE_HASHTREE); set_type(&info.pk_addr[0], SPX_ADDR_TYPE_WOTSPK); copy_subtree_addr(&info.leaf_addr[0], wots_addr); copy_subtree_addr(&info.pk_addr[0], wots_addr); info.wots_sign_leaf = idx_leaf; treehashx1(root, auth_path, ctx, idx_leaf, 0, SPX_TREE_HEIGHT, wots_gen_leafx1, tree_addr, &info); } /* Compute root node of the top-most subtree. */ void merkle_gen_root(unsigned char *root, const spx_ctx *ctx) { /* We do not need the auth path in key generation, but it simplifies the code to have just one treehash routine that computes both root and path in one function. 
*/ unsigned char auth_path[SPX_TREE_HEIGHT * SPX_N + SPX_WOTS_BYTES]; uint32_t top_tree_addr[8] = {0}; uint32_t wots_addr[8] = {0}; set_layer_addr(top_tree_addr, SPX_D - 1); set_layer_addr(wots_addr, SPX_D - 1); merkle_sign(auth_path, root, ctx, wots_addr, top_tree_addr, (uint32_t)~0 /* ~0 means "don't bother generating an auth path */ ); } ================================================ FILE: ref/merkle.h ================================================ #if !defined( MERKLE_H_ ) #define MERKLE_H_ #include /* Generate a Merkle signature (WOTS signature followed by the Merkle */ /* authentication path) */ #define merkle_sign SPX_NAMESPACE(merkle_sign) void merkle_sign(uint8_t *sig, unsigned char *root, const spx_ctx* ctx, uint32_t wots_addr[8], uint32_t tree_addr[8], uint32_t idx_leaf); /* Compute the root node of the top-most subtree. */ #define merkle_gen_root SPX_NAMESPACE(merkle_gen_root) void merkle_gen_root(unsigned char *root, const spx_ctx* ctx); #endif /* MERKLE_H_ */ ================================================ FILE: ref/params/params-sphincs-haraka-128f.h ================================================ #ifndef SPX_PARAMS_H #define SPX_PARAMS_H #define SPX_NAMESPACE(s) SPX_##s /* Hash output length in bytes. */ #define SPX_N 16 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 66 /* Number of subtree layer. */ #define SPX_D 22 /* FORS tree dimensions. */ #define SPX_FORS_HEIGHT 6 #define SPX_FORS_TREES 33 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. 
#ifndef SPX_PARAMS_H
#define SPX_PARAMS_H

/*
 * Compile-time parameters for the Haraka "128s" parameter set.  The numeric
 * values quoted in the comments below follow from the #defines in this file.
 */

/* Prefix applied to exported symbol names. */
#define SPX_NAMESPACE(s) SPX_##s

/* Hash output length in bytes. */
#define SPX_N 16
/* Height of the hypertree. */
#define SPX_FULL_HEIGHT 63
/* Number of subtree layers; must divide SPX_FULL_HEIGHT (checked below). */
#define SPX_D 7
/* FORS tree dimensions. */
#define SPX_FORS_HEIGHT 12
#define SPX_FORS_TREES 14
/* Winternitz parameter; only 16 and 256 are handled below. */
#define SPX_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* For clarity */
#define SPX_ADDR_BYTES 32

/* WOTS parameters. */
#if SPX_WOTS_W == 256
    #define SPX_WOTS_LOGW 8
#elif SPX_WOTS_W == 16
    #define SPX_WOTS_LOGW 4
#else
    #error SPX_WOTS_W assumed 16 or 256
#endif

/* Number of message chains: 8 * 16 / 4 = 32 for this set. */
#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)

/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */
#if SPX_WOTS_W == 256
    #if SPX_N <= 1
        #define SPX_WOTS_LEN2 1
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 2
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#elif SPX_WOTS_W == 16
    #if SPX_N <= 8
        #define SPX_WOTS_LEN2 2
    #elif SPX_N <= 136
        #define SPX_WOTS_LEN2 3
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 4
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#endif

/* 32 + 3 = 35 chains, i.e. 35 * 16 = 560 bytes per WOTS signature. */
#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)
#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)
#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES

/* Subtree size: 63 / 7 = 9 levels per subtree. */
#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)

#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT
    #error SPX_D should always divide SPX_FULL_HEIGHT
#endif

/* FORS parameters: 12 * 14 = 168 message bits rounded up to 21 bytes, and
   13 * 14 * 16 = 2912 signature bytes. */
#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)
#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)
#define SPX_FORS_PK_BYTES SPX_N

/* Resulting SPX sizes: 16 + 2912 + 7 * 560 + 63 * 16 = 7856 signature bytes,
   32 public-key bytes and 64 secret-key bytes. */
#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\
                   SPX_FULL_HEIGHT * SPX_N)
#define SPX_PK_BYTES (2 * SPX_N)
#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)

#include "../haraka_offsets.h"

#endif
*/ #define SPX_N 24 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 66 /* Number of subtree layer. */ #define SPX_D 22 /* FORS tree dimensions. */ #define SPX_FORS_HEIGHT 8 #define SPX_FORS_TREES 33 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. 
#ifndef SPX_PARAMS_H
#define SPX_PARAMS_H

/*
 * Compile-time parameters for the Haraka "192s" parameter set.  The numeric
 * values quoted in the comments below follow from the #defines in this file.
 */

/* Prefix applied to exported symbol names. */
#define SPX_NAMESPACE(s) SPX_##s

/* Hash output length in bytes. */
#define SPX_N 24
/* Height of the hypertree. */
#define SPX_FULL_HEIGHT 63
/* Number of subtree layers; must divide SPX_FULL_HEIGHT (checked below). */
#define SPX_D 7
/* FORS tree dimensions. */
#define SPX_FORS_HEIGHT 14
#define SPX_FORS_TREES 17
/* Winternitz parameter; only 16 and 256 are handled below. */
#define SPX_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* For clarity */
#define SPX_ADDR_BYTES 32

/* WOTS parameters. */
#if SPX_WOTS_W == 256
    #define SPX_WOTS_LOGW 8
#elif SPX_WOTS_W == 16
    #define SPX_WOTS_LOGW 4
#else
    #error SPX_WOTS_W assumed 16 or 256
#endif

/* Number of message chains: 8 * 24 / 4 = 48 for this set. */
#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)

/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */
#if SPX_WOTS_W == 256
    #if SPX_N <= 1
        #define SPX_WOTS_LEN2 1
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 2
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#elif SPX_WOTS_W == 16
    #if SPX_N <= 8
        #define SPX_WOTS_LEN2 2
    #elif SPX_N <= 136
        #define SPX_WOTS_LEN2 3
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 4
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#endif

/* 48 + 3 = 51 chains, i.e. 51 * 24 = 1224 bytes per WOTS signature. */
#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)
#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)
#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES

/* Subtree size: 63 / 7 = 9 levels per subtree. */
#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)

#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT
    #error SPX_D should always divide SPX_FULL_HEIGHT
#endif

/* FORS parameters: 14 * 17 = 238 message bits rounded up to 30 bytes, and
   15 * 17 * 24 = 6120 signature bytes. */
#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)
#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)
#define SPX_FORS_PK_BYTES SPX_N

/* Resulting SPX sizes: 24 + 6120 + 7 * 1224 + 63 * 24 = 16224 signature
   bytes, 48 public-key bytes and 96 secret-key bytes. */
#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\
                   SPX_FULL_HEIGHT * SPX_N)
#define SPX_PK_BYTES (2 * SPX_N)
#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)

#include "../haraka_offsets.h"

#endif
#ifndef SPX_PARAMS_H
#define SPX_PARAMS_H

/*
 * Compile-time parameters for the Haraka "256s" parameter set.  The numeric
 * values quoted in the comments below follow from the #defines in this file.
 */

/* Prefix applied to exported symbol names. */
#define SPX_NAMESPACE(s) SPX_##s

/* Hash output length in bytes. */
#define SPX_N 32
/* Height of the hypertree. */
#define SPX_FULL_HEIGHT 64
/* Number of subtree layers; must divide SPX_FULL_HEIGHT (checked below). */
#define SPX_D 8
/* FORS tree dimensions. */
#define SPX_FORS_HEIGHT 14
#define SPX_FORS_TREES 22
/* Winternitz parameter; only 16 and 256 are handled below. */
#define SPX_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* For clarity */
#define SPX_ADDR_BYTES 32

/* WOTS parameters. */
#if SPX_WOTS_W == 256
    #define SPX_WOTS_LOGW 8
#elif SPX_WOTS_W == 16
    #define SPX_WOTS_LOGW 4
#else
    #error SPX_WOTS_W assumed 16 or 256
#endif

/* Number of message chains: 8 * 32 / 4 = 64 for this set. */
#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)

/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */
#if SPX_WOTS_W == 256
    #if SPX_N <= 1
        #define SPX_WOTS_LEN2 1
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 2
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#elif SPX_WOTS_W == 16
    #if SPX_N <= 8
        #define SPX_WOTS_LEN2 2
    #elif SPX_N <= 136
        #define SPX_WOTS_LEN2 3
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 4
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#endif

/* 64 + 3 = 67 chains, i.e. 67 * 32 = 2144 bytes per WOTS signature. */
#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)
#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)
#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES

/* Subtree size: 64 / 8 = 8 levels per subtree. */
#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)

#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT
    #error SPX_D should always divide SPX_FULL_HEIGHT
#endif

/* FORS parameters: 14 * 22 = 308 message bits rounded up to 39 bytes, and
   15 * 22 * 32 = 10560 signature bytes. */
#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)
#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)
#define SPX_FORS_PK_BYTES SPX_N

/* Resulting SPX sizes: 32 + 10560 + 8 * 2144 + 64 * 32 = 29792 signature
   bytes, 64 public-key bytes and 128 secret-key bytes. */
#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\
                   SPX_FULL_HEIGHT * SPX_N)
#define SPX_PK_BYTES (2 * SPX_N)
#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)

#include "../haraka_offsets.h"

#endif
*/ #define SPX_FORS_HEIGHT 6 #define SPX_FORS_TREES 33 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* This is a SHA2-based parameter set, hence whether we use SHA-256 * exclusively or we use both SHA-256 and SHA-512 is controlled by * the following #define */ #define SPX_SHA512 0 /* Use SHA-256 for all hashes */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. 
#ifndef SPX_PARAMS_H
#define SPX_PARAMS_H

/*
 * Compile-time parameters for the SHA-2 "128s" parameter set.  The numeric
 * values quoted in the comments below follow from the #defines in this file.
 */

/* Prefix applied to exported symbol names. */
#define SPX_NAMESPACE(s) SPX_##s

/* Hash output length in bytes. */
#define SPX_N 16
/* Height of the hypertree. */
#define SPX_FULL_HEIGHT 63
/* Number of subtree layers; must divide SPX_FULL_HEIGHT (checked below). */
#define SPX_D 7
/* FORS tree dimensions. */
#define SPX_FORS_HEIGHT 12
#define SPX_FORS_TREES 14
/* Winternitz parameter; only 16 and 256 are handled below. */
#define SPX_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* This is a SHA2-based parameter set, hence whether we use SHA-256
 * exclusively or we use both SHA-256 and SHA-512 is controlled by
 * the following #define */
#define SPX_SHA512 0  /* Use SHA-256 for all hashes */

/* For clarity */
#define SPX_ADDR_BYTES 32

/* WOTS parameters. */
#if SPX_WOTS_W == 256
    #define SPX_WOTS_LOGW 8
#elif SPX_WOTS_W == 16
    #define SPX_WOTS_LOGW 4
#else
    #error SPX_WOTS_W assumed 16 or 256
#endif

/* Number of message chains: 8 * 16 / 4 = 32 for this set. */
#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)

/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */
#if SPX_WOTS_W == 256
    #if SPX_N <= 1
        #define SPX_WOTS_LEN2 1
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 2
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#elif SPX_WOTS_W == 16
    #if SPX_N <= 8
        #define SPX_WOTS_LEN2 2
    #elif SPX_N <= 136
        #define SPX_WOTS_LEN2 3
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 4
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#endif

/* 32 + 3 = 35 chains, i.e. 35 * 16 = 560 bytes per WOTS signature. */
#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)
#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)
#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES

/* Subtree size: 63 / 7 = 9 levels per subtree. */
#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)

#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT
    #error SPX_D should always divide SPX_FULL_HEIGHT
#endif

/* FORS parameters: 12 * 14 = 168 message bits rounded up to 21 bytes, and
   13 * 14 * 16 = 2912 signature bytes. */
#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)
#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)
#define SPX_FORS_PK_BYTES SPX_N

/* Resulting SPX sizes: 16 + 2912 + 7 * 560 + 63 * 16 = 7856 signature bytes,
   32 public-key bytes and 64 secret-key bytes. */
#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\
                   SPX_FULL_HEIGHT * SPX_N)
#define SPX_PK_BYTES (2 * SPX_N)
#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)

#include "../sha2_offsets.h"

#endif
#ifndef SPX_PARAMS_H
#define SPX_PARAMS_H

/*
 * Compile-time parameters for the SHA-2 "192s" parameter set.  The numeric
 * values quoted in the comments below follow from the #defines in this file.
 */

/* Prefix applied to exported symbol names. */
#define SPX_NAMESPACE(s) SPX_##s

/* Hash output length in bytes. */
#define SPX_N 24
/* Height of the hypertree. */
#define SPX_FULL_HEIGHT 63
/* Number of subtree layers; must divide SPX_FULL_HEIGHT (checked below). */
#define SPX_D 7
/* FORS tree dimensions. */
#define SPX_FORS_HEIGHT 14
#define SPX_FORS_TREES 17
/* Winternitz parameter; only 16 and 256 are handled below. */
#define SPX_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* This is a SHA2-based parameter set, hence whether we use SHA-256
 * exclusively or we use both SHA-256 and SHA-512 is controlled by
 * the following #define */
#define SPX_SHA512 1  /* Use SHA-512 for H and T_l, l >= 2 */

/* For clarity */
#define SPX_ADDR_BYTES 32

/* WOTS parameters. */
#if SPX_WOTS_W == 256
    #define SPX_WOTS_LOGW 8
#elif SPX_WOTS_W == 16
    #define SPX_WOTS_LOGW 4
#else
    #error SPX_WOTS_W assumed 16 or 256
#endif

/* Number of message chains: 8 * 24 / 4 = 48 for this set. */
#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)

/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */
#if SPX_WOTS_W == 256
    #if SPX_N <= 1
        #define SPX_WOTS_LEN2 1
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 2
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#elif SPX_WOTS_W == 16
    #if SPX_N <= 8
        #define SPX_WOTS_LEN2 2
    #elif SPX_N <= 136
        #define SPX_WOTS_LEN2 3
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 4
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#endif

/* 48 + 3 = 51 chains, i.e. 51 * 24 = 1224 bytes per WOTS signature. */
#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)
#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)
#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES

/* Subtree size: 63 / 7 = 9 levels per subtree. */
#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)

#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT
    #error SPX_D should always divide SPX_FULL_HEIGHT
#endif

/* FORS parameters: 14 * 17 = 238 message bits rounded up to 30 bytes, and
   15 * 17 * 24 = 6120 signature bytes. */
#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)
#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)
#define SPX_FORS_PK_BYTES SPX_N

/* Resulting SPX sizes: 24 + 6120 + 7 * 1224 + 63 * 24 = 16224 signature
   bytes, 48 public-key bytes and 96 secret-key bytes. */
#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\
                   SPX_FULL_HEIGHT * SPX_N)
#define SPX_PK_BYTES (2 * SPX_N)
#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)

#include "../sha2_offsets.h"

#endif
#ifndef SPX_PARAMS_H
#define SPX_PARAMS_H

/*
 * Compile-time parameters for the SHA-2 "256s" parameter set.  The numeric
 * values quoted in the comments below follow from the #defines in this file.
 */

/* Prefix applied to exported symbol names. */
#define SPX_NAMESPACE(s) SPX_##s

/* Hash output length in bytes. */
#define SPX_N 32
/* Height of the hypertree. */
#define SPX_FULL_HEIGHT 64
/* Number of subtree layers; must divide SPX_FULL_HEIGHT (checked below). */
#define SPX_D 8
/* FORS tree dimensions. */
#define SPX_FORS_HEIGHT 14
#define SPX_FORS_TREES 22
/* Winternitz parameter; only 16 and 256 are handled below. */
#define SPX_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* This is a SHA2-based parameter set, hence whether we use SHA-256
 * exclusively or we use both SHA-256 and SHA-512 is controlled by
 * the following #define */
#define SPX_SHA512 1  /* Use SHA-512 for H and T_l, l >= 2 */

/* For clarity */
#define SPX_ADDR_BYTES 32

/* WOTS parameters. */
#if SPX_WOTS_W == 256
    #define SPX_WOTS_LOGW 8
#elif SPX_WOTS_W == 16
    #define SPX_WOTS_LOGW 4
#else
    #error SPX_WOTS_W assumed 16 or 256
#endif

/* Number of message chains: 8 * 32 / 4 = 64 for this set. */
#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)

/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */
#if SPX_WOTS_W == 256
    #if SPX_N <= 1
        #define SPX_WOTS_LEN2 1
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 2
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#elif SPX_WOTS_W == 16
    #if SPX_N <= 8
        #define SPX_WOTS_LEN2 2
    #elif SPX_N <= 136
        #define SPX_WOTS_LEN2 3
    #elif SPX_N <= 256
        #define SPX_WOTS_LEN2 4
    #else
        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}
    #endif
#endif

/* 64 + 3 = 67 chains, i.e. 67 * 32 = 2144 bytes per WOTS signature. */
#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)
#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)
#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES

/* Subtree size: 64 / 8 = 8 levels per subtree. */
#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)

#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT
    #error SPX_D should always divide SPX_FULL_HEIGHT
#endif

/* FORS parameters: 14 * 22 = 308 message bits rounded up to 39 bytes, and
   15 * 22 * 32 = 10560 signature bytes. */
#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)
#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)
#define SPX_FORS_PK_BYTES SPX_N

/* Resulting SPX sizes: 32 + 10560 + 8 * 2144 + 64 * 32 = 29792 signature
   bytes, 64 public-key bytes and 128 secret-key bytes. */
#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\
                   SPX_FULL_HEIGHT * SPX_N)
#define SPX_PK_BYTES (2 * SPX_N)
#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)

#include "../sha2_offsets.h"

#endif
*/ #define SPX_FORS_HEIGHT 6 #define SPX_FORS_TREES 33 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. */ #define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\ SPX_FULL_HEIGHT * SPX_N) #define SPX_PK_BYTES (2 * SPX_N) #define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES) #include "../shake_offsets.h" #endif ================================================ FILE: ref/params/params-sphincs-shake-128s.h ================================================ #ifndef SPX_PARAMS_H #define SPX_PARAMS_H #define SPX_NAMESPACE(s) SPX_##s /* Hash output length in bytes. 
*/ #define SPX_N 16 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 63 /* Number of subtree layer. */ #define SPX_D 7 /* FORS tree dimensions. */ #define SPX_FORS_HEIGHT 12 #define SPX_FORS_TREES 14 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. 
*/ #define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\ SPX_FULL_HEIGHT * SPX_N) #define SPX_PK_BYTES (2 * SPX_N) #define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES) #include "../shake_offsets.h" #endif ================================================ FILE: ref/params/params-sphincs-shake-192f.h ================================================ #ifndef SPX_PARAMS_H #define SPX_PARAMS_H #define SPX_NAMESPACE(s) SPX_##s /* Hash output length in bytes. */ #define SPX_N 24 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 66 /* Number of subtree layer. */ #define SPX_D 22 /* FORS tree dimensions. */ #define SPX_FORS_HEIGHT 8 #define SPX_FORS_TREES 33 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. 
*/ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. */ #define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\ SPX_FULL_HEIGHT * SPX_N) #define SPX_PK_BYTES (2 * SPX_N) #define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES) #include "../shake_offsets.h" #endif ================================================ FILE: ref/params/params-sphincs-shake-192s.h ================================================ #ifndef SPX_PARAMS_H #define SPX_PARAMS_H #define SPX_NAMESPACE(s) SPX_##s /* Hash output length in bytes. */ #define SPX_N 24 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 63 /* Number of subtree layer. */ #define SPX_D 7 /* FORS tree dimensions. */ #define SPX_FORS_HEIGHT 14 #define SPX_FORS_TREES 17 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. 
*/ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. */ #define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\ SPX_FULL_HEIGHT * SPX_N) #define SPX_PK_BYTES (2 * SPX_N) #define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES) #include "../shake_offsets.h" #endif ================================================ FILE: ref/params/params-sphincs-shake-256f.h ================================================ #ifndef SPX_PARAMS_H #define SPX_PARAMS_H #define SPX_NAMESPACE(s) SPX_##s /* Hash output length in bytes. */ #define SPX_N 32 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 68 /* Number of subtree layer. */ #define SPX_D 17 /* FORS tree dimensions. */ #define SPX_FORS_HEIGHT 9 #define SPX_FORS_TREES 35 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. 
*/ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. */ #define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\ SPX_FULL_HEIGHT * SPX_N) #define SPX_PK_BYTES (2 * SPX_N) #define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES) #include "../shake_offsets.h" #endif ================================================ FILE: ref/params/params-sphincs-shake-256s.h ================================================ #ifndef SPX_PARAMS_H #define SPX_PARAMS_H #define SPX_NAMESPACE(s) SPX_##s /* Hash output length in bytes. */ #define SPX_N 32 /* Height of the hypertree. */ #define SPX_FULL_HEIGHT 64 /* Number of subtree layer. */ #define SPX_D 8 /* FORS tree dimensions. 
*/ #define SPX_FORS_HEIGHT 14 #define SPX_FORS_TREES 22 /* Winternitz parameter, */ #define SPX_WOTS_W 16 /* The hash function is defined by linking a different hash.c file, as opposed to setting a #define constant. */ /* For clarity */ #define SPX_ADDR_BYTES 32 /* WOTS parameters. */ #if SPX_WOTS_W == 256 #define SPX_WOTS_LOGW 8 #elif SPX_WOTS_W == 16 #define SPX_WOTS_LOGW 4 #else #error SPX_WOTS_W assumed 16 or 256 #endif #define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW) /* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ #if SPX_WOTS_W == 256 #if SPX_N <= 1 #define SPX_WOTS_LEN2 1 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 2 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #elif SPX_WOTS_W == 16 #if SPX_N <= 8 #define SPX_WOTS_LEN2 2 #elif SPX_N <= 136 #define SPX_WOTS_LEN2 3 #elif SPX_N <= 256 #define SPX_WOTS_LEN2 4 #else #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256} #endif #endif #define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2) #define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N) #define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES /* Subtree size. */ #define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D) #if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT #error SPX_D should always divide SPX_FULL_HEIGHT #endif /* FORS parameters. */ #define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8) #define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N) #define SPX_FORS_PK_BYTES SPX_N /* Resulting SPX sizes. 
*/ #define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\ SPX_FULL_HEIGHT * SPX_N) #define SPX_PK_BYTES (2 * SPX_N) #define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES) #include "../shake_offsets.h" #endif ================================================ FILE: ref/params.h ================================================ #define str(s) #s #define xstr(s) str(s) #include xstr(params/params-PARAMS.h) ================================================ FILE: ref/randombytes.c ================================================ /* This code was taken from the SPHINCS reference implementation and is public domain. */ #include #include #include "randombytes.h" static int fd = -1; void randombytes(unsigned char *x, unsigned long long xlen) { unsigned long long i; if (fd == -1) { for (;;) { fd = open("/dev/urandom", O_RDONLY); if (fd != -1) { break; } sleep(1); } } while (xlen > 0) { if (xlen < 1048576) { i = xlen; } else { i = 1048576; } i = (unsigned long long)read(fd, x, i); if (i < 1) { sleep(1); continue; } x += i; xlen -= i; } } ================================================ FILE: ref/randombytes.h ================================================ #ifndef SPX_RANDOMBYTES_H #define SPX_RANDOMBYTES_H extern void randombytes(unsigned char * x,unsigned long long xlen); #endif ================================================ FILE: ref/rng.c ================================================ // // rng.c // // Created by Bassham, Lawrence E (Fed) on 8/29/17. // Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. 
// #include #include "rng.h" #include #include #include AES256_CTR_DRBG_struct DRBG_ctx; void AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer); /* seedexpander_init() ctx - stores the current state of an instance of the seed expander seed - a 32 byte random value diversifier - an 8 byte diversifier maxlen - maximum number of bytes (less than 2**32) generated under this seed and diversifier */ int seedexpander_init(AES_XOF_struct *ctx, unsigned char *seed, unsigned char *diversifier, unsigned long maxlen) { if ( maxlen >= 0x100000000 ) return RNG_BAD_MAXLEN; ctx->length_remaining = maxlen; memcpy(ctx->key, seed, 32); memcpy(ctx->ctr, diversifier, 8); ctx->ctr[11] = (unsigned char)(maxlen % 256); maxlen >>= 8; ctx->ctr[10] = (unsigned char)(maxlen % 256); maxlen >>= 8; ctx->ctr[9] = (unsigned char)(maxlen % 256); maxlen >>= 8; ctx->ctr[8] = (unsigned char)(maxlen % 256); memset(ctx->ctr+12, 0x00, 4); ctx->buffer_pos = 16; memset(ctx->buffer, 0x00, 16); return RNG_SUCCESS; } /* seedexpander() ctx - stores the current state of an instance of the seed expander x - returns the XOF data xlen - number of bytes to return */ int seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen) { unsigned long offset; if ( x == NULL ) return RNG_BAD_OUTBUF; if ( xlen >= ctx->length_remaining ) return RNG_BAD_REQ_LEN; ctx->length_remaining -= xlen; offset = 0; while ( xlen > 0 ) { if ( xlen <= (16-ctx->buffer_pos) ) { // buffer has what we need memcpy(x+offset, ctx->buffer+ctx->buffer_pos, xlen); ctx->buffer_pos += xlen; return RNG_SUCCESS; } // take what's in the buffer memcpy(x+offset, ctx->buffer+ctx->buffer_pos, 16-ctx->buffer_pos); xlen -= 16-ctx->buffer_pos; offset += 16-ctx->buffer_pos; AES256_ECB(ctx->key, ctx->ctr, ctx->buffer); ctx->buffer_pos = 0; //increment the counter for (int i=15; i>=12; i--) { if ( ctx->ctr[i] == 0xff ) ctx->ctr[i] = 0x00; else { ctx->ctr[i]++; break; } } } return RNG_SUCCESS; } static void handleErrors(void) { 
ERR_print_errors_fp(stderr); abort(); } // Use whatever AES implementation you have. This uses AES from openSSL library // key - 256-bit AES key // ctr - a 128-bit plaintext value // buffer - a 128-bit ciphertext value void AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer) { EVP_CIPHER_CTX *ctx; int len; /* Create and initialise the context */ if(!(ctx = EVP_CIPHER_CTX_new())) handleErrors(); if(1 != EVP_EncryptInit_ex(ctx, EVP_aes_256_ecb(), NULL, key, NULL)) handleErrors(); if(1 != EVP_EncryptUpdate(ctx, buffer, &len, ctr, 16)) handleErrors(); /* Clean up */ EVP_CIPHER_CTX_free(ctx); } void randombytes_init(unsigned char *entropy_input, unsigned char *personalization_string) { unsigned char seed_material[48]; memcpy(seed_material, entropy_input, 48); if (personalization_string) for (int i=0; i<48; i++) seed_material[i] ^= personalization_string[i]; memset(DRBG_ctx.Key, 0x00, 32); memset(DRBG_ctx.V, 0x00, 16); AES256_CTR_DRBG_Update(seed_material, DRBG_ctx.Key, DRBG_ctx.V); DRBG_ctx.reseed_counter = 1; } int randombytes(unsigned char *x, unsigned long long xlen) { unsigned char block[16]; int i = 0; while ( xlen > 0 ) { //increment V for (int j=15; j>=0; j--) { if ( DRBG_ctx.V[j] == 0xff ) DRBG_ctx.V[j] = 0x00; else { DRBG_ctx.V[j]++; break; } } AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, block); if ( xlen > 15 ) { memcpy(x+i, block, 16); i += 16; xlen -= 16; } else { memcpy(x+i, block, xlen); xlen = 0; } } AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V); DRBG_ctx.reseed_counter++; return RNG_SUCCESS; } void AES256_CTR_DRBG_Update(unsigned char *provided_data, unsigned char *Key, unsigned char *V) { unsigned char temp[48]; for (int i=0; i<3; i++) { //increment V for (int j=15; j>=0; j--) { if ( V[j] == 0xff ) V[j] = 0x00; else { V[j]++; break; } } AES256_ECB(Key, V, temp+16*i); } if ( provided_data != NULL ) for (int i=0; i<48; i++) temp[i] ^= provided_data[i]; memcpy(Key, temp, 32); memcpy(V, temp+32, 16); } 
================================================
FILE: ref/rng.h
================================================
//
//  rng.h
//
//  Created by Bassham, Lawrence E (Fed) on 8/29/17.
//  Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.
//

#ifndef rng_h
#define rng_h

// NOTE(review): the operand of the #include below is missing in this copy
// (it looks stripped by extraction) - restore it from the upstream file.
#include

// Status codes returned by the functions declared in this header.
#define RNG_SUCCESS      0
#define RNG_BAD_MAXLEN  -1
#define RNG_BAD_OUTBUF  -2
#define RNG_BAD_REQ_LEN -3

// State of one seed-expander (AES-based XOF) instance.
typedef struct {
    unsigned char   buffer[16];         // most recent AES output block
    unsigned long   buffer_pos;         // index of the next unread byte in buffer; 16 means empty
    unsigned long   length_remaining;   // output budget left under this seed/diversifier
    unsigned char   key[32];            // AES-256 key, copied from the seed
    unsigned char   ctr[16];            // counter block that is encrypted to refill buffer
} AES_XOF_struct;

// State of the AES-256 counter-mode DRBG behind randombytes().
typedef struct {
    unsigned char   Key[32];            // current AES-256 key
    unsigned char   V[16];              // current counter value
    int             reseed_counter;     // set to 1 by randombytes_init(), incremented per randombytes() call
} AES256_CTR_DRBG_struct;


// Derive a new Key and V in place; provided_data (48 bytes) may be NULL.
void
AES256_CTR_DRBG_Update(unsigned char *provided_data,
                       unsigned char *Key,
                       unsigned char *V);

// Initialise ctx from a 32-byte seed and an 8-byte diversifier; maxlen is
// the output budget.  Returns RNG_SUCCESS or RNG_BAD_MAXLEN.
int
seedexpander_init(AES_XOF_struct *ctx,
                  unsigned char *seed,
                  unsigned char *diversifier,
                  unsigned long maxlen);

// Write xlen bytes of expander output to x.  Returns RNG_SUCCESS,
// RNG_BAD_OUTBUF (x is NULL) or RNG_BAD_REQ_LEN.
int
seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen);

// Seed the global DRBG from 48 bytes of entropy; personalization_string
// (48 bytes) may be NULL.
void
randombytes_init(unsigned char *entropy_input,
                 unsigned char *personalization_string);

// Write xlen bytes of DRBG output to x.  Returns RNG_SUCCESS.
int
randombytes(unsigned char *x, unsigned long long xlen);

#endif /* rng_h */

================================================
FILE: ref/sha2.c
================================================
/* Based on the public domain implementation in
 * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html
 * by D. J.
Bernstein */

/* NOTE(review): the operands of the two #include lines below are missing in
 * this copy (they look stripped by extraction) - restore them from the
 * upstream file. */
#include
#include
#include "utils.h"
#include "sha2.h"

/* Read four bytes at x as one big-endian 32-bit value (x[0] is most significant). */
static uint32_t load_bigendian_32(const uint8_t *x) {
    return (uint32_t)(x[3]) | (((uint32_t)(x[2])) << 8) |
           (((uint32_t)(x[1])) << 16) | (((uint32_t)(x[0])) << 24);
}

/* Read eight bytes at x as one big-endian 64-bit value (x[0] is most significant). */
static uint64_t load_bigendian_64(const uint8_t *x) {
    return (uint64_t)(x[7]) | (((uint64_t)(x[6])) << 8) |
           (((uint64_t)(x[5])) << 16) | (((uint64_t)(x[4])) << 24) |
           (((uint64_t)(x[3])) << 32) | (((uint64_t)(x[2])) << 40) |
           (((uint64_t)(x[1])) << 48) | (((uint64_t)(x[0])) << 56);
}

/* Write the low 32 bits of u to x[0..3] in big-endian order.  The parameter
 * is 64 bits wide, but only four bytes are stored. */
static void store_bigendian_32(uint8_t *x, uint64_t u) {
    x[3] = (uint8_t) u;
    u >>= 8;
    x[2] = (uint8_t) u;
    u >>= 8;
    x[1] = (uint8_t) u;
    u >>= 8;
    x[0] = (uint8_t) u;
}

/* Write u to x[0..7] in big-endian order. */
static void store_bigendian_64(uint8_t *x, uint64_t u) {
    x[7] = (uint8_t) u;
    u >>= 8;
    x[6] = (uint8_t) u;
    u >>= 8;
    x[5] = (uint8_t) u;
    u >>= 8;
    x[4] = (uint8_t) u;
    u >>= 8;
    x[3] = (uint8_t) u;
    u >>= 8;
    x[2] = (uint8_t) u;
    u >>= 8;
    x[1] = (uint8_t) u;
    u >>= 8;
    x[0] = (uint8_t) u;
}

/* Shift and rotate primitives; the _32/_64 suffix is the word width the
 * rotate assumes. */
#define SHR(x, c) ((x) >> (c))
#define ROTR_32(x, c) (((x) >> (c)) | ((x) << (32 - (c))))
#define ROTR_64(x,c) (((x) >> (c)) | ((x) << (64 - (c))))

/* Bitwise choose (x selects between y and z) and bitwise majority. */
#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))

/* Mixing functions for 32-bit words: Sigma* are used in the round macro
 * F_32, sigma* in the message-schedule macro M_32. */
#define Sigma0_32(x) (ROTR_32(x, 2) ^ ROTR_32(x,13) ^ ROTR_32(x,22))
#define Sigma1_32(x) (ROTR_32(x, 6) ^ ROTR_32(x,11) ^ ROTR_32(x,25))
#define sigma0_32(x) (ROTR_32(x, 7) ^ ROTR_32(x,18) ^ SHR(x, 3))
#define sigma1_32(x) (ROTR_32(x,17) ^ ROTR_32(x,19) ^ SHR(x,10))

/* The same four functions for 64-bit words (used by F_64 and M_64). */
#define Sigma0_64(x) (ROTR_64(x,28) ^ ROTR_64(x,34) ^ ROTR_64(x,39))
#define Sigma1_64(x) (ROTR_64(x,14) ^ ROTR_64(x,18) ^ ROTR_64(x,41))
#define sigma0_64(x) (ROTR_64(x, 1) ^ ROTR_64(x, 8) ^ SHR(x,7))
#define sigma1_64(x) (ROTR_64(x,19) ^ ROTR_64(x,61) ^ SHR(x,6))

/* Update one message-schedule word in place from three other words.  The
 * expansion ends with its own ';'. */
#define M_32(w0, w14, w9, w1) w0 = sigma1_32(w14) + (w9) + sigma0_32(w1) + (w0);
#define M_64(w0, w14, w9, w1) w0 = sigma1_64(w14) + (w9) + sigma0_64(w1) + (w0);

/* Advance all sixteen schedule words w0..w15 by one step each.  These macros
 * refer to local variables named w0..w15 in the enclosing function. */
#define EXPAND_32           \
    M_32(w0, w14, w9, w1)   \
    M_32(w1, w15, w10, w2)  \
    M_32(w2, w0, w11, w3)   \
    M_32(w3, w1, w12, w4)   \
    M_32(w4, w2, w13, w5)   \
    M_32(w5, w3, w14, w6)   \
    M_32(w6, w4, w15, w7)   \
    M_32(w7, w5, w0, w8)    \
    M_32(w8, w6, w1, w9)    \
    M_32(w9, w7, w2, w10)   \
    M_32(w10, w8, w3, w11)  \
    M_32(w11, w9, w4, w12)  \
    M_32(w12, w10, w5, w13) \
    M_32(w13, w11, w6, w14) \
    M_32(w14, w12, w7, w15) \
    M_32(w15, w13, w8, w0)

#define EXPAND_64 \
    M_64(w0 ,w14,w9 ,w1 ) \
    M_64(w1 ,w15,w10,w2 ) \
    M_64(w2 ,w0 ,w11,w3 ) \
    M_64(w3 ,w1 ,w12,w4 ) \
    M_64(w4 ,w2 ,w13,w5 ) \
    M_64(w5 ,w3 ,w14,w6 ) \
    M_64(w6 ,w4 ,w15,w7 ) \
    M_64(w7 ,w5 ,w0 ,w8 ) \
    M_64(w8 ,w6 ,w1 ,w9 ) \
    M_64(w9 ,w7 ,w2 ,w10) \
    M_64(w10,w8 ,w3 ,w11) \
    M_64(w11,w9 ,w4 ,w12) \
    M_64(w12,w10,w5 ,w13) \
    M_64(w13,w11,w6 ,w14) \
    M_64(w14,w12,w7 ,w15) \
    M_64(w15,w13,w8 ,w0 )

/* One round: combine schedule word w and round constant k into the working
 * variables a..h (locals of the enclosing function), using T1/T2 as
 * temporaries, and shift the variables down by one position. */
#define F_32(w, k)                                   \
    T1 = h + Sigma1_32(e) + Ch(e, f, g) + (k) + (w); \
    T2 = Sigma0_32(a) + Maj(a, b, c);                \
    h = g;                                           \
    g = f;                                           \
    f = e;                                           \
    e = d + T1;                                      \
    d = c;                                           \
    c = b;                                           \
    b = a;                                           \
    a = T1 + T2;

/* 64-bit counterpart of F_32.  Note that k and w are not parenthesised here,
 * unlike in F_32; every use in this file passes a plain identifier or
 * literal. */
#define F_64(w,k) \
    T1 = h + Sigma1_64(e) + Ch(e,f,g) + k + w; \
    T2 = Sigma0_64(a) + Maj(a,b,c); \
    h = g; \
    g = f; \
    f = e; \
    e = d + T1; \
    d = c; \
    c = b; \
    b = a; \
    a = T1 + T2;

/*
 * Absorb every complete 64-byte block of `in` into the SHA-256 chaining
 * value.
 *
 * statebytes - 32-byte chaining value, big-endian words; updated in place
 * in         - input bytes
 * inlen      - number of input bytes
 *
 * Returns the number of trailing bytes (fewer than 64) that were not
 * processed.
 */
static size_t crypto_hashblocks_sha256(uint8_t *statebytes,
                                       const uint8_t *in, size_t inlen) {
    uint32_t state[8];
    uint32_t a;
    uint32_t b;
    uint32_t c;
    uint32_t d;
    uint32_t e;
    uint32_t f;
    uint32_t g;
    uint32_t h;
    uint32_t T1;
    uint32_t T2;

    /* Load the chaining value into both the working variables and state[]. */
    a = load_bigendian_32(statebytes + 0);
    state[0] = a;
    b = load_bigendian_32(statebytes + 4);
    state[1] = b;
    c = load_bigendian_32(statebytes + 8);
    state[2] = c;
    d = load_bigendian_32(statebytes + 12);
    state[3] = d;
    e = load_bigendian_32(statebytes + 16);
    state[4] = e;
    f = load_bigendian_32(statebytes + 20);
    state[5] = f;
    g = load_bigendian_32(statebytes + 24);
    state[6] = g;
    h = load_bigendian_32(statebytes + 28);
    state[7] = h;

    while (inlen >= 64) {
        /* The sixteen message words of this block. */
        uint32_t w0  = load_bigendian_32(in + 0);
        uint32_t w1  = load_bigendian_32(in + 4);
        uint32_t w2  = load_bigendian_32(in + 8);
        uint32_t w3  = load_bigendian_32(in + 12);
        uint32_t w4  = load_bigendian_32(in + 16);
        uint32_t w5  = load_bigendian_32(in + 20);
        uint32_t w6  = load_bigendian_32(in + 24);
        uint32_t w7  = load_bigendian_32(in + 28);
        uint32_t w8  = load_bigendian_32(in + 32);
        uint32_t w9  = load_bigendian_32(in + 36);
        uint32_t w10 = load_bigendian_32(in + 40);
        uint32_t w11 = load_bigendian_32(in + 44);
        uint32_t w12 = load_bigendian_32(in + 48);
        uint32_t w13 = load_bigendian_32(in + 52);
        uint32_t w14 = load_bigendian_32(in + 56);
        uint32_t w15 = load_bigendian_32(in + 60);

        /* 64 rounds: four groups of sixteen, with the schedule expanded
         * between groups. */
        F_32(w0, 0x428a2f98)
        F_32(w1, 0x71374491)
        F_32(w2, 0xb5c0fbcf)
        F_32(w3, 0xe9b5dba5)
        F_32(w4, 0x3956c25b)
        F_32(w5, 0x59f111f1)
        F_32(w6, 0x923f82a4)
        F_32(w7, 0xab1c5ed5)
        F_32(w8, 0xd807aa98)
        F_32(w9, 0x12835b01)
        F_32(w10, 0x243185be)
        F_32(w11, 0x550c7dc3)
        F_32(w12, 0x72be5d74)
        F_32(w13, 0x80deb1fe)
        F_32(w14, 0x9bdc06a7)
        F_32(w15, 0xc19bf174)

        EXPAND_32

        F_32(w0, 0xe49b69c1)
        F_32(w1, 0xefbe4786)
        F_32(w2, 0x0fc19dc6)
        F_32(w3, 0x240ca1cc)
        F_32(w4, 0x2de92c6f)
        F_32(w5, 0x4a7484aa)
        F_32(w6, 0x5cb0a9dc)
        F_32(w7, 0x76f988da)
        F_32(w8, 0x983e5152)
        F_32(w9, 0xa831c66d)
        F_32(w10, 0xb00327c8)
        F_32(w11, 0xbf597fc7)
        F_32(w12, 0xc6e00bf3)
        F_32(w13, 0xd5a79147)
        F_32(w14, 0x06ca6351)
        F_32(w15, 0x14292967)

        EXPAND_32

        F_32(w0, 0x27b70a85)
        F_32(w1, 0x2e1b2138)
        F_32(w2, 0x4d2c6dfc)
        F_32(w3, 0x53380d13)
        F_32(w4, 0x650a7354)
        F_32(w5, 0x766a0abb)
        F_32(w6, 0x81c2c92e)
        F_32(w7, 0x92722c85)
        F_32(w8, 0xa2bfe8a1)
        F_32(w9, 0xa81a664b)
        F_32(w10, 0xc24b8b70)
        F_32(w11, 0xc76c51a3)
        F_32(w12, 0xd192e819)
        F_32(w13, 0xd6990624)
        F_32(w14, 0xf40e3585)
        F_32(w15, 0x106aa070)

        EXPAND_32

        F_32(w0, 0x19a4c116)
        F_32(w1, 0x1e376c08)
        F_32(w2, 0x2748774c)
        F_32(w3, 0x34b0bcb5)
        F_32(w4, 0x391c0cb3)
        F_32(w5, 0x4ed8aa4a)
        F_32(w6, 0x5b9cca4f)
        F_32(w7, 0x682e6ff3)
        F_32(w8, 0x748f82ee)
        F_32(w9, 0x78a5636f)
        F_32(w10, 0x84c87814)
        F_32(w11, 0x8cc70208)
        F_32(w12, 0x90befffa)
        F_32(w13, 0xa4506ceb)
        F_32(w14, 0xbef9a3f7)
        F_32(w15, 0xc67178f2)

        /* Add the block's result to the previous chaining value. */
        a += state[0];
        b += state[1];
        c += state[2];
        d += state[3];
        e += state[4];
        f += state[5];
        g += state[6];
        h += state[7];

        state[0] = a;
        state[1] = b;
        state[2] = c;
        state[3] = d;
        state[4] = e;
        state[5] = f;
        state[6] = g;
        state[7] = h;

        in += 64;
        inlen -= 64;
    }

    store_bigendian_32(statebytes + 0, state[0]);
    store_bigendian_32(statebytes + 4, state[1]);
    store_bigendian_32(statebytes + 8, state[2]);
    store_bigendian_32(statebytes + 12, state[3]);
    store_bigendian_32(statebytes + 16, state[4]);
    store_bigendian_32(statebytes + 20, state[5]);
    store_bigendian_32(statebytes + 24, state[6]);
    store_bigendian_32(statebytes + 28, state[7]);

    return inlen;
}

/*
 * Absorb every complete 128-byte block of `in` into the SHA-512 chaining
 * value.
 *
 * statebytes - 64-byte chaining value, big-endian words; updated in place
 * in         - input bytes
 * inlen      - number of input bytes
 *
 * Returns the number of trailing bytes that were not processed.  The value
 * is below 128 when the loop exits, so the conversion from
 * unsigned long long to int does not lose information.
 */
static int crypto_hashblocks_sha512(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{
    uint64_t state[8];
    uint64_t a;
    uint64_t b;
    uint64_t c;
    uint64_t d;
    uint64_t e;
    uint64_t f;
    uint64_t g;
    uint64_t h;
    uint64_t T1;
    uint64_t T2;

    /* Load the chaining value into both the working variables and state[]. */
    a = load_bigendian_64(statebytes + 0);
    state[0] = a;
    b = load_bigendian_64(statebytes + 8);
    state[1] = b;
    c = load_bigendian_64(statebytes + 16);
    state[2] = c;
    d = load_bigendian_64(statebytes + 24);
    state[3] = d;
    e = load_bigendian_64(statebytes + 32);
    state[4] = e;
    f = load_bigendian_64(statebytes + 40);
    state[5] = f;
    g = load_bigendian_64(statebytes + 48);
    state[6] = g;
    h = load_bigendian_64(statebytes + 56);
    state[7] = h;

    while (inlen >= 128) {
        /* The sixteen message words of this block. */
        uint64_t w0  = load_bigendian_64(in + 0);
        uint64_t w1  = load_bigendian_64(in + 8);
        uint64_t w2  = load_bigendian_64(in + 16);
        uint64_t w3  = load_bigendian_64(in + 24);
        uint64_t w4  = load_bigendian_64(in + 32);
        uint64_t w5  = load_bigendian_64(in + 40);
        uint64_t w6  = load_bigendian_64(in + 48);
        uint64_t w7  = load_bigendian_64(in + 56);
        uint64_t w8  = load_bigendian_64(in + 64);
        uint64_t w9  = load_bigendian_64(in + 72);
        uint64_t w10 = load_bigendian_64(in + 80);
        uint64_t w11 = load_bigendian_64(in + 88);
        uint64_t w12 = load_bigendian_64(in + 96);
        uint64_t w13 = load_bigendian_64(in + 104);
        uint64_t w14 = load_bigendian_64(in + 112);
        uint64_t w15 = load_bigendian_64(in + 120);

        /* 80 rounds: five groups of sixteen, with the schedule expanded
         * between groups. */
        F_64(w0 ,0x428a2f98d728ae22ULL)
        F_64(w1 ,0x7137449123ef65cdULL)
        F_64(w2 ,0xb5c0fbcfec4d3b2fULL)
        F_64(w3 ,0xe9b5dba58189dbbcULL)
        F_64(w4 ,0x3956c25bf348b538ULL)
        F_64(w5 ,0x59f111f1b605d019ULL)
        F_64(w6 ,0x923f82a4af194f9bULL)
        F_64(w7 ,0xab1c5ed5da6d8118ULL)
        F_64(w8 ,0xd807aa98a3030242ULL)
        F_64(w9 ,0x12835b0145706fbeULL)
        F_64(w10,0x243185be4ee4b28cULL)
        F_64(w11,0x550c7dc3d5ffb4e2ULL)
        F_64(w12,0x72be5d74f27b896fULL)
        F_64(w13,0x80deb1fe3b1696b1ULL)
        F_64(w14,0x9bdc06a725c71235ULL)
        F_64(w15,0xc19bf174cf692694ULL)

        EXPAND_64

        F_64(w0 ,0xe49b69c19ef14ad2ULL)
        F_64(w1 ,0xefbe4786384f25e3ULL)
        F_64(w2 ,0x0fc19dc68b8cd5b5ULL)
        F_64(w3 ,0x240ca1cc77ac9c65ULL)
        F_64(w4 ,0x2de92c6f592b0275ULL)
        F_64(w5 ,0x4a7484aa6ea6e483ULL)
        F_64(w6 ,0x5cb0a9dcbd41fbd4ULL)
        F_64(w7 ,0x76f988da831153b5ULL)
        F_64(w8 ,0x983e5152ee66dfabULL)
        F_64(w9 ,0xa831c66d2db43210ULL)
        F_64(w10,0xb00327c898fb213fULL)
        F_64(w11,0xbf597fc7beef0ee4ULL)
        F_64(w12,0xc6e00bf33da88fc2ULL)
        F_64(w13,0xd5a79147930aa725ULL)
        F_64(w14,0x06ca6351e003826fULL)
        F_64(w15,0x142929670a0e6e70ULL)

        EXPAND_64

        F_64(w0 ,0x27b70a8546d22ffcULL)
        F_64(w1 ,0x2e1b21385c26c926ULL)
        F_64(w2 ,0x4d2c6dfc5ac42aedULL)
        F_64(w3 ,0x53380d139d95b3dfULL)
        F_64(w4 ,0x650a73548baf63deULL)
        F_64(w5 ,0x766a0abb3c77b2a8ULL)
        F_64(w6 ,0x81c2c92e47edaee6ULL)
        F_64(w7 ,0x92722c851482353bULL)
        F_64(w8 ,0xa2bfe8a14cf10364ULL)
        F_64(w9 ,0xa81a664bbc423001ULL)
        F_64(w10,0xc24b8b70d0f89791ULL)
        F_64(w11,0xc76c51a30654be30ULL)
        F_64(w12,0xd192e819d6ef5218ULL)
        F_64(w13,0xd69906245565a910ULL)
        F_64(w14,0xf40e35855771202aULL)
        F_64(w15,0x106aa07032bbd1b8ULL)

        EXPAND_64

        F_64(w0 ,0x19a4c116b8d2d0c8ULL)
        F_64(w1 ,0x1e376c085141ab53ULL)
        F_64(w2 ,0x2748774cdf8eeb99ULL)
        F_64(w3 ,0x34b0bcb5e19b48a8ULL)
        F_64(w4 ,0x391c0cb3c5c95a63ULL)
        F_64(w5 ,0x4ed8aa4ae3418acbULL)
        F_64(w6 ,0x5b9cca4f7763e373ULL)
        F_64(w7 ,0x682e6ff3d6b2b8a3ULL)
        F_64(w8 ,0x748f82ee5defb2fcULL)
        F_64(w9 ,0x78a5636f43172f60ULL)
        F_64(w10,0x84c87814a1f0ab72ULL)
        F_64(w11,0x8cc702081a6439ecULL)
        F_64(w12,0x90befffa23631e28ULL)
        F_64(w13,0xa4506cebde82bde9ULL)
        F_64(w14,0xbef9a3f7b2c67915ULL)
        F_64(w15,0xc67178f2e372532bULL)

        EXPAND_64

        F_64(w0 ,0xca273eceea26619cULL)
        F_64(w1 ,0xd186b8c721c0c207ULL)
        F_64(w2 ,0xeada7dd6cde0eb1eULL)
        F_64(w3 ,0xf57d4f7fee6ed178ULL)
        F_64(w4 ,0x06f067aa72176fbaULL)
        F_64(w5 ,0x0a637dc5a2c898a6ULL)
        F_64(w6 ,0x113f9804bef90daeULL)
        F_64(w7 ,0x1b710b35131c471bULL)
        F_64(w8 ,0x28db77f523047d84ULL)
        F_64(w9 ,0x32caab7b40c72493ULL)
        F_64(w10,0x3c9ebe0a15c9bebcULL)
        F_64(w11,0x431d67c49c100d4cULL)
        F_64(w12,0x4cc5d4becb3e42b6ULL)
        F_64(w13,0x597f299cfc657e2aULL)
        F_64(w14,0x5fcb6fab3ad6faecULL)
        F_64(w15,0x6c44198c4a475817ULL)

        /* Add the block's result to the previous chaining value. */
        a += state[0];
        b += state[1];
        c += state[2];
        d += state[3];
        e += state[4];
        f += state[5];
        g += state[6];
        h += state[7];

        state[0] = a;
        state[1] = b;
        state[2] = c;
        state[3] = d;
        state[4] = e;
        state[5] = f;
        state[6] = g;
        state[7] = h;

        in += 128;
        inlen -= 128;
    }

    store_bigendian_64(statebytes + 0,state[0]);
    store_bigendian_64(statebytes + 8,state[1]);
    store_bigendian_64(statebytes + 16,state[2]);
    store_bigendian_64(statebytes + 24,state[3]);
    store_bigendian_64(statebytes + 32,state[4]);
    store_bigendian_64(statebytes + 40,state[5]);
    store_bigendian_64(statebytes + 48,state[6]);
    store_bigendian_64(statebytes + 56,state[7]);

    return inlen;
}

/* Initial SHA-256 chaining value: eight 32-bit words in big-endian byte
 * order. */
static const uint8_t iv_256[32] = {
    0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85,
    0x3c, 0x6e, 0xf3, 0x72, 0xa5, 0x4f, 0xf5, 0x3a,
    0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, 0x68, 0x8c,
    0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19
};

/* Initial SHA-512 chaining value: eight 64-bit words in big-endian byte
 * order. */
static const uint8_t iv_512[64] = {
    0x6a, 0x09, 0xe6, 0x67, 0xf3, 0xbc, 0xc9, 0x08, 0xbb, 0x67, 0xae,
    0x85, 0x84, 0xca, 0xa7, 0x3b, 0x3c, 0x6e, 0xf3, 0x72, 0xfe, 0x94,
    0xf8, 0x2b, 0xa5, 0x4f, 0xf5, 0x3a, 0x5f, 0x1d, 0x36, 0xf1, 0x51,
    0x0e, 0x52, 0x7f, 0xad, 0xe6, 0x82, 0xd1, 0x9b, 0x05, 0x68, 0x8c,
    0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd,
    0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, 0x7e, 0x21, 0x79
};

/*
 * Initialise a 40-byte incremental SHA-256 state: bytes 0..31 receive the
 * initial chaining value and bytes 32..39 (the big-endian count of bytes
 * absorbed so far) are zeroed.
 */
void sha256_inc_init(uint8_t *state) {
    for (size_t i = 0; i < 32; ++i) {
        state[i] = iv_256[i];
    }
    for (size_t i = 32; i < 40; ++i) {
        state[i] = 0;
    }
}
/*
 * Initialise an incremental SHA-512 state.
 *
 * Bytes 0..63 of `state` receive the initial hash value iv_512 and bytes
 * 64..71 are cleared. The other sha512_inc_* routines treat those last
 * 8 bytes as a big-endian count of the bytes absorbed so far, so `state`
 * must provide at least 72 bytes.
 */
void sha512_inc_init(uint8_t *state) {
    for (size_t i = 0; i < 64; ++i) {
        state[i] = iv_512[i];
    }
    for (size_t i = 64; i < 72; ++i) {
        state[i] = 0;
    }
}

/*
 * Absorb `inblocks` whole 64-byte blocks from `in` into a SHA-256 state.
 *
 * state    : 40 bytes; 32 bytes of hash value followed by the big-endian
 *            64-bit byte counter at offset 32, which is advanced by
 *            64 * inblocks.
 * in       : at least 64 * inblocks readable bytes.
 */
void sha256_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks) {
    uint64_t bytes = load_bigendian_64(state + 32);

    crypto_hashblocks_sha256(state, in, 64 * inblocks);
    bytes += 64 * inblocks;

    store_bigendian_64(state + 32, bytes);
}

/*
 * Absorb `inblocks` whole 128-byte blocks from `in` into a SHA-512 state.
 *
 * state    : 72 bytes; 64 bytes of hash value followed by the big-endian
 *            64-bit byte counter at offset 64, which is advanced by
 *            128 * inblocks.
 * in       : at least 128 * inblocks readable bytes.
 */
void sha512_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks) {
    uint64_t bytes = load_bigendian_64(state + 64);

    crypto_hashblocks_sha512(state, in, 128 * inblocks);
    bytes += 128 * inblocks;

    store_bigendian_64(state + 64, bytes);
}

/*
 * Absorb the final `inlen` bytes of the message, apply the SHA-256 padding
 * and write the 32-byte digest to `out`.
 *
 * out   : receives 32 bytes (the first 32 bytes of the updated state).
 * state : 40-byte incremental state; its hash value is updated in place,
 *         the stored byte counter is only read.
 * in    : trailing message bytes; may be longer than one block.
 * inlen : number of bytes at `in`.
 */
void sha256_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen) {
    uint8_t padded[128];
    /* Total message length in bytes: everything absorbed before plus `in`. */
    uint64_t bytes = load_bigendian_64(state + 32) + inlen;

    /* NOTE(review): presumably compresses every complete 64-byte block of
       `in` and leaves the remainder untouched - confirm against
       crypto_hashblocks_sha256, which is outside this view. */
    crypto_hashblocks_sha256(state, in, inlen);
    /* Re-point `in` at the trailing partial block (inlen mod 64 bytes). */
    in += inlen;
    inlen &= 63;
    in -= inlen;

    for (size_t i = 0; i < inlen; ++i) {
        padded[i] = in[i];
    }
    /* Mandatory padding byte: a single 1 bit followed by zeros. */
    padded[inlen] = 0x80;

    if (inlen < 56) {
        /* The 8-byte length field still fits in this block. */
        for (size_t i = inlen + 1; i < 56; ++i) {
            padded[i] = 0;
        }
        /* Big-endian 64-bit message length in bits, i.e. bytes * 8:
           byte k of (bytes << 3) equals bytes >> (8*k - 3). */
        padded[56] = (uint8_t) (bytes >> 53);
        padded[57] = (uint8_t) (bytes >> 45);
        padded[58] = (uint8_t) (bytes >> 37);
        padded[59] = (uint8_t) (bytes >> 29);
        padded[60] = (uint8_t) (bytes >> 21);
        padded[61] = (uint8_t) (bytes >> 13);
        padded[62] = (uint8_t) (bytes >> 5);
        padded[63] = (uint8_t) (bytes << 3);
        crypto_hashblocks_sha256(state, padded, 64);
    } else {
        /* No room for the length field: pad out to two blocks. */
        for (size_t i = inlen + 1; i < 120; ++i) {
            padded[i] = 0;
        }
        padded[120] = (uint8_t) (bytes >> 53);
        padded[121] = (uint8_t) (bytes >> 45);
        padded[122] = (uint8_t) (bytes >> 37);
        padded[123] = (uint8_t) (bytes >> 29);
        padded[124] = (uint8_t) (bytes >> 21);
        padded[125] = (uint8_t) (bytes >> 13);
        padded[126] = (uint8_t) (bytes >> 5);
        padded[127] = (uint8_t) (bytes << 3);
        crypto_hashblocks_sha256(state, padded, 128);
    }

    for (size_t i = 0; i < 32; ++i) {
        out[i] = state[i];
    }
}

/*
 * Absorb the final `inlen` bytes of the message, apply the SHA-512 padding
 * and write the 64-byte digest to `out`.
 *
 * out   : receives 64 bytes (the first 64 bytes of the updated state).
 * state : 72-byte incremental state; its hash value is updated in place,
 *         the stored byte counter is only read.
 * in    : trailing message bytes; may be longer than one block.
 * inlen : number of bytes at `in`.
 */
void sha512_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen) {
    uint8_t padded[256];
    /* Total message length in bytes: everything absorbed before plus `in`. */
    uint64_t bytes = load_bigendian_64(state + 64) + inlen;

    crypto_hashblocks_sha512(state, in, inlen);
    /* Re-point `in` at the trailing partial block (inlen mod 128 bytes). */
    in += inlen;
    inlen &= 127;
    in -= inlen;

    for (size_t i = 0; i < inlen; ++i) {
        padded[i] = in[i];
    }
    /* Mandatory padding byte: a single 1 bit followed by zeros. */
    padded[inlen] = 0x80;

    if (inlen < 112) {
        /* The length field occupies bytes 112..127. Because the byte count
           is only 64 bits wide, the bit count needs at most 9 bytes, so
           bytes 112..118 are simply zeroed by this loop. */
        for (size_t i = inlen + 1; i < 119; ++i) {
            padded[i] = 0;
        }
        /* Low 9 bytes of the big-endian bit length (bytes * 8). */
        padded[119] = (uint8_t) (bytes >> 61);
        padded[120] = (uint8_t) (bytes >> 53);
        padded[121] = (uint8_t) (bytes >> 45);
        padded[122] = (uint8_t) (bytes >> 37);
        padded[123] = (uint8_t) (bytes >> 29);
        padded[124] = (uint8_t) (bytes >> 21);
        padded[125] = (uint8_t) (bytes >> 13);
        padded[126] = (uint8_t) (bytes >> 5);
        padded[127] = (uint8_t) (bytes << 3);
        crypto_hashblocks_sha512(state, padded, 128);
    } else {
        /* No room for the length field: pad out to two blocks. */
        for (size_t i = inlen + 1; i < 247; ++i) {
            padded[i] = 0;
        }
        padded[247] = (uint8_t) (bytes >> 61);
        padded[248] = (uint8_t) (bytes >> 53);
        padded[249] = (uint8_t) (bytes >> 45);
        padded[250] = (uint8_t) (bytes >> 37);
        padded[251] = (uint8_t) (bytes >> 29);
        padded[252] = (uint8_t) (bytes >> 21);
        padded[253] = (uint8_t) (bytes >> 13);
        padded[254] = (uint8_t) (bytes >> 5);
        padded[255] = (uint8_t) (bytes << 3);
        crypto_hashblocks_sha512(state, padded, 256);
    }

    for (size_t i = 0; i < 64; ++i) {
        out[i] = state[i];
    }
}

/*
 * One-shot SHA-256: hashes `inlen` bytes at `in` into the 32 bytes at `out`
 * by running init + finalize on a temporary 40-byte state.
 */
void sha256(uint8_t *out, const uint8_t *in, size_t inlen) {
    uint8_t state[40];

    sha256_inc_init(state);
    sha256_inc_finalize(out, state, in, inlen);
}

/*
 * One-shot SHA-512: hashes `inlen` bytes at `in` into the 64 bytes at `out`
 * by running init + finalize on a temporary 72-byte state.
 */
void sha512(uint8_t *out, const uint8_t *in, size_t inlen) {
    uint8_t state[72];

    sha512_inc_init(state);
    sha512_inc_finalize(out, state, in, inlen);
}

/**
 * mgf1 function based on the SHA-256 hash function
 * Note that inlen should be sufficiently small that it still allows for
 * an array to be allocated on the stack. Typically 'in' is merely a seed.
* Outputs outlen number of bytes */ void mgf1_256(unsigned char *out, unsigned long outlen, const unsigned char *in, unsigned long inlen) { SPX_VLA(uint8_t, inbuf, inlen+4); unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES]; unsigned long i; memcpy(inbuf, in, inlen); /* While we can fit in at least another full block of SHA256 output.. */ for (i = 0; (i+1)*SPX_SHA256_OUTPUT_BYTES <= outlen; i++) { u32_to_bytes(inbuf + inlen, i); sha256(out, inbuf, inlen + 4); out += SPX_SHA256_OUTPUT_BYTES; } /* Until we cannot anymore, and we fill the remainder. */ if (outlen > i*SPX_SHA256_OUTPUT_BYTES) { u32_to_bytes(inbuf + inlen, i); sha256(outbuf, inbuf, inlen + 4); memcpy(out, outbuf, outlen - i*SPX_SHA256_OUTPUT_BYTES); } } /* * mgf1 function based on the SHA-512 hash function */ void mgf1_512(unsigned char *out, unsigned long outlen, const unsigned char *in, unsigned long inlen) { SPX_VLA(uint8_t, inbuf, inlen+4); unsigned char outbuf[SPX_SHA512_OUTPUT_BYTES]; unsigned long i; memcpy(inbuf, in, inlen); /* While we can fit in at least another full block of SHA512 output.. */ for (i = 0; (i+1)*SPX_SHA512_OUTPUT_BYTES <= outlen; i++) { u32_to_bytes(inbuf + inlen, i); sha512(out, inbuf, inlen + 4); out += SPX_SHA512_OUTPUT_BYTES; } /* Until we cannot anymore, and we fill the remainder. 
*/ if (outlen > i*SPX_SHA512_OUTPUT_BYTES) { u32_to_bytes(inbuf + inlen, i); sha512(outbuf, inbuf, inlen + 4); memcpy(out, outbuf, outlen - i*SPX_SHA512_OUTPUT_BYTES); } } /** * Absorb the constant pub_seed using one round of the compression function * This initializes state_seeded and state_seeded_512, which can then be * reused in thash **/ void seed_state(spx_ctx *ctx) { uint8_t block[SPX_SHA512_BLOCK_BYTES]; size_t i; for (i = 0; i < SPX_N; ++i) { block[i] = ctx->pub_seed[i]; } for (i = SPX_N; i < SPX_SHA512_BLOCK_BYTES; ++i) { block[i] = 0; } /* block has been properly initialized for both SHA-256 and SHA-512 */ sha256_inc_init(ctx->state_seeded); sha256_inc_blocks(ctx->state_seeded, block, 1); #if SPX_SHA512 sha512_inc_init(ctx->state_seeded_512); sha512_inc_blocks(ctx->state_seeded_512, block, 1); #endif } ================================================ FILE: ref/sha2.h ================================================ #ifndef SPX_SHA2_H #define SPX_SHA2_H #include "params.h" #define SPX_SHA256_BLOCK_BYTES 64 #define SPX_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal SPX_N */ #define SPX_SHA512_BLOCK_BYTES 128 #define SPX_SHA512_OUTPUT_BYTES 64 #if SPX_SHA256_OUTPUT_BYTES < SPX_N #error Linking against SHA-256 with N larger than 32 bytes is not supported #endif #define SPX_SHA256_ADDR_BYTES 22 #include #include void sha256_inc_init(uint8_t *state); void sha256_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks); void sha256_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen); void sha256(uint8_t *out, const uint8_t *in, size_t inlen); void sha512_inc_init(uint8_t *state); void sha512_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks); void sha512_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen); void sha512(uint8_t *out, const uint8_t *in, size_t inlen); #define mgf1_256 SPX_NAMESPACE(mgf1_256) void mgf1_256(unsigned char *out, unsigned long outlen, const unsigned char *in, 
unsigned long inlen); #define mgf1_512 SPX_NAMESPACE(mgf1_512) void mgf1_512(unsigned char *out, unsigned long outlen, const unsigned char *in, unsigned long inlen); #define seed_state SPX_NAMESPACE(seed_state) void seed_state(spx_ctx *ctx); #endif ================================================ FILE: ref/sha2_offsets.h ================================================ #ifndef SHA2_OFFSETS_H_ #define SHA2_OFFSETS_H_ /* * Offsets of various fields in the address structure when we use SHA2 as * the Sphincs+ hash function */ #define SPX_OFFSET_LAYER 0 /* The byte used to specify the Merkle tree layer */ #define SPX_OFFSET_TREE 1 /* The start of the 8 byte field used to specify the tree */ #define SPX_OFFSET_TYPE 9 /* The byte used to specify the hash type (reason) */ #define SPX_OFFSET_KP_ADDR 10 /* The start of the 4 byte field used to specify the key pair address */ #define SPX_OFFSET_CHAIN_ADDR 17 /* The byte used to specify the chain address (which Winternitz chain) */ #define SPX_OFFSET_HASH_ADDR 21 /* The byte used to specify the hash address (where in the Winternitz chain) */ #define SPX_OFFSET_TREE_HGT 17 /* The byte used to specify the height of this node in the FORS or Merkle tree */ #define SPX_OFFSET_TREE_INDEX 18 /* The start of the 4 byte field used to specify the node in the FORS or Merkle tree */ #define SPX_SHA2 1 #endif /* SHA2_OFFSETS_H_ */ ================================================ FILE: ref/shake_offsets.h ================================================ #if !defined( SHAKE_OFFSETS_H_ ) #define SHAKE_OFFSETS_H_ /* * Offsets of various fields in the address structure when we use SHAKE as * the Sphincs+ hash function */ #define SPX_OFFSET_LAYER 3 /* The byte used to specify the Merkle tree layer */ #define SPX_OFFSET_TREE 8 /* The start of the 8 byte field used to specify the tree */ #define SPX_OFFSET_TYPE 19 /* The byte used to specify the hash type (reason) */ #define SPX_OFFSET_KP_ADDR 20 /* The start of the 4 byte field used to specify 
the key pair address */
#define SPX_OFFSET_CHAIN_ADDR 27 /* The byte used to specify the chain address (which Winternitz chain) */
#define SPX_OFFSET_HASH_ADDR 31 /* The byte used to specify the hash address (where in the Winternitz chain) */
#define SPX_OFFSET_TREE_HGT 27 /* The byte used to specify the height of this node in the FORS or Merkle tree */
#define SPX_OFFSET_TREE_INDEX 28 /* The start of the 4 byte field used to specify the node in the FORS or Merkle tree */

#define SPX_SHAKE 1

#endif /* SHAKE_OFFSETS_H_ */
================================================
FILE: ref/sign.c
================================================
#include
#include
#include

#include "api.h"
#include "params.h"
#include "wots.h"
#include "fors.h"
#include "hash.h"
#include "thash.h"
#include "address.h"
#include "randombytes.h"
#include "utils.h"
#include "merkle.h"

/*
 * Returns the length of a secret key, in bytes
 * (the compile-time constant CRYPTO_SECRETKEYBYTES).
 */
unsigned long long crypto_sign_secretkeybytes(void)
{
    return CRYPTO_SECRETKEYBYTES;
}

/*
 * Returns the length of a public key, in bytes
 * (the compile-time constant CRYPTO_PUBLICKEYBYTES).
 */
unsigned long long crypto_sign_publickeybytes(void)
{
    return CRYPTO_PUBLICKEYBYTES;
}

/*
 * Returns the length of a signature, in bytes
 * (the compile-time constant CRYPTO_BYTES).
 */
unsigned long long crypto_sign_bytes(void)
{
    return CRYPTO_BYTES;
}

/*
 * Returns the length of the seed required to generate a key pair, in bytes
 * (the compile-time constant CRYPTO_SEEDBYTES).
 */
unsigned long long crypto_sign_seedbytes(void)
{
    return CRYPTO_SEEDBYTES;
}

/*
 * Generates an SPX key pair given a seed of length CRYPTO_SEEDBYTES.
 * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]
 * Format pk: [PUB_SEED || root]
 *
 * pk   : output; receives SPX_N bytes of PUB_SEED followed by the SPX_N-byte
 *        root.
 * sk   : output; the seed is copied verbatim to its start and the root is
 *        stored at offset 3*SPX_N.
 * seed : CRYPTO_SEEDBYTES input bytes (presumably 3*SPX_N, given that the
 *        root is placed directly after them - confirm in api.h).
 *
 * Always returns 0.
 */
int crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk,
                             const unsigned char *seed)
{
    spx_ctx ctx;

    /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */
    memcpy(sk, seed, CRYPTO_SEEDBYTES);

    /* PUB_SEED is the third SPX_N-byte field of sk. */
    memcpy(pk, sk + 2*SPX_N, SPX_N);

    memcpy(ctx.pub_seed, pk, SPX_N);
    memcpy(ctx.sk_seed, sk, SPX_N);

    /* This hook allows the hash function instantiation to do whatever
       preparation or computation it needs, based on the public seed. */
    initialize_hash_function(&ctx);

    /* Compute root node of the top-most subtree. */
    merkle_gen_root(sk + 3*SPX_N, &ctx);

    /* The root is the last field of both sk and pk. */
    memcpy(pk + SPX_N, sk + 3*SPX_N, SPX_N);

    return 0;
}

/*
 * Generates an SPX key pair.
 * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]
 * Format pk: [PUB_SEED || root]
 *
 * Draws CRYPTO_SEEDBYTES bytes from randombytes() and hands them to
 * crypto_sign_seed_keypair(). The return value of that call is not
 * inspected; this function always returns 0.
 */
int crypto_sign_keypair(unsigned char *pk, unsigned char *sk)
{
    unsigned char seed[CRYPTO_SEEDBYTES];
    randombytes(seed, CRYPTO_SEEDBYTES);
    crypto_sign_seed_keypair(pk, sk, seed);

    return 0;
}

/**
 * Returns an array containing a detached signature.
 *
 * sig    : output buffer for the signature.
 * siglen : output; set to the signature length.
 * m/mlen : message to sign.
 * sk     : secret key in the layout [SK_SEED || SK_PRF || PUB_SEED || root];
 *          sk + 2*SPX_N therefore doubles as the public key.
 */
int crypto_sign_signature(uint8_t *sig, size_t *siglen,
                          const uint8_t *m, size_t mlen, const uint8_t *sk)
{
    spx_ctx ctx;

    const unsigned char *sk_prf = sk + SPX_N;
    const unsigned char *pk = sk + 2*SPX_N;

    unsigned char optrand[SPX_N];
    unsigned char mhash[SPX_FORS_MSG_BYTES];
    unsigned char root[SPX_N];
    uint32_t i;
    uint64_t tree;
    uint32_t idx_leaf;
    uint32_t wots_addr[8] = {0};
    uint32_t tree_addr[8] = {0};

    memcpy(ctx.sk_seed, sk, SPX_N);
    memcpy(ctx.pub_seed, pk, SPX_N);

    /* This hook allows the hash function instantiation to do whatever
       preparation or computation it needs, based on the public seed. */
    initialize_hash_function(&ctx);

    set_type(wots_addr, SPX_ADDR_TYPE_WOTS);
    set_type(tree_addr, SPX_ADDR_TYPE_HASHTREE);

    /* Optionally, signing can be made non-deterministic using optrand.
       This can help counter side-channel attacks that would benefit from
       getting a large number of traces when the signer uses the same nodes. */
    randombytes(optrand, SPX_N);
    /* Compute the digest randomization value. */
    gen_message_random(sig, sk_prf, optrand, m, mlen, &ctx);

    /* Derive the message digest and leaf index from R, PK and M. */
    hash_message(mhash, &tree, &idx_leaf, sig, pk, m, mlen, &ctx);
    /* Skip past the SPX_N-byte randomizer at the start of the signature. */
    sig += SPX_N;

    set_tree_addr(wots_addr, tree);
    set_keypair_addr(wots_addr, idx_leaf);

    /* Sign the message hash using FORS.
*/
    fors_sign(sig, root, mhash, &ctx, wots_addr);
    sig += SPX_FORS_BYTES;

    /* One WOTS signature plus authentication path per hypertree layer. */
    for (i = 0; i < SPX_D; i++) {
        set_layer_addr(tree_addr, i);
        set_tree_addr(tree_addr, tree);

        copy_subtree_addr(wots_addr, tree_addr);
        set_keypair_addr(wots_addr, idx_leaf);

        merkle_sign(sig, root, &ctx, wots_addr, tree_addr, idx_leaf);
        sig += SPX_WOTS_BYTES + SPX_TREE_HEIGHT * SPX_N;

        /* Update the indices for the next layer. */
        idx_leaf = (tree & ((1 << SPX_TREE_HEIGHT)-1));
        tree = tree >> SPX_TREE_HEIGHT;
    }

    *siglen = SPX_BYTES;

    return 0;
}

/**
 * Verifies a detached signature and message under a given public key.
 *
 * sig/siglen : signature; siglen must equal SPX_BYTES.
 * m/mlen     : message that was signed.
 * pk         : public key in the layout [PUB_SEED || root].
 *
 * Returns 0 when the root recomputed from the signature matches the root in
 * pk, and -1 when siglen is wrong or the roots differ.
 */
int crypto_sign_verify(const uint8_t *sig, size_t siglen,
                       const uint8_t *m, size_t mlen, const uint8_t *pk)
{
    spx_ctx ctx;
    const unsigned char *pub_root = pk + SPX_N;
    unsigned char mhash[SPX_FORS_MSG_BYTES];
    unsigned char wots_pk[SPX_WOTS_BYTES];
    unsigned char root[SPX_N];
    unsigned char leaf[SPX_N];
    unsigned int i;
    uint64_t tree;
    uint32_t idx_leaf;
    uint32_t wots_addr[8] = {0};
    uint32_t tree_addr[8] = {0};
    uint32_t wots_pk_addr[8] = {0};

    /* Reject anything that is not exactly one signature long before any
       byte of sig is read. */
    if (siglen != SPX_BYTES) {
        return -1;
    }

    memcpy(ctx.pub_seed, pk, SPX_N);

    /* This hook allows the hash function instantiation to do whatever
       preparation or computation it needs, based on the public seed. */
    initialize_hash_function(&ctx);

    set_type(wots_addr, SPX_ADDR_TYPE_WOTS);
    set_type(tree_addr, SPX_ADDR_TYPE_HASHTREE);
    set_type(wots_pk_addr, SPX_ADDR_TYPE_WOTSPK);

    /* Derive the message digest and leaf index from R || PK || M. */
    /* The additional SPX_N is a result of the hash domain separator. */
    hash_message(mhash, &tree, &idx_leaf, sig, pk, m, mlen, &ctx);
    sig += SPX_N;

    /* Layer correctly defaults to 0, so no need to set_layer_addr */
    set_tree_addr(wots_addr, tree);
    set_keypair_addr(wots_addr, idx_leaf);

    fors_pk_from_sig(root, sig, mhash, &ctx, wots_addr);
    sig += SPX_FORS_BYTES;

    /* For each subtree.. */
    for (i = 0; i < SPX_D; i++) {
        set_layer_addr(tree_addr, i);
        set_tree_addr(tree_addr, tree);

        copy_subtree_addr(wots_addr, tree_addr);
        set_keypair_addr(wots_addr, idx_leaf);

        copy_keypair_addr(wots_pk_addr, wots_addr);

        /* The WOTS public key is only correct if the signature was correct. */
        /* Initially, root is the FORS pk, but on subsequent iterations it is
           the root of the subtree below the currently processed subtree. */
        wots_pk_from_sig(wots_pk, sig, root, &ctx, wots_addr);
        sig += SPX_WOTS_BYTES;

        /* Compute the leaf node using the WOTS public key. */
        thash(leaf, wots_pk, SPX_WOTS_LEN, &ctx, wots_pk_addr);

        /* Compute the root node of this subtree. */
        compute_root(root, leaf, idx_leaf, 0, sig, SPX_TREE_HEIGHT,
                     &ctx, tree_addr);
        sig += SPX_TREE_HEIGHT * SPX_N;

        /* Update the indices for the next layer. */
        idx_leaf = (tree & ((1 << SPX_TREE_HEIGHT)-1));
        tree = tree >> SPX_TREE_HEIGHT;
    }

    /* Check if the root node equals the root node in the public key. */
    if (memcmp(root, pub_root, SPX_N)) {
        return -1;
    }

    return 0;
}


/**
 * Returns an array containing the signature followed by the message.
 *
 * sm    : output; receives the signature at offset 0 and a copy of the
 *         message at offset SPX_BYTES.
 * smlen : output; set to siglen + mlen.
 * m     : message of mlen bytes. It is copied with memmove only after the
 *         signature has been written to sm.
 * sk    : secret key.
 *
 * Always returns 0.
 */
int crypto_sign(unsigned char *sm, unsigned long long *smlen,
                const unsigned char *m, unsigned long long mlen,
                const unsigned char *sk)
{
    size_t siglen;

    crypto_sign_signature(sm, &siglen, m, (size_t)mlen, sk);

    memmove(sm + SPX_BYTES, m, mlen);
    *smlen = siglen + mlen;

    return 0;
}

/**
 * Verifies a given signature-message pair under a given public key.
 *
 * m     : output; receives the recovered message on success.
 * mlen  : output; set to the message length.
 * sm    : signature followed by message, smlen bytes in total.
 * pk    : public key.
 */
int crypto_sign_open(unsigned char *m, unsigned long long *mlen,
                     const unsigned char *sm, unsigned long long smlen,
                     const unsigned char *pk)
{
    /* The API caller does not necessarily know what size a signature should be
       but SPHINCS+ signatures are always exactly SPX_BYTES.
*/ if (smlen < SPX_BYTES) { memset(m, 0, smlen); *mlen = 0; return -1; } *mlen = smlen - SPX_BYTES; if (crypto_sign_verify(sm, SPX_BYTES, sm + SPX_BYTES, (size_t)*mlen, pk)) { memset(m, 0, smlen); *mlen = 0; return -1; } /* If verification was successful, move the message to the right place. */ memmove(m, sm + SPX_BYTES, *mlen); return 0; } ================================================ FILE: ref/test/benchmark.c ================================================ #define _POSIX_C_SOURCE 199309L #include #include #include #include "../thash.h" #include "../api.h" #include "../fors.h" #include "../wotsx1.h" #include "../params.h" #include "../randombytes.h" #include "cycles.h" #define SPX_MLEN 32 #define NTESTS 10 static void wots_gen_pkx1(unsigned char *pk, const spx_ctx* ctx, uint32_t addr[8]); static int cmp_llu(const void *a, const void*b) { if(*(unsigned long long *)a < *(unsigned long long *)b) return -1; if(*(unsigned long long *)a > *(unsigned long long *)b) return 1; return 0; } static unsigned long long median(unsigned long long *l, size_t llen) { qsort(l,llen,sizeof(unsigned long long),cmp_llu); if(llen%2) return l[llen/2]; else return (l[llen/2-1]+l[llen/2])/2; } static void delta(unsigned long long *l, size_t llen) { unsigned int i; for(i = 0; i < llen - 1; i++) { l[i] = l[i+1] - l[i]; } } static void printfcomma (unsigned long long n) { if (n < 1000) { printf("%llu", n); return; } printfcomma(n / 1000); printf (",%03llu", n % 1000); } static void printfalignedcomma (unsigned long long n, int len) { unsigned long long ncopy = n; int i = 0; while (ncopy > 9) { len -= 1; ncopy /= 10; i += 1; // to account for commas } i = i/3 - 1; // to account for commas for (; i < len; i++) { printf(" "); } printfcomma(n); } static void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul) { unsigned long long med; result /= NTESTS; delta(l, NTESTS + 1); med = median(l, llen); printf("avg. 
%11.2lf us (%2.2lf sec); median ", result, result / 1e6); printfalignedcomma(med, 12); printf(" cycles, %5llux: ", mul); printfalignedcomma(mul*med, 12); printf(" cycles\n"); } #define MEASURE_GENERIC(TEXT, MUL, FNCALL, CORR)\ printf(TEXT);\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\ for(i = 0; i < NTESTS; i++) {\ t[i] = cpucycles() / CORR;\ FNCALL;\ }\ t[NTESTS] = cpucycles();\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\ result = ((stop.tv_sec - start.tv_sec) * 1e6 + \ (stop.tv_nsec - start.tv_nsec) / 1e3) / (double)CORR;\ display_result(result, t, NTESTS, MUL); #define MEASURT(TEXT, MUL, FNCALL)\ MEASURE_GENERIC(\ TEXT, MUL,\ do {\ for (int j = 0; j < 1000; j++) {\ FNCALL;\ }\ } while (0);,\ 1000); #define MEASURE(TEXT, MUL, FNCALL) MEASURE_GENERIC(TEXT, MUL, FNCALL, 1) int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); init_cpucycles(); spx_ctx ctx; unsigned char pk[SPX_PK_BYTES]; unsigned char sk[SPX_SK_BYTES]; unsigned char *m = malloc(SPX_MLEN); unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN); unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN); unsigned char fors_pk[SPX_FORS_PK_BYTES]; unsigned char fors_m[SPX_FORS_MSG_BYTES]; unsigned char fors_sig[SPX_FORS_BYTES]; unsigned char addr[SPX_ADDR_BYTES]; unsigned char block[SPX_N]; unsigned char wots_pk[SPX_WOTS_PK_BYTES]; unsigned long long smlen; unsigned long long mlen; unsigned long long t[NTESTS+1]; struct timespec start, stop; double result; int i; randombytes(m, SPX_MLEN); randombytes(addr, SPX_ADDR_BYTES); printf("Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\n", SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES, SPX_WOTS_W); printf("Running %d iterations.\n", NTESTS); MEASURT("thash ", 1, thash(block, block, 1, &ctx, (uint32_t*)addr)); MEASURE("Generating keypair.. ", 1, crypto_sign_keypair(pk, sk)); MEASURE(" - WOTS pk gen.. ", (1 << SPX_TREE_HEIGHT), wots_gen_pkx1(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Signing.. 
", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk)); MEASURE(" - FORS signing.. ", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr)); MEASURE(" - WOTS pk gen.. ", SPX_D * (1 << SPX_TREE_HEIGHT), wots_gen_pkx1(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Verifying.. ", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk)); printf("Signature size: %d (%.2f KiB)\n", SPX_BYTES, SPX_BYTES / 1024.0); printf("Public key size: %d (%.2f KiB)\n", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0); printf("Secret key size: %d (%.2f KiB)\n", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0); free(m); free(sm); free(mout); return 0; } static void wots_gen_pkx1(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) { struct leaf_info_x1 leaf; unsigned steps[ SPX_WOTS_LEN ] = { 0 }; INITIALIZE_LEAF_INFO_X1(leaf, addr, steps); wots_gen_leafx1(pk, ctx, 0, &leaf); } ================================================ FILE: ref/test/cycles.c ================================================ #include "cycles.h" #if defined(__aarch64__) && defined(__APPLE__) // Adapted from // https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2021/03/24/ #include #include #include #include #define KPERF_LIST \ F(int, kpc_force_all_ctrs_set, int) \ F(int, kpc_set_counting, uint32_t) \ F(int, kpc_set_thread_counting, uint32_t) \ F(int, kpc_set_config, uint32_t, void *) \ F(uint32_t, kpc_get_counter_count, uint32_t) \ F(uint32_t, kpc_get_config_count, uint32_t) \ F(int, kpc_get_thread_counters, int, unsigned int, void *) #define F(ret, name, ...) 
\ typedef ret name##proc(__VA_ARGS__); \ static name##proc *name; KPERF_LIST #undef F uint64_t g_counters[10]; uint64_t g_config[10]; static void configure_rdtsc(void) { if (kpc_set_config(3, g_config)) { printf("kpc_set_config failed\n"); return; } if (kpc_force_all_ctrs_set(1)) { printf("kpc_force_all_ctrs_set failed\n"); return; } if (kpc_set_counting(3)) { printf("kpc_set_counting failed\n"); return; } if (kpc_set_thread_counting(3)) { printf("kpc_set_thread_counting failed\n"); return; } } void init_cpucycles(void) { void *kperf = dlopen( "/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY); if (!kperf) { printf("kperf = %p\n", kperf); return; } #define F(ret, name, ...) \ name = (name##proc *)(dlsym(kperf, #name)); \ if (!name) { \ printf("%s = %p\n", #name, (void *)name); \ return; \ } KPERF_LIST #undef F if (kpc_get_counter_count(3) != 10) { printf("wrong fixed counters count\n"); return; } if (kpc_get_config_count(3) != 8) { printf("wrong fixed config count\n"); return; } g_config[0] = 0x02 | 0x20000; g_config[3] = 0x8d | 0x20000; g_config[4] = 0xcb | 0x20000; g_config[5] = 0x8c | 0x20000; configure_rdtsc(); } unsigned long long cpucycles(void) { static int warned = 0; if (kpc_get_thread_counters(0, 10, g_counters)) { if (!warned) { printf("kpc_get_thread_counters failed, run as sudo?\n"); warned = 1; } return 1; } // g_counters[3 + 2] gives you the number of instructions 'decoded' // whereas g_counters[1] might give you the number of instructions 'retired'. 
return g_counters[0 + 2]; } #else void init_cpucycles(void) { } unsigned long long cpucycles(void) { unsigned long long result; __asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (result) :: "%rdx"); return result; } #endif ================================================ FILE: ref/test/cycles.h ================================================ #ifndef SPX_CYCLES_H #define SPX_CYCLES_H void init_cpucycles(void); unsigned long long cpucycles(void); #endif ================================================ FILE: ref/test/fors.c ================================================ #include #include #include "../context.h" #include "../hash.h" #include "../fors.h" #include "../randombytes.h" #include "../params.h" int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); spx_ctx ctx; unsigned char pk1[SPX_FORS_PK_BYTES]; unsigned char pk2[SPX_FORS_PK_BYTES]; unsigned char sig[SPX_FORS_BYTES]; unsigned char m[SPX_FORS_MSG_BYTES]; uint32_t addr[8] = {0}; randombytes(ctx.sk_seed, SPX_N); randombytes(ctx.pub_seed, SPX_N); randombytes(m, SPX_FORS_MSG_BYTES); randombytes((unsigned char *)addr, 8 * sizeof(uint32_t)); printf("Testing FORS signature and PK derivation.. 
"); initialize_hash_function(&ctx); fors_sign(sig, pk1, m, &ctx, addr); fors_pk_from_sig(pk2, sig, m, &ctx, addr); if (memcmp(pk1, pk2, SPX_FORS_PK_BYTES)) { printf("failed!\n"); return -1; } printf("successful.\n"); return 0; } ================================================ FILE: ref/test/haraka.c ================================================ #include #include #include #include #include "../haraka.c" #include "../randombytes.h" static int test_haraka_S_incremental(void) { unsigned char input[521]; unsigned char check[521]; unsigned char output[521]; uint8_t s_inc_absorb[65]; uint8_t s_inc_squeeze[65]; uint8_t s_inc_squeeze_all[65]; uint8_t s_inc_both[65]; uint8_t s_combined[64]; int i; int absorbed; int squeezed; int returncode = 0; randombytes(input, 521); haraka_S(check, 521, input, 521); haraka_S_inc_init(s_inc_absorb); absorbed = 0; for (i = 0; i < 521 && absorbed + i <= 521; i++) { haraka_S_inc_absorb(s_inc_absorb, input + absorbed, i); absorbed += i; } haraka_S_inc_absorb(s_inc_absorb, input + absorbed, 521 - absorbed); haraka_S_inc_finalize(s_inc_absorb); memset(s_combined, 0, 64); haraka_S_absorb(s_combined, HARAKAS_RATE, input, 521, 0x1F); if (memcmp(s_inc_absorb, s_combined, 64 * sizeof(uint8_t))) { printf("ERROR haraka_S state after incremental absorb did not match all-at-once absorb.\n"); printf(" Expected: "); for (i = 0; i < 64; i++) { printf("%02X", s_combined[i]); } printf("\n"); printf(" State: "); for (i = 0; i < 64; i++) { printf("%02X", s_inc_absorb[i]); } printf("\n"); returncode = 1; } memcpy(s_inc_both, s_inc_absorb, 65 * sizeof(uint8_t)); haraka_S_squeezeblocks(output, 3, s_inc_absorb, HARAKAS_RATE); if (memcmp(check, output, 3*HARAKAS_RATE)) { printf("ERROR haraka_S incremental absorb did not match haraka_S.\n"); printf(" Expected: "); for (i = 0; i < 3*HARAKAS_RATE; i++) { printf("%02X", check[i]); } printf("\n"); printf(" Received: "); for (i = 0; i < 3*HARAKAS_RATE; i++) { printf("%02X", output[i]); } printf("\n"); returncode = 1; 
} memset(s_inc_squeeze, 0, 65); haraka_S_absorb(s_inc_squeeze, HARAKAS_RATE, input, 521, 0x1F); s_inc_squeeze[64] = 0; memcpy(s_inc_squeeze_all, s_inc_squeeze, 65 * sizeof(uint8_t)); haraka_S_inc_squeeze(output, 521, s_inc_squeeze_all); if (memcmp(check, output, 521)) { printf("ERROR haraka_S incremental squeeze-all did not match haraka_S.\n"); printf(" Expected: "); for (i = 0; i < 521; i++) { printf("%02X", check[i]); } printf("\n"); printf(" Received: "); for (i = 0; i < 521; i++) { printf("%02X", output[i]); } printf("\n"); returncode = 1; } squeezed = 0; memset(output, 0, 521); for (i = 0; i < 521 && squeezed + i <= 521; i++) { haraka_S_inc_squeeze(output + squeezed, i, s_inc_squeeze); squeezed += i; } haraka_S_inc_squeeze(output + squeezed, 521 - squeezed, s_inc_squeeze); if (memcmp(check, output, 521)) { printf("ERROR haraka_S incremental squeeze did not match haraka_S.\n"); printf(" Expected: "); for (i = 0; i < 521; i++) { printf("%02X", check[i]); } printf("\n"); printf(" Received: "); for (i = 0; i < 521; i++) { printf("%02X", output[i]); } printf("\n"); returncode = 1; } squeezed = 0; memset(output, 0, 521); for (i = 0; i < 521 && squeezed + i <= 521; i++) { haraka_S_inc_squeeze(output + squeezed, i, s_inc_both); squeezed += i; } haraka_S_inc_squeeze(output + squeezed, 521 - squeezed, s_inc_both); if (memcmp(check, output, 521)) { printf("ERROR haraka_S incremental absorb + squeeze did not match haraka_S.\n"); printf(" Expected: "); for (i = 0; i < 521; i++) { printf("%02X", check[i]); } printf("\n"); printf(" Received: "); for (i = 0; i < 521; i++) { printf("%02X", output[i]); } printf("\n"); returncode = 1; } return returncode; } int main(void) { int result = 0; result += test_haraka_S_incremental(); if (result != 0) { puts("Errors occurred"); } return result; } ================================================ FILE: ref/test/spx.c ================================================ #include #include #include #include "../api.h" #include "../params.h" 
#include "../randombytes.h"

#define SPX_MLEN 32
#define SPX_SIGNATURES 1

/*
 * End-to-end test of the signature API: generates a keypair, signs a random
 * SPX_MLEN-byte message SPX_SIGNATURES times and checks that
 *   - the signed message has length SPX_BYTES + SPX_MLEN,
 *   - it verifies and yields the original message,
 *   - verification also works when output and input share one buffer,
 *   - flipping a message bit (and, with SPX_TEST_INVALIDSIG, one bit per
 *     SPX_N-byte signature element) makes verification fail.
 * Returns 0 on success and -1 if any check (or an allocation) failed.
 */
int main(void)
{
    int ret = 0;
    int i;

    /* Make stdout buffer more responsive. */
    setbuf(stdout, NULL);

    unsigned char pk[SPX_PK_BYTES];
    unsigned char sk[SPX_SK_BYTES];
    unsigned char *m = malloc(SPX_MLEN);
    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);
    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);
    unsigned long long smlen;
    unsigned long long mlen;

    if (m == NULL || sm == NULL || mout == NULL) {
        printf("Allocating buffers failed!\n");
        free(m);
        free(sm);
        free(mout);
        return -1;
    }

    randombytes(m, SPX_MLEN);

    printf("Generating keypair.. ");

    if (crypto_sign_keypair(pk, sk)) {
        printf("failed!\n");
        /* Release the buffers on this early exit as well. */
        free(m);
        free(sm);
        free(mout);
        return -1;
    }
    printf("successful.\n");

    printf("Testing %d signatures.. \n", SPX_SIGNATURES);

    for (i = 0; i < SPX_SIGNATURES; i++) {
        printf(" - iteration #%d:\n", i);

        crypto_sign(sm, &smlen, m, SPX_MLEN, sk);

        if (smlen != SPX_BYTES + SPX_MLEN) {
            /* Report the value that was actually compared against. */
            printf(" X smlen incorrect [%llu != %u]!\n",
                   smlen, (unsigned int)(SPX_BYTES + SPX_MLEN));
            ret = -1;
        }
        else {
            printf(" smlen as expected [%llu].\n", smlen);
        }

        /* Test if signature is valid. */
        if (crypto_sign_open(mout, &mlen, sm, smlen, pk)) {
            printf(" X verification failed!\n");
            ret = -1;
        }
        else {
            printf(" verification succeeded.\n");
        }

        /* Test if the correct message was recovered. */
        if (mlen != SPX_MLEN) {
            printf(" X mlen incorrect [%llu != %u]!\n", mlen, SPX_MLEN);
            ret = -1;
        }
        else {
            printf(" mlen as expected [%llu].\n", mlen);
        }
        if (memcmp(m, mout, SPX_MLEN)) {
            printf(" X output message incorrect!\n");
            ret = -1;
        }
        else {
            printf(" output message as expected.\n");
        }

        /* Test if signature is valid when validating in-place.  This runs on
           a copy held in mout: an in-place open writes its output into the
           buffer it verifies, and sm has to stay an intact signed message
           for the tamper tests below to be meaningful. */
        if (smlen > SPX_BYTES + SPX_MLEN) {
            /* The copy would not fit into mout; smlen was already reported. */
            printf(" X in-place verification failed!\n");
            ret = -1;
        }
        else {
            memcpy(mout, sm, (size_t)smlen);
            if (crypto_sign_open(mout, &mlen, mout, smlen, pk)) {
                printf(" X in-place verification failed!\n");
                ret = -1;
            }
            else {
                printf(" in-place verification succeeded.\n");
            }
        }

        /* Test if flipping bits invalidates the signature (it should). */

        /* Flip the lowest bit of the last byte of sm, i.e. of the message
           that follows the signature. Should invalidate. */
        sm[smlen - 1] ^= 1;
        if (!crypto_sign_open(mout, &mlen, sm, smlen, pk)) {
            printf(" X flipping a bit of m DID NOT invalidate signature!\n");
            ret = -1;
        }
        else {
            printf(" flipping a bit of m invalidates signature.\n");
        }
        sm[smlen - 1] ^= 1;

#ifdef SPX_TEST_INVALIDSIG
        int j;
        /* Flip one bit per hash; the signature is entirely hashes. */
        for (j = 0; j < (int)(smlen - SPX_MLEN); j += SPX_N) {
            sm[j] ^= 1;
            if (!crypto_sign_open(mout, &mlen, sm, smlen, pk)) {
                printf(" X flipping bit %d DID NOT invalidate sig + m!\n", j);
                sm[j] ^= 1;
                ret = -1;
                break;
            }
            sm[j] ^= 1;
        }
        if (j >= (int)(smlen - SPX_MLEN)) {
            printf(" changing any signature hash invalidates signature.\n");
        }
#endif
    }

    free(m);
    free(sm);
    free(mout);

    return ret;
}

================================================
FILE: ref/thash.h
================================================
#ifndef SPX_THASH_H
#define SPX_THASH_H

#include "context.h"
#include "params.h"

#include

/*
 * Tweakable hash: maps `inblocks` concatenated SPX_N-byte blocks at `in`,
 * together with the context and the 8-word address, to SPX_N bytes at `out`.
 */
#define thash SPX_NAMESPACE(thash)
void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8]);

#endif

================================================
FILE: ref/thash_haraka_robust.c
================================================
#include
#include

#include "thash.h"
#include "address.h"
#include "params.h"
#include "utils.h"

#include "haraka.h"

/**
 * Takes an array of inblocks concatenated arrays of SPX_N bytes.
 *
 * Robust variant: the input is XORed with a bitmask derived from the
 * address before it is hashed. A single block goes through haraka256 /
 * haraka512; any other block count goes through haraka_S.
 */
void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
    /* buf and bitmask are only used by the multi-block branch below. */
    SPX_VLA(uint8_t, buf, SPX_ADDR_BYTES + inblocks*SPX_N);
    SPX_VLA(uint8_t, bitmask, inblocks*SPX_N);
    unsigned char outbuf[32];
    unsigned char buf_tmp[64];
    unsigned int i;

    if (inblocks == 1) {
        /* F function */
        /* Since SPX_N may be smaller than 32, we need a temporary buffer. */
        memset(buf_tmp, 0, 64);
        /* 32 bytes == the eight 32-bit address words. */
        memcpy(buf_tmp, addr, 32);

        /* The haraka256 output over the address serves as the bitmask. */
        haraka256(outbuf, buf_tmp, ctx);
        for (i = 0; i < inblocks * SPX_N; i++) {
            buf_tmp[SPX_ADDR_BYTES + i] = in[i] ^ outbuf[i];
        }
        haraka512(outbuf, buf_tmp, ctx);
        /* Truncate the digest to SPX_N bytes. */
        memcpy(out, outbuf, SPX_N);
    } else {
        /* All other tweakable hashes*/
        memcpy(buf, addr, 32);
        haraka_S(bitmask, inblocks * SPX_N, buf, SPX_ADDR_BYTES, ctx);

        for (i = 0; i < inblocks * SPX_N; i++) {
            buf[SPX_ADDR_BYTES + i] = in[i] ^ bitmask[i];
        }

        haraka_S(out, SPX_N, buf, SPX_ADDR_BYTES + inblocks*SPX_N, ctx);
    }
}

================================================
FILE: ref/thash_haraka_simple.c
================================================
#include
#include

#include "thash.h"
#include "address.h"
#include "params.h"
#include "utils.h"

#include "haraka.h"

/**
 * Takes an array of inblocks concatenated arrays of SPX_N bytes.
 *
 * Simple variant: hashes address || input without a bitmask. A single block
 * goes through haraka512; any other block count goes through haraka_S.
 */
void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
    /* buf is only used by the multi-block branch below. */
    SPX_VLA(uint8_t, buf, SPX_ADDR_BYTES + inblocks*SPX_N);
    unsigned char outbuf[32];
    unsigned char buf_tmp[64];

    if (inblocks == 1) {
        /* F function */
        /* Since SPX_N may be smaller than 32, we need a temporary buffer. */
        memset(buf_tmp, 0, 64);
        memcpy(buf_tmp, addr, 32);
        memcpy(buf_tmp + SPX_ADDR_BYTES, in, SPX_N);

        haraka512(outbuf, buf_tmp, ctx);
        /* Truncate the digest to SPX_N bytes. */
        memcpy(out, outbuf, SPX_N);
    } else {
        /* All other tweakable hashes*/
        memcpy(buf, addr, 32);
        memcpy(buf + SPX_ADDR_BYTES, in, inblocks * SPX_N);

        haraka_S(out, SPX_N, buf, SPX_ADDR_BYTES + inblocks*SPX_N, ctx);
    }
}

================================================
FILE: ref/thash_sha2_robust.c
================================================
#include
#include

#include "thash.h"
#include "address.h"
#include "params.h"
#include "utils.h"
#include "sha2.h"

#if SPX_SHA512
static void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8]);
#endif

/**
 * Takes an array of inblocks concatenated arrays of SPX_N bytes.
*/
void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
    /* Robust SHA-2 variant: the input is XORed with an MGF1 bitmask that is
       derived from pub_seed and the address, then hashed starting from the
       seeded state stored in ctx. */
#if SPX_SHA512
    /* Inputs of more than one block are handled by the SHA-512 variant. */
    if (inblocks > 1) {
        thash_512(out, in, inblocks, ctx, addr);
        return;
    }
#endif
    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];
    SPX_VLA(uint8_t, bitmask, inblocks * SPX_N);
    /* NOTE(review): sized with SPX_SHA256_OUTPUT_BYTES although the writes
       below only index up to SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N;
       confirm the larger size is intentional. */
    SPX_VLA(uint8_t, buf, SPX_N + SPX_SHA256_OUTPUT_BYTES + inblocks*SPX_N);
    uint8_t sha2_state[40];
    unsigned int i;

    /* buf = pub_seed || addr is the seed for the bitmask. */
    memcpy(buf, ctx->pub_seed, SPX_N);
    memcpy(buf + SPX_N, addr, SPX_SHA256_ADDR_BYTES);
    mgf1_256(bitmask, inblocks * SPX_N, buf, SPX_N + SPX_SHA256_ADDR_BYTES);

    /* Retrieve precomputed state containing pub_seed */
    memcpy(sha2_state, ctx->state_seeded, 40 * sizeof(uint8_t));

    /* Append the masked input after the address. */
    for (i = 0; i < inblocks * SPX_N; i++) {
        buf[SPX_N + SPX_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i];
    }

    /* pub_seed is skipped here (buf + SPX_N): only addr || masked input is
       fed to the finalize call, on top of the copied state. */
    sha256_inc_finalize(outbuf, sha2_state, buf + SPX_N,
                        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);
    /* Truncate the digest to SPX_N bytes. */
    memcpy(out, outbuf, SPX_N);
}

#if SPX_SHA512
/* Same construction as thash above, using mgf1_512, the 72-byte seeded
   state ctx->state_seeded_512 and sha512_inc_finalize. */
static void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
    unsigned char outbuf[SPX_SHA512_OUTPUT_BYTES];
    SPX_VLA(uint8_t, bitmask, inblocks * SPX_N);
    SPX_VLA(uint8_t, buf, SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);
    uint8_t sha2_state[72];
    unsigned int i;

    /* buf = pub_seed || addr is the seed for the bitmask. */
    memcpy(buf, ctx->pub_seed, SPX_N);
    memcpy(buf + SPX_N, addr, SPX_SHA256_ADDR_BYTES);
    mgf1_512(bitmask, inblocks * SPX_N, buf, SPX_N + SPX_SHA256_ADDR_BYTES);

    /* Retrieve precomputed state containing pub_seed */
    memcpy(sha2_state, ctx->state_seeded_512, 72 * sizeof(uint8_t));

    /* Append the masked input after the address. */
    for (i = 0; i < inblocks * SPX_N; i++) {
        buf[SPX_N + SPX_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i];
    }

    sha512_inc_finalize(outbuf, sha2_state, buf + SPX_N,
                        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);
    /* Truncate the digest to SPX_N bytes. */
    memcpy(out, outbuf, SPX_N);
}
#endif

================================================
FILE: ref/thash_sha2_simple.c
================================================
#include
#include

#include "thash.h"
#include "address.h"
#include "params.h"
#include "utils.h"
#include "sha2.h"

#if SPX_SHA512
static void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8]);
#endif

/**
 * Takes an array of inblocks concatenated arrays of SPX_N bytes.
 *
 * Simple SHA-2 variant: hashes addr || input on top of the seeded state
 * stored in ctx, without a bitmask.
 */
void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
#if SPX_SHA512
    /* Inputs of more than one block are handled by the SHA-512 variant. */
    if (inblocks > 1) {
        thash_512(out, in, inblocks, ctx, addr);
        return;
    }
#endif

    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];
    uint8_t sha2_state[40];
    SPX_VLA(uint8_t, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);

    /* Retrieve precomputed state containing pub_seed */
    memcpy(sha2_state, ctx->state_seeded, 40 * sizeof(uint8_t));

    /* buf = addr || input */
    memcpy(buf, addr, SPX_SHA256_ADDR_BYTES);
    memcpy(buf + SPX_SHA256_ADDR_BYTES, in, inblocks * SPX_N);

    sha256_inc_finalize(outbuf, sha2_state, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);
    /* Truncate the digest to SPX_N bytes. */
    memcpy(out, outbuf, SPX_N);
}

#if SPX_SHA512
/* Same construction as thash above, using the 72-byte seeded state
   ctx->state_seeded_512 and sha512_inc_finalize. */
static void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
    unsigned char outbuf[SPX_SHA512_OUTPUT_BYTES];
    uint8_t sha2_state[72];
    SPX_VLA(uint8_t, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);

    /* Retrieve precomputed state containing pub_seed */
    memcpy(sha2_state, ctx->state_seeded_512, 72 * sizeof(uint8_t));

    /* buf = addr || input */
    memcpy(buf, addr, SPX_SHA256_ADDR_BYTES);
    memcpy(buf + SPX_SHA256_ADDR_BYTES, in, inblocks * SPX_N);

    sha512_inc_finalize(outbuf, sha2_state, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);
    /* Truncate the digest to SPX_N bytes. */
    memcpy(out, outbuf, SPX_N);
}
#endif

================================================
FILE: ref/thash_shake_robust.c
================================================
#include
#include

#include "thash.h"
#include "address.h"
#include "params.h"
#include "utils.h"

#include "fips202.h"

/**
 * Takes an array of inblocks concatenated arrays of SPX_N bytes.
*/ void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks, const spx_ctx *ctx, uint32_t addr[8]) { SPX_VLA(uint8_t, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N); SPX_VLA(uint8_t, bitmask, inblocks * SPX_N); unsigned int i; memcpy(buf, ctx->pub_seed, SPX_N); memcpy(buf + SPX_N, addr, SPX_ADDR_BYTES); shake256(bitmask, inblocks * SPX_N, buf, SPX_N + SPX_ADDR_BYTES); for (i = 0; i < inblocks * SPX_N; i++) { buf[SPX_N + SPX_ADDR_BYTES + i] = in[i] ^ bitmask[i]; } shake256(out, SPX_N, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N); } ================================================ FILE: ref/thash_shake_simple.c ================================================ #include #include #include "thash.h" #include "address.h" #include "params.h" #include "utils.h" #include "fips202.h" /** * Takes an array of inblocks concatenated arrays of SPX_N bytes. */ void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks, const spx_ctx *ctx, uint32_t addr[8]) { SPX_VLA(uint8_t, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N); memcpy(buf, ctx->pub_seed, SPX_N); memcpy(buf + SPX_N, addr, SPX_ADDR_BYTES); memcpy(buf + SPX_N + SPX_ADDR_BYTES, in, inblocks * SPX_N); shake256(out, SPX_N, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N); } ================================================ FILE: ref/utils.c ================================================ #include #include "utils.h" #include "params.h" #include "hash.h" #include "thash.h" #include "address.h" /** * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. */ void ull_to_bytes(unsigned char *out, unsigned int outlen, unsigned long long in) { int i; /* Iterate over out in decreasing order, for big-endianness. 
*/ for (i = (signed int)outlen - 1; i >= 0; i--) { out[i] = in & 0xff; in = in >> 8; } } void u32_to_bytes(unsigned char *out, uint32_t in) { out[0] = (unsigned char)(in >> 24); out[1] = (unsigned char)(in >> 16); out[2] = (unsigned char)(in >> 8); out[3] = (unsigned char)in; } /** * Converts the inlen bytes in 'in' from big-endian byte order to an integer. */ unsigned long long bytes_to_ull(const unsigned char *in, unsigned int inlen) { unsigned long long retval = 0; unsigned int i; for (i = 0; i < inlen; i++) { retval |= ((unsigned long long)in[i]) << (8*(inlen - 1 - i)); } return retval; } /** * Computes a root node given a leaf and an auth path. * Expects address to be complete other than the tree_height and tree_index. */ void compute_root(unsigned char *root, const unsigned char *leaf, uint32_t leaf_idx, uint32_t idx_offset, const unsigned char *auth_path, uint32_t tree_height, const spx_ctx *ctx, uint32_t addr[8]) { uint32_t i; unsigned char buffer[2 * SPX_N]; /* If leaf_idx is odd (last bit = 1), current path element is a right child and auth_path has to go left. Otherwise it is the other way around. */ if (leaf_idx & 1) { memcpy(buffer + SPX_N, leaf, SPX_N); memcpy(buffer, auth_path, SPX_N); } else { memcpy(buffer, leaf, SPX_N); memcpy(buffer + SPX_N, auth_path, SPX_N); } auth_path += SPX_N; for (i = 0; i < tree_height - 1; i++) { leaf_idx >>= 1; idx_offset >>= 1; /* Set the address of the node we're creating. */ set_tree_height(addr, i + 1); set_tree_index(addr, leaf_idx + idx_offset); /* Pick the right or left neighbor, depending on parity of the node. */ if (leaf_idx & 1) { thash(buffer + SPX_N, buffer, 2, ctx, addr); memcpy(buffer, auth_path, SPX_N); } else { thash(buffer, buffer, 2, ctx, addr); memcpy(buffer + SPX_N, auth_path, SPX_N); } auth_path += SPX_N; } /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ leaf_idx >>= 1; idx_offset >>= 1; set_tree_height(addr, tree_height); set_tree_index(addr, leaf_idx + idx_offset); thash(root, buffer, 2, ctx, addr); } /** * For a given leaf index, computes the authentication path and the resulting * root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE). * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. */ void treehash(unsigned char *root, unsigned char *auth_path, const spx_ctx* ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leaf)( unsigned char* /* leaf */, const spx_ctx* /* ctx */, uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */), uint32_t tree_addr[8]) { SPX_VLA(uint8_t, stack, (tree_height+1)*SPX_N); SPX_VLA(unsigned int, heights, tree_height+1); unsigned int offset = 0; uint32_t idx; uint32_t tree_idx; for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { /* Add the next leaf node to the stack. */ gen_leaf(stack + offset*SPX_N, ctx, idx + idx_offset, tree_addr); offset++; heights[offset - 1] = 0; /* If this is a node we need for the auth path.. */ if ((leaf_idx ^ 0x1) == idx) { memcpy(auth_path, stack + (offset - 1)*SPX_N, SPX_N); } /* While the top-most nodes are of equal height.. */ while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { /* Compute index of the new node, in the next layer. */ tree_idx = (idx >> (heights[offset - 1] + 1)); /* Set the address of the node we're creating. */ set_tree_height(tree_addr, heights[offset - 1] + 1); set_tree_index(tree_addr, tree_idx + (idx_offset >> (heights[offset-1] + 1))); /* Hash the top-most nodes from the stack together. */ thash(stack + (offset - 2)*SPX_N, stack + (offset - 2)*SPX_N, 2, ctx, tree_addr); offset--; /* Note that the top-most node is now one layer higher. 
*/ heights[offset - 1]++; /* If this is a node we need for the auth path.. */ if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { memcpy(auth_path + heights[offset - 1]*SPX_N, stack + (offset - 1)*SPX_N, SPX_N); } } } memcpy(root, stack, SPX_N); } ================================================ FILE: ref/utils.h ================================================ #ifndef SPX_UTILS_H #define SPX_UTILS_H #include #include "params.h" #include "context.h" /* To support MSVC use alloca() instead of VLAs. See #20. */ #ifdef _MSC_VER /* MSVC defines _alloca in malloc.h */ # include /* Note: _malloca(), which is recommended over deprecated _alloca, requires that you call _freea(). So we stick with _alloca */ # define SPX_VLA(__t,__x,__s) __t *__x = (__t*)_alloca((__s)*sizeof(__t)) #else # define SPX_VLA(__t,__x,__s) __t __x[__s] #endif /** * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. */ #define ull_to_bytes SPX_NAMESPACE(ull_to_bytes) void ull_to_bytes(unsigned char *out, unsigned int outlen, unsigned long long in); #define u32_to_bytes SPX_NAMESPACE(u32_to_bytes) void u32_to_bytes(unsigned char *out, uint32_t in); /** * Converts the inlen bytes in 'in' from big-endian byte order to an integer. */ #define bytes_to_ull SPX_NAMESPACE(bytes_to_ull) unsigned long long bytes_to_ull(const unsigned char *in, unsigned int inlen); /** * Computes a root node given a leaf and an auth path. * Expects address to be complete other than the tree_height and tree_index. */ #define compute_root SPX_NAMESPACE(compute_root) void compute_root(unsigned char *root, const unsigned char *leaf, uint32_t leaf_idx, uint32_t idx_offset, const unsigned char *auth_path, uint32_t tree_height, const spx_ctx *ctx, uint32_t addr[8]); /** * For a given leaf index, computes the authentication path and the resulting * root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. 
SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE). * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. */ #define treehash SPX_NAMESPACE(treehash) void treehash(unsigned char *root, unsigned char *auth_path, const spx_ctx* ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leaf)( unsigned char* /* leaf */, const spx_ctx* ctx /* ctx */, uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */), uint32_t tree_addr[8]); #endif ================================================ FILE: ref/utilsx1.c ================================================ #include #include "utils.h" #include "utilsx1.h" #include "params.h" #include "thash.h" #include "address.h" /* * Generate the entire Merkle tree, computing the authentication path for * leaf_idx, and the resulting root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE) * * This expects tree_addr to be initialized to the addr structures for the * Merkle tree nodes * * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. * * This works by using the standard Merkle tree building algorithm, */ void treehashx1(unsigned char *root, unsigned char *auth_path, const spx_ctx* ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leaf)( unsigned char* /* Where to write the leaves */, const spx_ctx* /* ctx */, uint32_t idx, void *info), uint32_t tree_addr[8], void *info) { /* This is where we keep the intermediate nodes */ SPX_VLA(uint8_t, stack, tree_height*SPX_N); uint32_t idx; uint32_t max_idx = (uint32_t)((1 << tree_height) - 1); for (idx = 0;; idx++) { unsigned char current[2*SPX_N]; /* Current logical node is at */ /* index[SPX_N]. 
We do this to minimize the number of copies */ /* needed during a thash */ gen_leaf( ¤t[SPX_N], ctx, idx + idx_offset, info ); /* Now combine the freshly generated right node with previously */ /* generated left ones */ uint32_t internal_idx_offset = idx_offset; uint32_t internal_idx = idx; uint32_t internal_leaf = leaf_idx; uint32_t h; /* The height we are in the Merkle tree */ for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) { /* Check if we hit the top of the tree */ if (h == tree_height) { /* We hit the root; return it */ memcpy( root, ¤t[SPX_N], SPX_N ); return; } /* * Check if the node we have is a part of the * authentication path; if it is, write it out */ if ((internal_idx ^ internal_leaf) == 0x01) { memcpy( &auth_path[ h * SPX_N ], ¤t[SPX_N], SPX_N ); } /* * Check if we're at a left child; if so, stop going up the stack * Exception: if we've reached the end of the tree, keep on going * (so we combine the last 4 nodes into the one root node in two * more iterations) */ if ((internal_idx & 1) == 0 && idx < max_idx) { break; } /* Ok, we're at a right node */ /* Now combine the left and right logical nodes together */ /* Set the address of the node we're creating. */ internal_idx_offset >>= 1; set_tree_height(tree_addr, h + 1); set_tree_index(tree_addr, internal_idx/2 + internal_idx_offset ); unsigned char *left = &stack[h * SPX_N]; memcpy( ¤t[0], left, SPX_N ); thash( ¤t[1 * SPX_N], ¤t[0 * SPX_N], 2, ctx, tree_addr); } /* We've hit a left child; save the current for when we get the */ /* corresponding right right */ memcpy( &stack[h * SPX_N], ¤t[SPX_N], SPX_N); } } ================================================ FILE: ref/utilsx1.h ================================================ #ifndef SPX_UTILSX4_H #define SPX_UTILSX4_H #include #include "params.h" #include "context.h" /** * For a given leaf index, computes the authentication path and the resulting * root node using Merkle's TreeHash algorithm. 
* Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE). * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. */ #define treehashx1 SPX_NAMESPACE(treehashx1) void treehashx1(unsigned char *root, unsigned char *auth_path, const spx_ctx* ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leaf)( unsigned char* /* Where to write the leaf */, const spx_ctx* /* ctx */, uint32_t addr_idx, void *info), uint32_t tree_addrx4[8], void *info); #endif ================================================ FILE: ref/wots.c ================================================ #include #include #include "utils.h" #include "utilsx1.h" #include "hash.h" #include "thash.h" #include "wots.h" #include "wotsx1.h" #include "address.h" #include "params.h" // TODO clarify address expectations, and make them more uniform. // TODO i.e. do we expect types to be set already? // TODO and do we expect modifications or copies? /** * Computes the chaining function. * out and in have to be n-byte arrays. * * Interprets in as start-th value of the chain. * addr has to contain the address of the chain. */ static void gen_chain(unsigned char *out, const unsigned char *in, unsigned int start, unsigned int steps, const spx_ctx *ctx, uint32_t addr[8]) { uint32_t i; /* Initialize out with the value at position 'start'. */ memcpy(out, in, SPX_N); /* Iterate 'steps' calls to the hash function. */ for (i = start; i < (start+steps) && i < SPX_WOTS_W; i++) { set_hash_addr(addr, i); thash(out, out, 1, ctx, addr); } } /** * base_w algorithm as described in draft. * Interprets an array of bytes as integers in base w. * This only works when log_w is a divisor of 8. 
*/ static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { int in = 0; int out = 0; unsigned char total; int bits = 0; int consumed; for (consumed = 0; consumed < out_len; consumed++) { if (bits == 0) { total = input[in]; in++; bits += 8; } bits -= SPX_WOTS_LOGW; output[out] = (total >> bits) & (SPX_WOTS_W - 1); out++; } } /* Computes the WOTS+ checksum over a message (in base_w). */ static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { unsigned int csum = 0; unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8]; unsigned int i; /* Compute checksum. */ for (i = 0; i < SPX_WOTS_LEN1; i++) { csum += SPX_WOTS_W - 1 - msg_base_w[i]; } /* Convert checksum to base_w. */ /* Make sure expected empty zero bits are the least significant bits. */ csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8); ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes); } /* Takes a message and derives the matching chain lengths. */ void chain_lengths(unsigned int *lengths, const unsigned char *msg) { base_w(lengths, SPX_WOTS_LEN1, msg); wots_checksum(lengths + SPX_WOTS_LEN1, lengths); } /** * Takes a WOTS signature and an n-byte message, computes a WOTS public key. * * Writes the computed public key to 'pk'. */ void wots_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *msg, const spx_ctx *ctx, uint32_t addr[8]) { unsigned int lengths[SPX_WOTS_LEN]; uint32_t i; chain_lengths(lengths, msg); for (i = 0; i < SPX_WOTS_LEN; i++) { set_chain_addr(addr, i); gen_chain(pk + i*SPX_N, sig + i*SPX_N, lengths[i], SPX_WOTS_W - 1 - lengths[i], ctx, addr); } } ================================================ FILE: ref/wots.h ================================================ #ifndef SPX_WOTS_H #define SPX_WOTS_H #include #include "params.h" #include "context.h" /** * Takes a WOTS signature and an n-byte message, computes a WOTS public key. 
*
 * Writes the computed public key to 'pk'.
 */
#define wots_pk_from_sig SPX_NAMESPACE(wots_pk_from_sig)
void wots_pk_from_sig(unsigned char *pk,
                      const unsigned char *sig, const unsigned char *msg,
                      const spx_ctx *ctx, uint32_t addr[8]);

/*
 * Compute the chain lengths needed for a given message hash
 */
#define chain_lengths SPX_NAMESPACE(chain_lengths)
void chain_lengths(unsigned int *lengths, const unsigned char *msg);

#endif

================================================
FILE: ref/wotsx1.c
================================================
#include
#include

#include "utils.h"
#include "hash.h"
#include "thash.h"
#include "wots.h"
#include "wotsx1.h"
#include "address.h"
#include "params.h"

/*
 * This generates a WOTS public key
 * It also generates the WOTS signature if leaf_info indicates
 * that we're signing with this WOTS key
 *
 * dest     receives the result of the final thash over all chain ends
 * leaf_idx index of the WOTS key pair; set as keypair address in both
 *          address copies held in the info structure
 * v_info   pointer to a struct leaf_info_x1; its leaf_addr and pk_addr are
 *          modified, and wots_sig is written when leaf_idx equals
 *          wots_sign_leaf
 */
void wots_gen_leafx1(unsigned char *dest,
                   const spx_ctx *ctx,
                   uint32_t leaf_idx, void *v_info) {
    struct leaf_info_x1 *info = v_info;
    uint32_t *leaf_addr = info->leaf_addr;
    uint32_t *pk_addr = info->pk_addr;
    unsigned int i, k;
    /* Collects the end value of every chain, SPX_N bytes per chain. */
    unsigned char pk_buffer[ SPX_WOTS_BYTES ];
    unsigned char *buffer;
    uint32_t wots_k_mask;

    if (leaf_idx == info->wots_sign_leaf) {
        /* We're traversing the leaf that's signing; generate the WOTS */
        /* signature */
        wots_k_mask = 0;
    } else {
        /* Nope, we're just generating pk's; turn off the signature logic */
        wots_k_mask = (uint32_t)~0;
    }

    set_keypair_addr( leaf_addr, leaf_idx );
    set_keypair_addr( pk_addr, leaf_idx );

    for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) {
        uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k to */
            /* the step if we're generating a signature, ~0 if we're not */

        /* Start with the secret seed */
        set_chain_addr(leaf_addr, i);
        set_hash_addr(leaf_addr, 0);
        set_type(leaf_addr, SPX_ADDR_TYPE_WOTSPRF);

        prf_addr(buffer, ctx, leaf_addr);

        /* Switch the address type back before hashing along the chain. */
        set_type(leaf_addr, SPX_ADDR_TYPE_WOTS);

        /* Iterate down the WOTS chain */
        for (k=0;; k++) {
            /* Check if this is the value that needs to be saved as a */
            /* part of the WOTS signature */
            /* (with the all-ones mask wots_k is ~0, which k never reaches */
            /* because the loop stops at SPX_WOTS_W - 1) */
            if (k == wots_k) {
                memcpy( info->wots_sig + i * SPX_N, buffer, SPX_N );
            }

            /* Check if we hit the top of the chain */
            if (k == SPX_WOTS_W - 1) break;

            /* Iterate one step on the chain */
            set_hash_addr(leaf_addr, k);

            thash(buffer, buffer, 1, ctx, leaf_addr);
        }
    }

    /* Do the final thash to generate the public keys */
    thash(dest, pk_buffer, SPX_WOTS_LEN, ctx, pk_addr);
}

================================================
FILE: ref/wotsx1.h
================================================
#if !defined( WOTSX1_H_ )
#define WOTSX1_H_

#include

/*
 * This is here to provide an interface to the internal wots_gen_leafx1
 * routine.  While this routine is not referenced in the package outside of
 * wots.c, it is called from the stand-alone benchmark code to characterize
 * the performance
 *
 * NOTE(review): in this tree wots_gen_leafx1 is defined in wotsx1.c, not in
 * wots.c; confirm and update the sentence above.
 */
struct leaf_info_x1 {
    unsigned char *wots_sig; /* Where the WOTS signature elements are written */
    uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */
    uint32_t *wots_steps;    /* Per-chain position of the signature element */
    uint32_t leaf_addr[8];   /* Address used for the PRF and chain hashes */
    uint32_t pk_addr[8];     /* Address used for the final public-key hash */
};

/* Macro to set the leaf_info to something 'benign', that is, it would */
/* run with the same time as it does during the real signing process */
/* Used only by the benchmark code */
#define INITIALIZE_LEAF_INFO_X1(info, addr, step_buffer) { \
    info.wots_sig = 0;             \
    info.wots_sign_leaf = ~0u;     \
    info.wots_steps = step_buffer; \
    memcpy( &info.leaf_addr[0], addr, 32 ); \
    memcpy( &info.pk_addr[0], addr, 32 ); \
}

#define wots_gen_leafx1 SPX_NAMESPACE(wots_gen_leafx1)
void wots_gen_leafx1(unsigned char *dest,
                   const spx_ctx *ctx,
                   uint32_t leaf_idx, void *v_info);

#endif /* WOTSX1_H_ */

================================================
FILE: sha2-avx2/.gitignore
================================================
test/*
!test/*.c
PQCsignKAT_*.rsp
PQCsignKAT_*.req
PQCgenKAT_sign
keccak4x/KeccakP-1600-times4-SIMD256.o

================================================
FILE: sha2-avx2/Makefile
================================================
PARAMS = sphincs-sha2-128f
THASH =
robust

CC = /usr/bin/gcc
CFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -march=native -flto -fomit-frame-pointer -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS)


SOURCES =          hash_sha2.c hash_sha2x8.c thash_sha2_$(THASH).c thash_sha2_$(THASH)x8.c sha2.c sha256x8.c sha512x4.c sha256avx.c address.c randombytes.c merkle.c wots.c utils.c utilsx8.c fors.c sign.c
HEADERS = params.h hash.h hashx8.h thash.h thashx8.h sha2.h sha256x8.h sha512x4.h sha256avx.h address.h randombytes.h merkle.h wots.h utils.h utilsx8.h fors.h api.h

DET_SOURCES = $(SOURCES:randombytes.%=rng.%)
DET_HEADERS = $(HEADERS:randombytes.%=rng.%)

TESTS = test/fors \
		test/spx \
		test/thashx8 \

BENCHMARK = test/benchmark

.PHONY: clean test benchmark

default: PQCgenKAT_sign

all: PQCgenKAT_sign tests benchmarks

tests: $(TESTS)

test: $(TESTS:=.exec)

benchmarks: $(BENCHMARK)

benchmark: $(BENCHMARK:=.exec)

PQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)
	$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto

test/%: test/%.c $(SOURCES) $(HEADERS)
	$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)

test/%.exec: test/%
	@$<

clean:
	-$(RM) $(TESTS)
	-$(RM) $(BENCHMARK)
	-$(RM) PQCgenKAT_sign
	-$(RM) PQCsignKAT_*.rsp
	-$(RM) PQCsignKAT_*.req

================================================
FILE: sha2-avx2/context.h
================================================
#ifndef SPX_CONTEXT_H
#define SPX_CONTEXT_H

#include

#include "params.h"

/* Per-key context handed to the hash and tweakable-hash functions. */
typedef struct {
    uint8_t pub_seed[SPX_N];
    uint8_t sk_seed[SPX_N];

    /* SHA-2 state that the thash code copies before finalizing; presumably
       precomputed over pub_seed - confirm against the hash initialization. */
    uint8_t state_seeded[40];

#if SPX_SHA512
    /* Counterpart of state_seeded for the SHA-512 based functions. */
    uint8_t state_seeded_512[72];
#endif
} spx_ctx;

#endif

================================================
FILE: sha2-avx2/fors.c
================================================
#include
#include
#include

#include "fors.h"
#include "utils.h"
#include "utilsx8.h"
#include "hash.h"
#include "hashx8.h"
#include "thash.h"
#include "thashx8.h"
#include "address.h"

/* Derives one FORS secret key element via prf_addr. */
static void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx,
                        uint32_t fors_leaf_addr[8])
{
    prf_addr(sk, ctx, fors_leaf_addr);
}

/* Derives eight FORS secret key elements at once via prf_addrx8; the
   address array holds eight consecutive 8-word addresses. */
static void fors_gen_skx8(unsigned char *sk0,
                          unsigned char *sk1,
                          unsigned char *sk2,
                          unsigned char *sk3,
                          unsigned char *sk4,
                          unsigned char *sk5,
                          unsigned char *sk6,
                          unsigned char *sk7, const spx_ctx *ctx,
                          uint32_t fors_leaf_addrx8[8*8])
{
    prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7,
               ctx, fors_leaf_addrx8);
}

/* Hashes one secret key element into its leaf (single-block thash). */
static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,
                            const spx_ctx *ctx,
                            uint32_t fors_leaf_addr[8])
{
    thash(leaf, sk, 1, ctx, fors_leaf_addr);
}

/* Hashes eight secret key elements into their leaves (single-block
   thashx8). */
static void fors_sk_to_leafx8(unsigned char *leaf0,
                              unsigned char *leaf1,
                              unsigned char *leaf2,
                              unsigned char *leaf3,
                              unsigned char *leaf4,
                              unsigned char *leaf5,
                              unsigned char *leaf6,
                              unsigned char *leaf7,
                              const unsigned char *sk0,
                              const unsigned char *sk1,
                              const unsigned char *sk2,
                              const unsigned char *sk3,
                              const unsigned char *sk4,
                              const unsigned char *sk5,
                              const unsigned char *sk6,
                              const unsigned char *sk7,
                              const spx_ctx *ctx,
                              uint32_t fors_leaf_addrx8[8*8])
{
    thashx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7,
            sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7,
            1, ctx, fors_leaf_addrx8);
}

/* State handed to fors_gen_leafx8 through the opaque info pointer: eight
   consecutive 8-word leaf addresses. */
struct fors_gen_leaf_info {
    uint32_t leaf_addrx[8*8];
};

/* Leaf generator passed to treehashx8: writes the eight leaves with
   indices addr_idx .. addr_idx + 7 to leaf, SPX_N bytes each. */
static void fors_gen_leafx8(unsigned char *leaf,
                            const spx_ctx *ctx,
                            uint32_t addr_idx, void *info)
{
    struct fors_gen_leaf_info *fors_info = info;
    uint32_t *fors_leaf_addrx8 = fors_info->leaf_addrx;
    unsigned int j;

    /* Only set the parts that the caller doesn't set */
    for (j = 0; j < 8; j++) {
        set_tree_index(fors_leaf_addrx8 + j*8, addr_idx + j);
        set_type(fors_leaf_addrx8 + j*8, SPX_ADDR_TYPE_FORSPRF);
    }

    /* The secret key elements are written to leaf first ... */
    fors_gen_skx8(leaf + 0*SPX_N,
                  leaf + 1*SPX_N,
                  leaf + 2*SPX_N,
                  leaf + 3*SPX_N,
                  leaf + 4*SPX_N,
                  leaf + 5*SPX_N,
                  leaf + 6*SPX_N,
                  leaf + 7*SPX_N,
                  ctx, fors_leaf_addrx8);

    for (j = 0; j < 8; j++) {
        set_type(fors_leaf_addrx8 + j*8, SPX_ADDR_TYPE_FORSTREE);
    }

    /* ... and then hashed in place into the leaves. */
    fors_sk_to_leafx8(leaf + 0*SPX_N,
                      leaf + 1*SPX_N,
                      leaf + 2*SPX_N,
                      leaf + 3*SPX_N,
                      leaf + 4*SPX_N,
                      leaf + 5*SPX_N,
                      leaf + 6*SPX_N,
                      leaf + 7*SPX_N,
                      leaf + 0*SPX_N,
                      leaf + 1*SPX_N,
                      leaf + 2*SPX_N,
                      leaf + 3*SPX_N,
                      leaf + 4*SPX_N,
                      leaf + 5*SPX_N,
                      leaf + 6*SPX_N,
                      leaf + 7*SPX_N,
                      ctx, fors_leaf_addrx8);
}

/**
 * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers.
 * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.
 * Assumes indices has space for SPX_FORS_TREES integers.
 *
 * Bits are consumed from the least significant bit of each byte upwards,
 * and the first bit read becomes the lowest bit of the index.
 */
static void message_to_indices(uint32_t *indices, const unsigned char *m)
{
    unsigned int i, j;
    unsigned int offset = 0;

    for (i = 0; i < SPX_FORS_TREES; i++) {
        indices[i] = 0;
        for (j = 0; j < SPX_FORS_HEIGHT; j++) {
            indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;
            offset++;
        }
    }
}

/**
 * Signs a message m, deriving the secret key from sk_seed and the FTS address.
 * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.
 *
 * For each tree, sig receives the selected secret key element (SPX_N bytes)
 * followed by the authentication path (SPX_N * SPX_FORS_HEIGHT bytes); pk
 * receives the hash over all tree roots.
 */
void fors_sign(unsigned char *sig, unsigned char *pk,
               const unsigned char *m,
               const spx_ctx *ctx,
               const uint32_t fors_addr[8])
{
    uint32_t indices[SPX_FORS_TREES];
    unsigned char roots[SPX_FORS_TREES * SPX_N];
    uint32_t fors_tree_addr[8*8] = {0};
    struct fors_gen_leaf_info fors_info = {0};
    uint32_t *fors_leaf_addr = fors_info.leaf_addrx;
    uint32_t fors_pk_addr[8] = {0};
    uint32_t idx_offset;
    unsigned int i;

    /* Initialise all eight tree addresses and all eight leaf addresses. */
    for (i=0; i<8; i++) {
        copy_keypair_addr(fors_tree_addr + 8*i, fors_addr);
        set_type(fors_tree_addr + 8*i, SPX_ADDR_TYPE_FORSTREE);
        copy_keypair_addr(fors_leaf_addr + 8*i, fors_addr);
    }
    copy_keypair_addr(fors_pk_addr, fors_addr);
    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);

    message_to_indices(indices, m);

    for (i = 0; i < SPX_FORS_TREES; i++) {
        idx_offset = i * (1 << SPX_FORS_HEIGHT);

        /* These setters only touch the first of the eight addresses. */
        set_tree_height(fors_tree_addr, 0);
        set_tree_index(fors_tree_addr, indices[i] + idx_offset);

        /* Include the secret key part that produces the selected leaf node. */
        /* The type is switched to FORSPRF only for this call and restored
           right after, so the order of these three statements matters. */
        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF);
        fors_gen_sk(sig, ctx, fors_tree_addr);
        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);
        sig += SPX_N;

        /* Compute the authentication path for this leaf node. */
        treehashx8(roots + i*SPX_N, sig, ctx,
                   indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx8,
                   fors_tree_addr, &fors_info);

        sig += SPX_N * SPX_FORS_HEIGHT;
    }

    /* Hash horizontally across all tree roots to derive the public key. */
    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);
}

/**
 * Derives the FORS public key from a signature.
 * This can be used for verification by comparing to a known public key, or to
 * subsequently verify a signature on the derived public key. The latter is the
 * typical use-case when used as an FTS below an OTS in a hypertree.
 * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.
 */
void fors_pk_from_sig(unsigned char *pk,
                      const unsigned char *sig, const unsigned char *m,
                      const spx_ctx *ctx,
                      const uint32_t fors_addr[8])
{
    uint32_t indices[SPX_FORS_TREES];
    unsigned char roots[SPX_FORS_TREES * SPX_N];
    unsigned char leaf[SPX_N];
    uint32_t fors_tree_addr[8] = {0};
    uint32_t fors_pk_addr[8] = {0};
    uint32_t idx_offset;
    unsigned int i;

    copy_keypair_addr(fors_tree_addr, fors_addr);
    copy_keypair_addr(fors_pk_addr, fors_addr);

    set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);
    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);

    message_to_indices(indices, m);

    for (i = 0; i < SPX_FORS_TREES; i++) {
        idx_offset = i * (1 << SPX_FORS_HEIGHT);

        set_tree_height(fors_tree_addr, 0);
        set_tree_index(fors_tree_addr, indices[i] + idx_offset);

        /* Derive the leaf from the included secret key part. */
        fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr);
        sig += SPX_N;

        /* Derive the corresponding root node of this tree. */
        compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset,
                     sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr);
        sig += SPX_N * SPX_FORS_HEIGHT;
    }

    /* Hash horizontally across all tree roots to derive the public key.
*/ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } ================================================ FILE: sha2-avx2/hash_sha2x8.c ================================================ #include #include #include "address.h" #include "utils.h" #include "params.h" #include "hashx8.h" #include "sha2.h" #include "sha256x8.h" #include "sha256avx.h" /* * 8-way parallel version of prf_addr; takes 8x as much input and output */ void prf_addrx8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const spx_ctx *ctx, const uint32_t addrx8[8*8]) { unsigned char bufx8[8 * (SPX_N + SPX_SHA256_ADDR_BYTES)]; unsigned char outbufx8[8 * SPX_SHA256_OUTPUT_BYTES]; unsigned int j; for (j = 0; j < 8; j++) { memcpy(bufx8 + j*(SPX_N + SPX_SHA256_ADDR_BYTES), addrx8 + j*8, SPX_SHA256_ADDR_BYTES); memcpy( bufx8 + j*(SPX_N + SPX_SHA256_ADDR_BYTES) + SPX_SHA256_ADDR_BYTES, ctx->sk_seed, SPX_N ); } sha256x8_seeded( /* out */ outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, /* seed */ ctx->state_seeded, 512, /* in */ bufx8 + 0*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 1*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 2*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 3*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 4*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 5*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 6*(SPX_SHA256_ADDR_BYTES + SPX_N), bufx8 + 7*(SPX_SHA256_ADDR_BYTES + SPX_N), SPX_SHA256_ADDR_BYTES + SPX_N /* len */ ); memcpy(out0, outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out1, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out2, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out3, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, SPX_N); 
memcpy(out4, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out5, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out6, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out7, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, SPX_N); } ================================================ FILE: sha2-avx2/hashx8.h ================================================ #ifndef SPX_HASHX8_H #define SPX_HASHX8_H #include #include "params.h" #define prf_addrx8 SPX_NAMESPACE(prf_addrx8) void prf_addrx8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const spx_ctx *ctx, const uint32_t addrx8[8*8]); #endif ================================================ FILE: sha2-avx2/merkle.c ================================================ #include #include #include "utils.h" #include "utilsx8.h" #include "wots.h" #include "wotsx8.h" #include "wotsx8.h" #include "merkle.h" #include "address.h" #include "params.h" /* * This generates a Merkle signature (WOTS signature followed by the Merkle * authentication path). 
*/
/*
 * sig       output: SPX_WOTS_BYTES of WOTS signature, then the auth path.
 * root      read first (passed to chain_lengths to derive the WOTS chain
 *           lengths) and afterwards overwritten with the root computed by
 *           treehashx8.
 * wots_addr / tree_addr
 *           subtree addresses; their subtree part is copied into all eight
 *           lanes of the x8 address blocks used below.
 * idx_leaf  leaf whose WOTS signature and authentication path are wanted.
 */
void merkle_sign(uint8_t *sig, unsigned char *root,
                 const spx_ctx *ctx,
                 uint32_t wots_addr[8], uint32_t tree_addr[8],
                 uint32_t idx_leaf)
{
    /* The authentication path is written right behind the WOTS signature. */
    unsigned char *auth_path = sig + SPX_WOTS_BYTES;
    uint32_t tree_addrx8[8*8] = { 0 };
    int j;
    struct leaf_info_x8 info = { 0 };
    unsigned steps[ SPX_WOTS_LEN ];

    info.wots_sig = sig;
    /* Must happen before treehashx8 below replaces the contents of root. */
    chain_lengths(steps, root);
    info.wots_steps = steps;

    /* Prepare one address per lane for the tree, the WOTS leaves and the
       WOTS public keys. */
    for (j=0; j<8; j++) {
        set_type(&tree_addrx8[8*j], SPX_ADDR_TYPE_HASHTREE);
        set_type(&info.leaf_addr[8*j], SPX_ADDR_TYPE_WOTS);
        set_type(&info.pk_addr[8*j], SPX_ADDR_TYPE_WOTSPK);
        copy_subtree_addr(&tree_addrx8[8*j], tree_addr);
        copy_subtree_addr(&info.leaf_addr[8*j], wots_addr);
        copy_subtree_addr(&info.pk_addr[8*j], wots_addr);
    }

    info.wots_sign_leaf = idx_leaf;

    treehashx8(root, auth_path, ctx,
               idx_leaf, 0,
               SPX_TREE_HEIGHT,
               wots_gen_leafx8,
               tree_addrx8, &info);
}

/* Compute root node of the top-most subtree. */
/* Again, in this file because wots_gen_leaf is most of the work */
void merkle_gen_root(unsigned char *root, const spx_ctx *ctx)
{
    /* We do not need the auth path in key generation, but it simplifies the
       code to have just one treehash routine that computes both root and path
       in one function.
       The scratch buffer is handed to merkle_sign as its sig argument, so it
       is sized for the auth path plus the WOTS signature area. */
    unsigned char auth_path[SPX_TREE_HEIGHT * SPX_N + SPX_WOTS_BYTES];
    uint32_t top_tree_addr[8] = {0};
    uint32_t wots_addr[8] = {0};

    /* Both addresses point at the top layer, SPX_D - 1. */
    set_layer_addr(top_tree_addr, SPX_D - 1);
    set_layer_addr(wots_addr, SPX_D - 1);

    merkle_sign(auth_path, root, ctx,
                wots_addr, top_tree_addr,
                ~0 /* ~0 means "don't bother generating an auth path" */ );
}
================================================ FILE: sha2-avx2/sha256avx.c ================================================
#include #include #include #include "sha256avx.h"

// Transpose 8 vectors containing 32-bit values
// Done in place in three stages: interleave 32-bit words of adjacent rows,
// interleave 64-bit pairs, then recombine the 128-bit halves.
void transpose(u256 s[8]) {
    u256 tmp0[8];
    u256 tmp1[8];
    tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]);
    tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]);
    tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]);
    tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]);
    tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]);
    tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]);
    tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]);
    tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]);
    tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]);
    tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]);
    tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]);
    tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]);
    tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]);
    tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]);
    tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]);
    tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]);
    s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20);
    s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20);
    s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20);
    s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20);
    s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31);
    s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31);
    s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31);
    s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31);
}

// Loads the same eight initial state words into every lane and clears the
// buffered-byte and message-length counters.
void sha256_init8x(sha256ctx *ctx) {
    ctx->s[0] =
_mm256_set_epi32(0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667); ctx->s[1] = _mm256_set_epi32(0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85); ctx->s[2] = _mm256_set_epi32(0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372); ctx->s[3] = _mm256_set_epi32(0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a); ctx->s[4] = _mm256_set_epi32(0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f); ctx->s[5] = _mm256_set_epi32(0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c); ctx->s[6] = _mm256_set_epi32(0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab); ctx->s[7] = _mm256_set_epi32(0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19); ctx->datalen = 0; ctx->msglen = 0; } void sha256_final8x(sha256ctx *ctx, unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7) { unsigned int i, curlen; // Padding if (ctx->datalen < 56) { for (i = 0; i < 8; ++i) { curlen = ctx->datalen; ctx->msgblocks[64*i + curlen++] = 0x80; while(curlen < 64) { ctx->msgblocks[64*i + curlen++] = 0x00; } } } else { for (i = 0; i < 8; ++i) { curlen = ctx->datalen; ctx->msgblocks[64*i + curlen++] = 0x80; while(curlen < 64) { ctx->msgblocks[64*i + curlen++] = 0x00; } } sha256_transform8x(ctx, &ctx->msgblocks[64*0], &ctx->msgblocks[64*1], &ctx->msgblocks[64*2], &ctx->msgblocks[64*3], &ctx->msgblocks[64*4], &ctx->msgblocks[64*5], &ctx->msgblocks[64*6], &ctx->msgblocks[64*7] ); memset(ctx->msgblocks, 0, 8 * 64); } // Add length of the message to each block ctx->msglen += ctx->datalen * 8; for (i = 0; i < 8; i++) { ctx->msgblocks[64*i + 63] = ctx->msglen; ctx->msgblocks[64*i + 62] = ctx->msglen >> 8; 
ctx->msgblocks[64*i + 61] = ctx->msglen >> 16; ctx->msgblocks[64*i + 60] = ctx->msglen >> 24; ctx->msgblocks[64*i + 59] = ctx->msglen >> 32; ctx->msgblocks[64*i + 58] = ctx->msglen >> 40; ctx->msgblocks[64*i + 57] = ctx->msglen >> 48; ctx->msgblocks[64*i + 56] = ctx->msglen >> 56; } sha256_transform8x(ctx, &ctx->msgblocks[64*0], &ctx->msgblocks[64*1], &ctx->msgblocks[64*2], &ctx->msgblocks[64*3], &ctx->msgblocks[64*4], &ctx->msgblocks[64*5], &ctx->msgblocks[64*6], &ctx->msgblocks[64*7] ); // Compute final hash output transpose(ctx->s); // Store Hash value STORE(out0, BYTESWAP(ctx->s[0])); STORE(out1, BYTESWAP(ctx->s[1])); STORE(out2, BYTESWAP(ctx->s[2])); STORE(out3, BYTESWAP(ctx->s[3])); STORE(out4, BYTESWAP(ctx->s[4])); STORE(out5, BYTESWAP(ctx->s[5])); STORE(out6, BYTESWAP(ctx->s[6])); STORE(out7, BYTESWAP(ctx->s[7])); } void sha256_transform8x(sha256ctx *ctx, const unsigned char* data0, const unsigned char* data1, const unsigned char* data2, const unsigned char* data3, const unsigned char* data4, const unsigned char* data5, const unsigned char* data6, const unsigned char* data7) { u256 s[8], w[64], T0, T1; // Load words and transform data correctly w[0] = BYTESWAP(LOAD(data0)); w[0 + 8] = BYTESWAP(LOAD(data0 + 32)); w[1] = BYTESWAP(LOAD(data1)); w[1 + 8] = BYTESWAP(LOAD(data1 + 32)); w[2] = BYTESWAP(LOAD(data2)); w[2 + 8] = BYTESWAP(LOAD(data2 + 32)); w[3] = BYTESWAP(LOAD(data3)); w[3 + 8] = BYTESWAP(LOAD(data3 + 32)); w[4] = BYTESWAP(LOAD(data4)); w[4 + 8] = BYTESWAP(LOAD(data4 + 32)); w[5] = BYTESWAP(LOAD(data5)); w[5 + 8] = BYTESWAP(LOAD(data5 + 32)); w[6] = BYTESWAP(LOAD(data6)); w[6 + 8] = BYTESWAP(LOAD(data6 + 32)); w[7] = BYTESWAP(LOAD(data7)); w[7 + 8] = BYTESWAP(LOAD(data7 + 32)); transpose(w); transpose(w + 8); // Initial State s[0] = ctx->s[0]; s[1] = ctx->s[1]; s[2] = ctx->s[2]; s[3] = ctx->s[3]; s[4] = ctx->s[4]; s[5] = ctx->s[5]; s[6] = ctx->s[6]; s[7] = ctx->s[7]; SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); 
SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 9, w[9]); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); w[22] = 
ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); SHA256ROUND_AVX(s[5], s[6], 
s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); w[39] = ADD4_32(WSIGMA1_AVX(w[37]), w[23], w[32], WSIGMA0_AVX(w[24])); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); w[49] = ADD4_32(WSIGMA1_AVX(w[47]), 
w[33], w[42], WSIGMA0_AVX(w[34])); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], 
s[0], s[1], 62, w[62]); w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); // Feed Forward ctx->s[0] = ADD32(s[0], ctx->s[0]); ctx->s[1] = ADD32(s[1], ctx->s[1]); ctx->s[2] = ADD32(s[2], ctx->s[2]); ctx->s[3] = ADD32(s[3], ctx->s[3]); ctx->s[4] = ADD32(s[4], ctx->s[4]); ctx->s[5] = ADD32(s[5], ctx->s[5]); ctx->s[6] = ADD32(s[6], ctx->s[6]); ctx->s[7] = ADD32(s[7], ctx->s[7]); } ================================================ FILE: sha2-avx2/sha256avx.h ================================================ #ifndef SHA256AVX_H #define SHA256AVX_H #include "immintrin.h" #include static const unsigned int RC[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; #define u32 uint32_t #define u256 __m256i #define XOR _mm256_xor_si256 #define OR _mm256_or_si256 #define AND _mm256_and_si256 #define ADD32 _mm256_add_epi32 #define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) #define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) #define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) #define BYTESWAP(x) _mm256_shuffle_epi8(x, 
_mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) #define SHIFTR32(x, y) _mm256_srli_epi32(x, y) #define SHIFTL32(x, y) _mm256_slli_epi32(x, y) #define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - y)) #define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - y)) #define XOR3(a, b, c) XOR(XOR(a, b), c) #define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) #define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) #define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) #define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) #define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) #define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) #define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) #define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) #define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) #define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32(RC[rc]), w); \ d = ADD32(d, T0); \ T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ h = ADD32(T0, T1); typedef struct SHA256state { u256 s[8]; unsigned char msgblocks[8*64]; int datalen; unsigned long long msglen; } sha256ctx; void transpose(u256 s[8]); void sha256_init8x(sha256ctx *ctx); void sha256_final8x(sha256ctx *ctx, unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7); void sha256_transform8x(sha256ctx *ctx, const unsigned char *data0, const unsigned char *data1, const unsigned char *data2, const unsigned char *data3, const unsigned char *data4, const unsigned char *data5, const unsigned char *data6, const unsigned char *data7); #endif ================================================ FILE: sha2-avx2/sha256x8.c 
================================================ #include #include "sha256x8.h" #include "sha256avx.h" #include "utils.h" static uint32_t load_bigendian_32(const uint8_t *x) { return (uint32_t)(x[3]) | (((uint32_t)(x[2])) << 8) | (((uint32_t)(x[1])) << 16) | (((uint32_t)(x[0])) << 24); } // Performs sha256x8 on an initialized (and perhaps seeded) state. static void _sha256x8( sha256ctx *ctx, unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned long long inlen) { unsigned long long i = 0; while(inlen - i >= 64) { sha256_transform8x(ctx, in0 + i, in1 + i, in2 + i, in3 + i, in4 + i, in5 + i, in6 + i, in7 + i ); i += 64; ctx->msglen += 512; } int bytes_to_copy = inlen - i; memcpy(&ctx->msgblocks[64*0], in0 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*1], in1 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*2], in2 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*3], in3 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*4], in4 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*5], in5 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*6], in6 + i, bytes_to_copy); memcpy(&ctx->msgblocks[64*7], in7 + i, bytes_to_copy); ctx->datalen = bytes_to_copy; sha256_final8x(ctx, out0, out1, out2, out3, out4, out5, out6, out7); } void sha256x8_seeded( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *seed, unsigned long long seedlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char 
*in7, unsigned long long inlen) { uint32_t t; sha256ctx ctx; for (size_t i = 0; i < 8; i++) { t = load_bigendian_32(seed + 4*i); ctx.s[i] = _mm256_set_epi32(t, t, t, t, t, t, t, t); } ctx.datalen = 0; ctx.msglen = seedlen; _sha256x8(&ctx, out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, inlen); } /* This provides a wrapper around the internals of 8x parallel SHA256 */ void sha256x8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned long long inlen) { sha256ctx ctx; sha256_init8x(&ctx); _sha256x8(&ctx, out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, inlen); } /** * Note that inlen should be sufficiently small that it still allows for * an array to be allocated on the stack. Typically 'in' is merely a seed. 
* Outputs outlen number of bytes */ void mgf1x8(unsigned char *outx8, unsigned long outlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned long inlen) { SPX_VLA(unsigned char, inbufx8, 8 * (inlen + 4)); unsigned char outbufx8[8*SPX_SHA256_OUTPUT_BYTES]; unsigned long i; unsigned int j; memcpy(inbufx8 + 0*(inlen + 4), in0, inlen); memcpy(inbufx8 + 1*(inlen + 4), in1, inlen); memcpy(inbufx8 + 2*(inlen + 4), in2, inlen); memcpy(inbufx8 + 3*(inlen + 4), in3, inlen); memcpy(inbufx8 + 4*(inlen + 4), in4, inlen); memcpy(inbufx8 + 5*(inlen + 4), in5, inlen); memcpy(inbufx8 + 6*(inlen + 4), in6, inlen); memcpy(inbufx8 + 7*(inlen + 4), in7, inlen); /* While we can fit in at least another full block of SHA256 output.. */ for (i = 0; (i+1)*SPX_SHA256_OUTPUT_BYTES <= outlen; i++) { for (j = 0; j < 8; j++) { u32_to_bytes(inbufx8 + inlen + j*(inlen + 4), i); } sha256x8(outx8 + 0*outlen, outx8 + 1*outlen, outx8 + 2*outlen, outx8 + 3*outlen, outx8 + 4*outlen, outx8 + 5*outlen, outx8 + 6*outlen, outx8 + 7*outlen, inbufx8 + 0*(inlen + 4), inbufx8 + 1*(inlen + 4), inbufx8 + 2*(inlen + 4), inbufx8 + 3*(inlen + 4), inbufx8 + 4*(inlen + 4), inbufx8 + 5*(inlen + 4), inbufx8 + 6*(inlen + 4), inbufx8 + 7*(inlen + 4), inlen + 4); outx8 += SPX_SHA256_OUTPUT_BYTES; } /* Until we cannot anymore, and we fill the remainder. 
*/ for (j = 0; j < 8; j++) { u32_to_bytes(inbufx8 + inlen + j*(inlen + 4), i); } sha256x8(outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, inbufx8 + 0*(inlen + 4), inbufx8 + 1*(inlen + 4), inbufx8 + 2*(inlen + 4), inbufx8 + 3*(inlen + 4), inbufx8 + 4*(inlen + 4), inbufx8 + 5*(inlen + 4), inbufx8 + 6*(inlen + 4), inbufx8 + 7*(inlen + 4), inlen + 4); for (j = 0; j < 8; j++) { memcpy(outx8 + j*outlen, outbufx8 + j*SPX_SHA256_OUTPUT_BYTES, outlen - i*SPX_SHA256_OUTPUT_BYTES); } } ================================================ FILE: sha2-avx2/sha256x8.h ================================================ #ifndef SPX_SHA256X8_H #define SPX_SHA256X8_H #include "params.h" #define SPX_SHA256_BLOCK_BYTES 64 #define SPX_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal SPX_N */ #if SPX_SHA256_OUTPUT_BYTES < SPX_N #error Linking against SHA-256 with N larger than 32 bytes is not supported #endif #define sha256x8_seeded SPX_NAMESPACE(sha256x8_seeded) void sha256x8_seeded( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *seed, unsigned long long seedlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned long long inlen); /* This provides a wrapper around the internals of 8x parallel SHA256 */ #define sha256x8 SPX_NAMESPACE(sha256x8) void sha256x8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const 
unsigned char *in0,
              const unsigned char *in1,
              const unsigned char *in2,
              const unsigned char *in3,
              const unsigned char *in4,
              const unsigned char *in5,
              const unsigned char *in6,
              const unsigned char *in7,
              unsigned long long inlen);

/**
 * Note that inlen should be sufficiently small that it still allows for
 * an array to be allocated on the stack. Typically 'in' is merely a seed.
 * Outputs outlen number of bytes
 */
#define mgf1x8 SPX_NAMESPACE(mgf1x8)
void mgf1x8(unsigned char *outx8, unsigned long outlen,
            const unsigned char *in0,
            const unsigned char *in1,
            const unsigned char *in2,
            const unsigned char *in3,
            const unsigned char *in4,
            const unsigned char *in5,
            const unsigned char *in6,
            const unsigned char *in7,
            unsigned long inlen);
#endif

================================================
FILE: sha2-avx2/sha512x4.c
================================================
/* NOTE(review): the header names of the following includes are missing in
 * this copy of the file - restore them from the upstream source. */
#include
#include
#include

#define SPX_SHA512_OUTPUT_BYTES 64 /* In sha256.h, but we don't want to */
                                   /* pull in the entire thing */
#include "sha512x4.h"
#include "utils.h"

typedef uint64_t u64;
typedef __m256i u256;

/* Forward declaration: compresses one 128-byte block per lane into ctx->s. */
static void sha512_transform4x(
    sha512ctx4x *ctx,
    const unsigned char *d0,
    const unsigned char *d1,
    const unsigned char *d2,
    const unsigned char *d3
);

/* Reverses the byte order inside every 64-bit element of a 256-bit vector. */
#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0x8,0x9,0xa,0xb,0xc,0xd,0xe,0xf,0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xa,0xb,0xc,0xd,0xe,0xf,0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7))
/* Unaligned 256-bit store of src to dest. */
#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src)

// Transpose 4 vectors containing 64-bit values
// That is, it rearranges the array:
//     A B C D
//     E F G H
//     I J K L
//     M N O P
// into
//     A E I M
//     B F J N
//     C G K O
//     D H L P
// where each letter stands for 64 bits (and lsbits on the left)
static void transpose(u256 s[4]) {
    u256 tmp[4];
    // Step 1: interleave 64-bit elements of row pairs within each 128-bit half
    tmp[0] = _mm256_unpacklo_epi64(s[0], s[1]);
    tmp[1] = _mm256_unpackhi_epi64(s[0], s[1]);
    tmp[2] = _mm256_unpacklo_epi64(s[2], s[3]);
    tmp[3] = _mm256_unpackhi_epi64(s[2], s[3]);
    // tmp is in the order of
    //     A E C G
    //     B F D H
    //     I M K O
    //     J N L P
    // Step 2: recombine 128-bit halves (0x20 = both low halves, 0x31 = both high halves)
    s[0] = _mm256_permute2x128_si256(tmp[0], tmp[2], 0x20);
    s[1] = _mm256_permute2x128_si256(tmp[1], tmp[3], 0x20);
    s[2] = _mm256_permute2x128_si256(tmp[0], tmp[2], 0x31);
    s[3] = _mm256_permute2x128_si256(tmp[1], tmp[3], 0x31);
}

/* Resets a 4-lane context: every lane gets the same eight 64-bit initial
 * state words, and the buffered-byte and message-length counters are zeroed. */
static void sha512_init4x(sha512ctx4x *ctx) {
#define SET4(x) _mm256_set_epi64x(x, x, x, x)
    ctx->s[0] = SET4(0x6a09e667f3bcc908ULL);
    ctx->s[1] = SET4(0xbb67ae8584caa73bULL);
    ctx->s[2] = SET4(0x3c6ef372fe94f82bULL);
    ctx->s[3] = SET4(0xa54ff53a5f1d36f1ULL);
    ctx->s[4] = SET4(0x510e527fade682d1ULL);
    ctx->s[5] = SET4(0x9b05688c2b3e6c1fULL);
    ctx->s[6] = SET4(0x1f83d9abfb41bd6bULL);
    ctx->s[7] = SET4(0x5be0cd19137e2179ULL);
#undef SET4

    ctx->datalen = 0;
    ctx->msglen = 0;
}

/* Short names for the AVX2 intrinsics used below; all arithmetic is done on
 * four independent 64-bit elements per vector (one per lane). */
#define XOR _mm256_xor_si256
#define OR _mm256_or_si256
#define AND _mm256_and_si256
#define ADD64 _mm256_add_epi64

#define LOAD(src) _mm256_loadu_si256((__m256i *)(src))

#define SHIFTR64(x, y) _mm256_srli_epi64(x, y)
#define SHIFTL64(x, y) _mm256_slli_epi64(x, y)

/* Rotate right by y bits, built from two shifts and an OR. */
#define ROTR64(x, y) OR(SHIFTR64(x, y), SHIFTL64(x, 64 - y))

/* Three-input XOR. */
static u256 XOR3(u256 a, u256 b, u256 c) {
    return XOR(XOR(a, b), c);
}

/* Left-associated sums of 3, 4 and 5 vectors of 64-bit elements. */
#define ADD3_64(a, b, c) ADD64(ADD64(a, b), c)
#define ADD4_64(a, b, c, d) ADD64(ADD64(ADD64(a, b), c), d)
#define ADD5_64(a, b, c, d, e) ADD64(ADD64(ADD64(ADD64(a, b), c), d), e)

/* Bitwise majority of a, b, c, written with two XORs and one AND. */
static u256 MAJ_AVX(u256 a, u256 b, u256 c) {
    return XOR(c, AND(XOR(a, c), XOR(b, c)));
}
/* Bitwise choice: picks bits of b where a is set, bits of c elsewhere. */
static u256 CH_AVX(u256 a, u256 b, u256 c) {
    return XOR(c, AND(a, XOR(b, c)));
}

/* Rotation/shift mixers used by the round function (SIGMA*) and by the
 * message schedule (GAMMA*). */
static u256 SIGMA0_AVX(u256 x) {
    return XOR3(ROTR64(x, 28), ROTR64(x, 34), ROTR64(x, 39));
}
static u256 SIGMA1_AVX(u256 x) {
    return XOR3(ROTR64(x, 14), ROTR64(x, 18), ROTR64(x, 41));
}
static u256 GAMMA0_AVX(u256 x) {
    return XOR3(ROTR64(x, 1), ROTR64(x, 8), SHIFTR64(x, 7));
}
static u256 GAMMA1_AVX(u256 x) {
    return XOR3(ROTR64(x, 19), ROTR64(x, 61), SHIFTR64(x, 6));
}

/* One round on all four lanes. Only d and h are written; callers rotate the
 * argument order from round to round instead of moving the state registers.
 * Uses the temporaries T0 and T1 of the enclosing function. */
#define SHA512ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \
    T0 = ADD5_64(h, w, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi64x(RC[rc])); \
    T1 = ADD64(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \
    d = ADD64(d, T0); \
    h = ADD64(T0, T1);

/* Per-round additive constants, indexed by round number (0..79). */
static const unsigned long long RC[80] = {
    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
    0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
    0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
    0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
    0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
    0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
    0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
    0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
    0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
    0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
    0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
    0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
    0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
    0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
    0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
};

/* Runs the 80-round compression on one 128-byte block from each of the four
 * inputs d0..d3 and adds the result into ctx->s. */
static void sha512_transform4x(
    sha512ctx4x *ctx,
    const unsigned char *d0,
    const unsigned char *d1,
    const unsigned char *d2,
    const unsigned char *d3)
{
    u256 s0, s1, s2, s3, s4, s5, s6, s7, w[16], T0, T1, nw;

    // Load words and transform data correctly
    // (row r of each group of four holds 32 bytes of input r, byte-swapped
    // per 64-bit word; the transposes below regroup them by word index)
    w[0     ] = BYTESWAP(LOAD(d0     ));
    w[0 +  4] = BYTESWAP(LOAD(d0 + 32));
    w[0 +  8] = BYTESWAP(LOAD(d0 + 64));
    w[0 + 12] = BYTESWAP(LOAD(d0 + 96));

    w[1     ] = BYTESWAP(LOAD(d1     ));
    w[1 +  4] = BYTESWAP(LOAD(d1 + 32));
    w[1 +  8] = BYTESWAP(LOAD(d1 + 64));
    w[1 + 12] = BYTESWAP(LOAD(d1 + 96));

    w[2     ] = BYTESWAP(LOAD(d2     ));
    w[2 +  4] = BYTESWAP(LOAD(d2 + 32));
    w[2 +  8] = BYTESWAP(LOAD(d2 + 64));
    w[2 + 12] = BYTESWAP(LOAD(d2 + 96));

    w[3     ] = BYTESWAP(LOAD(d3     ));
    w[3 +  4] = BYTESWAP(LOAD(d3 + 32));
    w[3 +  8] = BYTESWAP(LOAD(d3 + 64));
    w[3 + 12] = BYTESWAP(LOAD(d3 + 96));

    transpose(w);
    transpose(w + 4);
    transpose(w + 8);
    transpose(w + 12);

    // Initial State
    s0 = ctx->s[0];
    s1 = ctx->s[1];
    s2 = ctx->s[2];
    s3 = ctx->s[3];
    s4 = ctx->s[4];
    s5 = ctx->s[5];
    s6 = ctx->s[6];
    s7 = ctx->s[7];

    // The first 16 rounds (where the w inputs are directly from the data)
    SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, 0, w[0]);
    SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, 1, w[1]);
    SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, 2, w[2]);
    SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, 3, w[3]);
    SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, 4, w[4]);
    SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, 5, w[5]);
    SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, 6, w[6]);
    SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, 7, w[7]);
    SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, 8, w[8]);
    SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, 9, w[9]);
    SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, 10, w[10]);
    SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, 11, w[11]);
    SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, 12, w[12]);
    SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, 13, w[13]);
    SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, 14, w[14]);
    SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, 15, w[15]);
#define M(i) (((i)+16) & 0xf) #define NextW(i) \ w[M(i)] = ADD4_64(GAMMA1_AVX(w[M((i)-2)]), w[M((i)-7)], GAMMA0_AVX(w[M((i)-15)]), w[M((i)-16)]); // The remaining 64 rounds (where the w inputs are a linear fix of the data) for (unsigned i = 16; i<80; i+=16) { nw = NextW(0); SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, i+0, nw); nw = NextW(1); SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, i+1, nw); nw = NextW(2); SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, i+2, nw); nw = NextW(3); SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, i+3, nw); nw = NextW(4); SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, i+4, nw); nw = NextW(5); SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, i+5, nw); nw = NextW(6); SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, i+6, nw); nw = NextW(7); SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, i+7, nw); nw = NextW(8); SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, i+8, nw); nw = NextW(9); SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, i+9, nw); nw = NextW(10); SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, i+10, nw); nw = NextW(11); SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, i+11, nw); nw = NextW(12); SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, i+12, nw); nw = NextW(13); SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, i+13, nw); nw = NextW(14); SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, i+14, nw); nw = NextW(15); SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, i+15, nw); } // Feed Forward ctx->s[0] = ADD64(s0, ctx->s[0]); ctx->s[1] = ADD64(s1, ctx->s[1]); ctx->s[2] = ADD64(s2, ctx->s[2]); ctx->s[3] = ADD64(s3, ctx->s[3]); ctx->s[4] = ADD64(s4, ctx->s[4]); ctx->s[5] = ADD64(s5, ctx->s[5]); ctx->s[6] = ADD64(s6, ctx->s[6]); ctx->s[7] = ADD64(s7, ctx->s[7]); } static void _sha512x4( sha512ctx4x* ctx, unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long long inlen) { unsigned 
int i = 0; while(inlen - i >= 128) { sha512_transform4x( ctx, in0 + i, in1 + i, in2 + i, in3 + i ); ctx->msglen += 1024; i += 128; } ctx->datalen = inlen - i; memcpy(&ctx->msgblocks[128*0], in0 + i, ctx->datalen); memcpy(&ctx->msgblocks[128*1], in1 + i, ctx->datalen); memcpy(&ctx->msgblocks[128*2], in2 + i, ctx->datalen); memcpy(&ctx->msgblocks[128*3], in3 + i, ctx->datalen); // Padding unsigned long curlen; if (ctx->datalen < 112) { for (i = 0; i < 4; ++i) { curlen = ctx->datalen; ctx->msgblocks[128*i + curlen++] = 0x80; while(curlen < 128) { ctx->msgblocks[128*i + curlen++] = 0x00; } } } else { for (i = 0; i < 4; ++i) { curlen = ctx->datalen; ctx->msgblocks[128*i + curlen++] = 0x80; while(curlen < 128) { ctx->msgblocks[128*i + curlen++] = 0x00; } } sha512_transform4x( ctx, ctx->msgblocks, ctx->msgblocks + 128, ctx->msgblocks + 256, ctx->msgblocks + 384 ); memset(ctx->msgblocks, 0, 4 * 128); } // Add length of the message to each block ctx->msglen += ctx->datalen * 8; for (i = 0; i < 4; i++) { ctx->msgblocks[128*i + 127] = ctx->msglen; ctx->msgblocks[128*i + 126] = ctx->msglen >> 8; ctx->msgblocks[128*i + 125] = ctx->msglen >> 16; ctx->msgblocks[128*i + 124] = ctx->msglen >> 24; ctx->msgblocks[128*i + 123] = ctx->msglen >> 32; ctx->msgblocks[128*i + 122] = ctx->msglen >> 40; ctx->msgblocks[128*i + 121] = ctx->msglen >> 48; ctx->msgblocks[128*i + 120] = ctx->msglen >> 56; memset( &ctx->msgblocks[128*i + 112], 0, 8 ); } sha512_transform4x( ctx, ctx->msgblocks, ctx->msgblocks + 128, ctx->msgblocks + 256, ctx->msgblocks + 384 ); // Compute final hash output transpose(ctx->s); transpose(ctx->s+4); // Store Hash value __m256i out[2]; STORE(out, BYTESWAP(ctx->s[0])); STORE(out+1, BYTESWAP(ctx->s[4])); memcpy(out0, out, 64); STORE(out, BYTESWAP(ctx->s[1])); STORE(out+1, BYTESWAP(ctx->s[5])); memcpy(out1, out, 64); STORE(out, BYTESWAP(ctx->s[2])); STORE(out+1, BYTESWAP(ctx->s[6])); memcpy(out2, out, 64); STORE(out, BYTESWAP(ctx->s[3])); STORE(out+1, BYTESWAP(ctx->s[7])); 
memcpy(out3, out, 64); } /** * Note that inlen should be sufficiently small that it still allows for * an array to be allocated on the stack. Typically 'in' is merely a seed. * Outputs outlen number of bytes */ void mgf1x4_512(unsigned char *outx4, unsigned long outlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long inlen) { SPX_VLA(unsigned char, inbufx4, 4*(inlen + 4)); unsigned char outbuf[4*64]; unsigned long i; unsigned int j; memcpy(inbufx4 + 0*(inlen + 4), in0, inlen); memcpy(inbufx4 + 1*(inlen + 4), in1, inlen); memcpy(inbufx4 + 2*(inlen + 4), in2, inlen); memcpy(inbufx4 + 3*(inlen + 4), in3, inlen); /* While we can fit in at least another full block of SHA512 output.. */ unsigned long remaining = outlen; for (i = 0; remaining > 0; i++) { unsigned this_step = SPX_SHA512_OUTPUT_BYTES; if (this_step > remaining) this_step = remaining; remaining -= this_step; for (j = 0; j < 4; j++) { u32_to_bytes(inbufx4 + inlen + j*(inlen + 4), i); } sha512ctx4x ctx; sha512_init4x(&ctx); _sha512x4( &ctx, outbuf + 0*64, outbuf + 1*64, outbuf + 2*64, outbuf + 3*64, inbufx4 + 0*(inlen + 4), inbufx4 + 1*(inlen + 4), inbufx4 + 2*(inlen + 4), inbufx4 + 3*(inlen + 4), inlen+4 ); memcpy(outx4 + 0*outlen, outbuf+0*64, this_step); memcpy(outx4 + 1*outlen, outbuf+1*64, this_step); memcpy(outx4 + 2*outlen, outbuf+2*64, this_step); memcpy(outx4 + 3*outlen, outbuf+3*64, this_step); outx4 += this_step; } } void sha512x4_seeded( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const unsigned char *seed, unsigned long long seedlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long long inlen) { sha512ctx4x ctx; unsigned long i; for (i = 0; i < 8; i++) { uint64_t t = (uint64_t)(seed[7]) | (((uint64_t)(seed[6])) << 8) | (((uint64_t)(seed[5])) << 16) | (((uint64_t)(seed[4])) << 24) | (((uint64_t)(seed[3])) << 32) | 
(((uint64_t)(seed[2])) << 40) | (((uint64_t)(seed[1])) << 48) | (((uint64_t)(seed[0])) << 56); ctx.s[i] = _mm256_set_epi64x(t, t, t, t); seed += 8; } ctx.msglen = seedlen; _sha512x4( &ctx, out0, out1, out2, out3, in0, in1, in2, in3, inlen ); } ================================================ FILE: sha2-avx2/sha512x4.h ================================================ #ifndef SHA512AVX_H #define SHA512AVX_H #include #include "immintrin.h" #include "params.h" typedef struct SHA512state4x { __m256i s[8]; unsigned char msgblocks[4*128]; int datalen; unsigned long long msglen; } sha512ctx4x; #define sha512x4_seeded SPX_NAMESPACE(sha512x4_seeded) void sha512x4_seeded( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const unsigned char *seed, unsigned long long seedlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long long inlen); /** * Note that inlen should be sufficiently small that it still allows for * an array to be allocated on the stack. Typically 'in' is merely a seed. 
* Outputs outlen number of bytes */ #define mgf1x4_512 SPX_NAMESPACE(mgf1x4_512) void mgf1x4_512(unsigned char *outx4, unsigned long outlen, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned long inlen); #endif ================================================ FILE: sha2-avx2/test/benchmark.c ================================================ #define _POSIX_C_SOURCE 199309L #include #include #include #include "../api.h" #include "../fors.h" #include "../wots.h" #include "../wotsx8.h" #include "../params.h" #include "../randombytes.h" #define SPX_MLEN 32 #define NTESTS 10 static void wots_gen_pkx8(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]); static int cmp_llu(const void *a, const void*b) { if(*(unsigned long long *)a < *(unsigned long long *)b) return -1; if(*(unsigned long long *)a > *(unsigned long long *)b) return 1; return 0; } static unsigned long long median(unsigned long long *l, size_t llen) { qsort(l,llen,sizeof(unsigned long long),cmp_llu); if(llen%2) return l[llen/2]; else return (l[llen/2-1]+l[llen/2])/2; } static void delta(unsigned long long *l, size_t llen) { unsigned int i; for(i = 0; i < llen - 1; i++) { l[i] = l[i+1] - l[i]; } } static unsigned long long cpucycles(void) { unsigned long long result; __asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (result) :: "%rdx"); return result; } static void printfcomma (unsigned long long n) { if (n < 1000) { printf("%llu", n); return; } printfcomma(n / 1000); printf (",%03llu", n % 1000); } static void printfalignedcomma (unsigned long long n, int len) { unsigned long long ncopy = n; int i = 0; while (ncopy > 9) { len -= 1; ncopy /= 10; i += 1; // to account for commas } i = i/3 - 1; // to account for commas for (; i < len; i++) { printf(" "); } printfcomma(n); } static void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul) { unsigned long long med; result /= NTESTS; 
delta(l, NTESTS + 1); med = median(l, llen); printf("avg. %11.2lf us (%2.2lf sec); median ", result, result / 1e6); printfalignedcomma(med, 12); printf(" cycles, %5llux: ", mul); printfalignedcomma(mul*med, 12); printf(" cycles\n"); } #define MEASURE(TEXT, MUL, FNCALL)\ printf(TEXT);\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\ for(i = 0; i < NTESTS; i++) {\ t[i] = cpucycles();\ FNCALL;\ }\ t[NTESTS] = cpucycles();\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\ result = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) / 1e3;\ display_result(result, t, NTESTS, MUL); int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); spx_ctx ctx; unsigned char pk[SPX_PK_BYTES]; unsigned char sk[SPX_SK_BYTES]; unsigned char *m = malloc(SPX_MLEN); unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN); unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN); unsigned char fors_pk[SPX_FORS_PK_BYTES]; unsigned char fors_m[SPX_FORS_MSG_BYTES]; unsigned char fors_sig[SPX_FORS_BYTES]; unsigned char addr[SPX_ADDR_BYTES]; unsigned char wots_pk[8*SPX_WOTS_PK_BYTES]; unsigned long long smlen; unsigned long long mlen; unsigned long long t[NTESTS+1]; struct timespec start, stop; double result; int i; randombytes(m, SPX_MLEN); randombytes(addr, SPX_ADDR_BYTES); printf("Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\n", SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES, SPX_WOTS_W); printf("Running %d iterations.\n", NTESTS); MEASURE("Generating keypair.. ", 1, crypto_sign_keypair(pk, sk)); MEASURE(" - WOTS pk gen 8x.. ", (1 << SPX_TREE_HEIGHT) / 8, wots_gen_pkx8(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Signing.. ", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk)); MEASURE(" - FORS signing.. ", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr)); MEASURE(" - WOTS pk gen x8.. ", SPX_D * (1 << SPX_TREE_HEIGHT) / 8, wots_gen_pkx8(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Verifying.. 
", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk)); printf("Signature size: %d (%.2f KiB)\n", SPX_BYTES, SPX_BYTES / 1024.0); printf("Public key size: %d (%.2f KiB)\n", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0); printf("Secret key size: %d (%.2f KiB)\n", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0); free(m); free(sm); free(mout); return 0; } static void wots_gen_pkx8(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) { struct leaf_info_x8 leaf; unsigned steps[ SPX_WOTS_LEN ] = { 0 }; INITIALIZE_LEAF_INFO_X8(leaf, addr, steps); wots_gen_leafx8(pk, ctx, 0, &leaf); } ================================================ FILE: sha2-avx2/test/thashx8.c ================================================ #include #include #include "../thashx8.h" #include "../thash.h" #include "../randombytes.h" #include "../params.h" #include "../hash.h" #if SPX_SHA512 #include "../sha2.h" #include "../sha512x4.h" #endif int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); unsigned char input[16*SPX_N]; spx_ctx ctx; unsigned char output[8*SPX_N]; unsigned char out8[8*SPX_N]; uint32_t addr[8*8] = {0}; unsigned int j; randombytes(ctx.pub_seed, SPX_N); randombytes(input, 16*SPX_N); randombytes((unsigned char *)addr, 8 * 8 * sizeof(uint32_t)); initialize_hash_function(&ctx); printf("Testing if thash matches thashx8 on one block ... "); for (j = 0; j < 8; j++) { thash(out8 + j * SPX_N, input + j * SPX_N, 1, &ctx, addr + j*8); } thashx8(output + 0*SPX_N, output + 1*SPX_N, output + 2*SPX_N, output + 3*SPX_N, output + 4*SPX_N, output + 5*SPX_N, output + 6*SPX_N, output + 7*SPX_N, input + 0*SPX_N, input + 1*SPX_N, input + 2*SPX_N, input + 3*SPX_N, input + 4*SPX_N, input + 5*SPX_N, input + 6*SPX_N, input + 7*SPX_N, 1, &ctx, addr); if (memcmp(out8, output, 8 * SPX_N)) { printf("failed!\n"); return -1; } printf("successful.\n"); printf("Testing if thash matches thashx8 on two blocks ... 
"); for (j = 0; j < 8; j++) { thash(out8 + j * SPX_N, input + (2*j) * SPX_N, 2, &ctx, addr + j*8); } thashx8(output + 0*SPX_N, output + 1*SPX_N, output + 2*SPX_N, output + 3*SPX_N, output + 4*SPX_N, output + 5*SPX_N, output + 6*SPX_N, output + 7*SPX_N, input + 0*SPX_N, input + 2*SPX_N, input + 4*SPX_N, input + 6*SPX_N, input + 8*SPX_N, input + 10*SPX_N, input + 12*SPX_N, input + 14*SPX_N, 2, &ctx, addr); if (memcmp(out8, output, 8 * SPX_N)) { printf("failed!\n"); return -1; } printf("successful.\n"); return 0; } ================================================ FILE: sha2-avx2/thash_sha2_robustx8.c ================================================ #include #include #include "address.h" #include "utils.h" #include "params.h" #include "thashx8.h" #include "sha2.h" #include "sha256x8.h" #include "sha256avx.h" #if SPX_SHA512 #include "sha512x4.h" static void thashx8_512( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8] ); #endif /** * 8-way parallel version of thash; takes 8x as much input and output */ void thashx8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8]) { #if SPX_SHA512 if (inblocks > 1) { thashx8_512( out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, 
inblocks, ctx, addrx8); return; } #endif SPX_VLA(unsigned char, bufx8, 8 * (SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)); SPX_VLA(unsigned char, outbufx8, 8 * SPX_SHA256_OUTPUT_BYTES); SPX_VLA(unsigned char, bitmaskx8, 8 * (inblocks * SPX_N)); unsigned int i; for (i = 0; i < 8; i++) { memcpy(bufx8 + i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), ctx->pub_seed, SPX_N); memcpy(bufx8 + SPX_N + i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), addrx8 + i*8, SPX_SHA256_ADDR_BYTES); } mgf1x8(bitmaskx8, inblocks * SPX_N, bufx8 + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_N + SPX_SHA256_ADDR_BYTES); for (i = 0; i < inblocks * SPX_N; i++) { bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in0[i] ^ bitmaskx8[i + 0*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in1[i] ^ bitmaskx8[i + 1*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in2[i] ^ bitmaskx8[i + 2*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in3[i] ^ bitmaskx8[i + 3*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in4[i] ^ bitmaskx8[i + 4*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in5[i] ^ bitmaskx8[i + 5*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 6*(SPX_N + 
SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in6[i] ^ bitmaskx8[i + 6*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in7[i] ^ bitmaskx8[i + 7*(inblocks * SPX_N)]; } sha256x8_seeded( /* out */ outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, /* seed */ ctx->state_seeded, 512, /* in */ bufx8 + SPX_N + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */ ); memcpy(out0, outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out1, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out2, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out3, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out4, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out5, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out6, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out7, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, SPX_N); } #if SPX_SHA512 /** * 2x4-way parallel version of thash; this is for the uses of thash that are * based on SHA-512 */ static void thashx8_512( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const 
unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8]) { SPX_VLA(unsigned char, bufx8, 8 * (SPX_N + SPX_SHA256_ADDR_BYTES + inblocks * SPX_N)); SPX_VLA(unsigned char, outbuf, 4 * SPX_SHA512_OUTPUT_BYTES); SPX_VLA(unsigned char, bitmaskx4, 4 * (inblocks * SPX_N)); unsigned int i; for (i = 0; i < 8; i++) { memcpy(bufx8 + i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), ctx->pub_seed, SPX_N); memcpy(bufx8 + SPX_N + i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), addrx8 + i*8, SPX_SHA256_ADDR_BYTES); } mgf1x4_512(bitmaskx4, inblocks * SPX_N, bufx8 + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_N + SPX_SHA256_ADDR_BYTES); for (i = 0; i < inblocks * SPX_N; i++) { bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in0[i] ^ bitmaskx4[i + 0*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in1[i] ^ bitmaskx4[i + 1*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in2[i] ^ bitmaskx4[i + 2*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in3[i] ^ bitmaskx4[i + 3*(inblocks * SPX_N)]; } mgf1x4_512(bitmaskx4, inblocks * SPX_N, bufx8 + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_N + SPX_SHA256_ADDR_BYTES); for (i = 0; i < inblocks * SPX_N; i++) { 
bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in4[i] ^ bitmaskx4[i + 0*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in5[i] ^ bitmaskx4[i + 1*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in6[i] ^ bitmaskx4[i + 2*(inblocks * SPX_N)]; bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] = in7[i] ^ bitmaskx4[i + 3*(inblocks * SPX_N)]; } sha512x4_seeded( outbuf + 0*SPX_SHA512_OUTPUT_BYTES, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, ctx->state_seeded_512, /* seed */ 1024, /* seed length */ bufx8 + SPX_N + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */ ); memcpy(out0, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out1, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out2, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out3, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N); sha512x4_seeded( outbuf + 0*SPX_SHA512_OUTPUT_BYTES, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, ctx->state_seeded_512, /* seed */ 1024, /* seed length */ bufx8 + SPX_N + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + SPX_N + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */ ); memcpy(out4, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out5, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N); 
memcpy(out6, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out7, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N); } #endif ================================================ FILE: sha2-avx2/thash_sha2_simplex8.c ================================================ #include #include #include "address.h" #include "utils.h" #include "params.h" #include "thashx8.h" #include "sha2.h" #include "sha256x8.h" #include "sha256avx.h" #if SPX_SHA512 #include "sha512x4.h" static void thashx8_512( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8] ); #endif /** * 8-way parallel version of thash; takes 8x as much input and output */ void thashx8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8]) { #if SPX_SHA512 if (inblocks > 1) { thashx8_512( out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, inblocks, ctx, addrx8); return; } #endif unsigned char bufx8[8*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)]; unsigned char outbufx8[8*SPX_SHA256_OUTPUT_BYTES]; unsigned int i; for (i = 0; i < 8; i++) { memcpy(bufx8 + i*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), addrx8 + i*8, SPX_SHA256_ADDR_BYTES); } memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in0, inblocks * 
SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in1, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in2, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in3, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in4, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 5*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in5, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in6, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in7, inblocks * SPX_N); sha256x8_seeded( /* out */ outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, /* seed */ ctx->state_seeded, 512, /* in */ bufx8 + 0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 5*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */ ); memcpy(out0, outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out1, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out2, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out3, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out4, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out5, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out6, outbufx8 + 
6*SPX_SHA256_OUTPUT_BYTES, SPX_N); memcpy(out7, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, SPX_N); } #if SPX_SHA512 /** * 2x4-way parallel version of thash; this is for the uses of thash that are * based on SHA-512 */ static void thashx8_512( unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8]) { unsigned char bufx8[8*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)]; unsigned char outbuf[4*SPX_SHA512_OUTPUT_BYTES]; unsigned int i; for (i = 0; i < 8; i++) { memcpy(bufx8 + i*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), addrx8 + i*8, SPX_SHA256_ADDR_BYTES); } memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in0, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in1, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in2, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in3, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in4, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 5*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in5, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in6, inblocks * SPX_N); memcpy(bufx8 + SPX_SHA256_ADDR_BYTES + 7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in7, inblocks * SPX_N); sha512x4_seeded( outbuf + 0*SPX_SHA512_OUTPUT_BYTES, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, ctx->state_seeded_512, /* seed */ 1024, /* 
seed length */ bufx8 + 0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), /* in */ bufx8 + 1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */ ); memcpy(out0, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out1, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out2, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out3, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N); sha512x4_seeded( outbuf + 0*SPX_SHA512_OUTPUT_BYTES, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, ctx->state_seeded_512, /* seed */ 1024, /* seed length */ bufx8 + 4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), /* in */ bufx8 + 5*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), bufx8 + 7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */ ); memcpy(out4, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out5, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out6, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N); memcpy(out7, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N); } #endif ================================================ FILE: sha2-avx2/thashx8.h ================================================ #ifndef SPX_THASHX8_H #define SPX_THASHX8_H #include #include "context.h" #include "params.h" #define thashx8 SPX_NAMESPACE(thashx8) void thashx8(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned char *out4, unsigned char *out5, unsigned char *out6, unsigned char *out7, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, const unsigned char *in4, const unsigned char *in5, const unsigned char *in6, const unsigned char *in7, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx8[8*8]); #endif 
================================================ FILE: sha2-avx2/utilsx8.c ================================================ #include #include "utils.h" #include "utilsx8.h" #include "params.h" #include "thashx8.h" #include "address.h" /* * Generate the entire Merkle tree, computing the authentication path for leaf_idx, * and the resulting root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE) * * This expects tree_addrx8 to be initialized to 8 parallel addr structures for * the Merkle tree nodes * * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. * * This works by using the standard Merkle tree building algorithm, except * that each 'node' tracked is actually 8 consecutive nodes in the real tree. * When we combine two logical nodes ABCDEFGH and STUVWXYZ, we perform the H * operation on adjacent real nodes, forming the parent logical node * (AB)(CD)(EF)(GH)(ST)(UV)(WX)(YZ) * * When we get to the top three levels of the real tree (where there is only * one logical node), we continue this operation three more times; the right * most real node will by the actual root (and the other 7 nodes will be * garbage). 
We follow the same thashx8 logic so that the 'extract * authentication path components' part of the loop is still executed (and * to simplify the code somewhat) * * This currently assumes tree_height >= 3; I suspect that doing an adjusting * idx, addr_idx on the gen_leafx8 call if tree_height < 3 would fix it; since * we don't actually use such short trees, I haven't bothered */ void treehashx8(unsigned char *root, unsigned char *auth_path, const spx_ctx *ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leafx8)( unsigned char* /* Where to write the leaves */, const spx_ctx*, uint32_t idx, void *info), uint32_t tree_addrx8[8*8], void *info) { /* This is where we keep the intermediate nodes */ SPX_VLA(unsigned char, stackx8, 8 * tree_height * SPX_N); uint32_t left_adj = 0, prev_left_adj = 0; /* When we're doing the top 3 */ /* levels, the left-most part of the tree isn't at the beginning */ /* of current[]. These give the offset of the actual start */ uint32_t idx; uint32_t max_idx = (1 << (tree_height-3)) - 1; for (idx = 0;; idx++) { unsigned char current[8*SPX_N]; /* Current logical node */ gen_leafx8( current, ctx, 8*idx + idx_offset, info ); /* Now combine the freshly generated right node with previously */ /* generated left ones */ uint32_t internal_idx_offset = idx_offset; uint32_t internal_idx = idx; uint32_t internal_leaf = leaf_idx; uint32_t h; /* The height we are in the Merkle tree */ for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) { /* Special processing if we're at the top of the tree */ if (h >= tree_height - 3) { if (h == tree_height) { /* We hit the root; return it */ memcpy( root, ¤t[7*SPX_N], SPX_N ); return; } /* The tree indexing logic is a bit off in this case */ /* Adjust it so that the left-most node of the part of */ /* the tree that we're processing has index 0 */ prev_left_adj = left_adj; left_adj = 8 - (1 << (tree_height - h - 1)); } /* Check if we hit the top of the tree */ if (h == tree_height) { /* 
We hit the root; return it */ memcpy( root, ¤t[7*SPX_N], SPX_N ); return; } /* * Check if one of the nodes we have is a part of the * authentication path; if it is, write it out */ if ((((internal_idx << 3) ^ internal_leaf) & ~0x7) == 0) { memcpy( &auth_path[ h * SPX_N ], ¤t[(((internal_leaf&7)^1) + prev_left_adj) * SPX_N], SPX_N ); } /* * Check if we're at a left child; if so, stop going up the stack * Exception: if we've reached the end of the tree, keep on going * (so we combine the last 8 nodes into the one root node in three * more iterations) */ if ((internal_idx & 1) == 0 && idx < max_idx) { break; } /* Ok, we're at a right node (or doing the top 3 levels) */ /* Now combine the left and right logical nodes together */ /* Set the address of the node we're creating. */ int j; internal_idx_offset >>= 1; for (j = 0; j < 8; j++) { set_tree_height(tree_addrx8 + j*8, h + 1); set_tree_index(tree_addrx8 + j*8, (8/2) * (internal_idx&~1) + j - left_adj + internal_idx_offset ); } unsigned char *left = &stackx8[h * 8 * SPX_N]; thashx8( ¤t[0 * SPX_N], ¤t[1 * SPX_N], ¤t[2 * SPX_N], ¤t[3 * SPX_N], ¤t[4 * SPX_N], ¤t[5 * SPX_N], ¤t[6 * SPX_N], ¤t[7 * SPX_N], &left [0 * SPX_N], &left [2 * SPX_N], &left [4 * SPX_N], &left [6 * SPX_N], ¤t[0 * SPX_N], ¤t[2 * SPX_N], ¤t[4 * SPX_N], ¤t[6 * SPX_N], 2, ctx, tree_addrx8); } /* We've hit a left child; save the current for when we get the */ /* corresponding right right */ memcpy( &stackx8[h * 8 * SPX_N], current, 8 * SPX_N); } } ================================================ FILE: sha2-avx2/utilsx8.h ================================================ #ifndef SPX_UTILSX8_H #define SPX_UTILSX8_H #include #include "params.h" /** * For a given leaf index, computes the authentication path and the resulting * root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE). 
* Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. * * This implementation uses AVX to compute internal nodes 8 at a time (in * parallel) */ #define treehashx8 SPX_NAMESPACE(treehashx8) void treehashx8(unsigned char *root, unsigned char *auth_path, const spx_ctx *ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leafx8)( unsigned char* /* Where to write the leaves */, const spx_ctx* /* ctx */, uint32_t addr_idx, void *info), uint32_t tree_addrx8[8*8], void *info); #endif ================================================ FILE: sha2-avx2/wots.c ================================================ #include #include #include "utils.h" #include "utilsx8.h" #include "hash.h" #include "hashx8.h" #include "thash.h" #include "thashx8.h" #include "wots.h" #include "wotsx8.h" #include "address.h" #include "params.h" // TODO clarify address expectations, and make them more uniform. // TODO i.e. do we expect types to be set already? // TODO and do we expect modifications or copies? /** * Computes up the chains */ static void gen_chains( unsigned char *out, const unsigned char *in, unsigned int start[SPX_WOTS_LEN], unsigned int steps[SPX_WOTS_LEN], const spx_ctx *ctx, uint32_t addr[8]) { uint32_t i, j, k, idx, watching; int done; unsigned char empty[SPX_N]; unsigned char *bufs[8]; uint32_t addrs[8*8]; int l; uint16_t counts[SPX_WOTS_W] = { 0 }; uint16_t idxs[SPX_WOTS_LEN]; uint16_t total, newTotal; /* set addrs = {addr, addr, ..., addr} */ for (j = 0; j < 8; j++) { memcpy(addrs+j*8, addr, sizeof(uint32_t) * 8); } /* Initialize out with the value at position 'start'. */ memcpy(out, in, SPX_WOTS_LEN*SPX_N); /* Sort the chains in reverse order by steps using counting sort. 
*/ for (i = 0; i < SPX_WOTS_LEN; i++) { counts[steps[i]]++; } total = 0; for (l = SPX_WOTS_W - 1; l >= 0; l--) { newTotal = counts[l] + total; counts[l] = total; total = newTotal; } for (i = 0; i < SPX_WOTS_LEN; i++) { idxs[counts[steps[i]]] = i; counts[steps[i]]++; } /* We got our work cut out for us: do it! */ for (i = 0; i < SPX_WOTS_LEN; i += 8) { for (j = 0; j < 8 && i+j < SPX_WOTS_LEN; j++) { idx = idxs[i+j]; set_chain_addr(addrs+j*8, idx); bufs[j] = out + SPX_N * idx; } /* As the chains are sorted in reverse order, we know that the first * chain is the longest and the last one is the shortest. We keep * an eye on whether the last chain is done and then on the one before, * et cetera. */ watching = 7; done = 0; while (i + watching >= SPX_WOTS_LEN) { bufs[watching] = &empty[0]; watching--; } for (k = 0;; k++) { while (k == steps[idxs[i+watching]]) { bufs[watching] = &empty[0]; if (watching == 0) { done = 1; break; } watching--; } if (done) { break; } for (j = 0; j < watching + 1; j++) { set_hash_addr(addrs+j*8, k + start[idxs[i+j]]); } thashx8(bufs[0], bufs[1], bufs[2], bufs[3], bufs[4], bufs[5], bufs[6], bufs[7], bufs[0], bufs[1], bufs[2], bufs[3], bufs[4], bufs[5], bufs[6], bufs[7], 1, ctx, addrs); } } } /** * base_w algorithm as described in draft. * Interprets an array of bytes as integers in base w. * This only works when log_w is a divisor of 8. */ static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { int in = 0; int out = 0; unsigned char total; int bits = 0; int consumed; for (consumed = 0; consumed < out_len; consumed++) { if (bits == 0) { total = input[in]; in++; bits += 8; } bits -= SPX_WOTS_LOGW; output[out] = (total >> bits) & (SPX_WOTS_W - 1); out++; } } /* Computes the WOTS+ checksum over a message (in base_w). 
*/ static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { unsigned int csum = 0; unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8]; unsigned int i; /* Compute checksum. */ for (i = 0; i < SPX_WOTS_LEN1; i++) { csum += SPX_WOTS_W - 1 - msg_base_w[i]; } /* Convert checksum to base_w. */ /* Make sure expected empty zero bits are the least significant bits. */ csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8); ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes); } /* Takes a message and derives the matching chain lengths. */ void chain_lengths(unsigned int *lengths, const unsigned char *msg) { base_w(lengths, SPX_WOTS_LEN1, msg); wots_checksum(lengths + SPX_WOTS_LEN1, lengths); } /** * Takes a WOTS signature and an n-byte message, computes a WOTS public key. * * Writes the computed public key to 'pk'. */ void wots_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *msg, const spx_ctx *ctx, uint32_t addr[8]) { unsigned int steps[SPX_WOTS_LEN]; unsigned int start[SPX_WOTS_LEN]; uint32_t i; chain_lengths(start, msg); for (i = 0; i < SPX_WOTS_LEN; i++) { steps[i] = SPX_WOTS_W - 1 - start[i]; } gen_chains(pk, sig, start, steps, ctx, addr); } /* * This generates 8 sequential WOTS public keys * It also generates the WOTS signature if leaf_info indicates * that we're signing with one of these WOTS keys */ void wots_gen_leafx8(unsigned char *dest, const spx_ctx *ctx, uint32_t leaf_idx, void *v_info) { struct leaf_info_x8 *info = v_info; uint32_t *leaf_addr = info->leaf_addr; uint32_t *pk_addr = info->pk_addr; unsigned int i, j, k; unsigned char pk_buffer[ 8 * SPX_WOTS_BYTES ]; unsigned wots_offset = SPX_WOTS_BYTES; unsigned char *buffer; uint32_t wots_k_mask; unsigned wots_sign_index; if (((leaf_idx ^ info->wots_sign_leaf) & ~7) == 0) { /* We're traversing the leaf that's signing; generate the WOTS */ /* signature */ wots_k_mask = 0; 
wots_sign_index = info->wots_sign_leaf & 7; /* Which of of the 8 */ /* slots do the signatures come from */ } else { /* Nope, we're just generating pk's; turn off the signature logic */ wots_k_mask = ~0; wots_sign_index = 0; } for (j = 0; j < 8; j++) { set_keypair_addr( leaf_addr + j*8, leaf_idx + j ); set_keypair_addr( pk_addr + j*8, leaf_idx + j ); } for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) { uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k */ /* to the step if we're generating a signature, ~0 if we're not */ /* Start with the secret seed */ for (j = 0; j < 8; j++) { set_chain_addr(leaf_addr + j*8, i); set_hash_addr(leaf_addr + j*8, 0); set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTSPRF); } prf_addrx8(buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 2*wots_offset, buffer + 3*wots_offset, buffer + 4*wots_offset, buffer + 5*wots_offset, buffer + 6*wots_offset, buffer + 7*wots_offset, ctx, leaf_addr); for (j = 0; j < 8; j++) { set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTS); } /* Iterate down the WOTS chain */ for (k=0;; k++) { /* Check if one of the values we have needs to be saved as a */ /* part of the WOTS signature */ if (k == wots_k) { memcpy( info->wots_sig + i * SPX_N, buffer + wots_sign_index*wots_offset, SPX_N ); } /* Check if we hit the top of the chain */ if (k == SPX_WOTS_W - 1) break; /* Iterate one step on all 8 chains */ for (j = 0; j < 8; j++) { set_hash_addr(leaf_addr + j*8, k); } thashx8(buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 2*wots_offset, buffer + 3*wots_offset, buffer + 4*wots_offset, buffer + 5*wots_offset, buffer + 6*wots_offset, buffer + 7*wots_offset, buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 2*wots_offset, buffer + 3*wots_offset, buffer + 4*wots_offset, buffer + 5*wots_offset, buffer + 6*wots_offset, buffer + 7*wots_offset, 1, ctx, leaf_addr); } } /* Do the final thash to generate the public keys */ thashx8(dest + 0*SPX_N, dest + 1*SPX_N, dest + 2*SPX_N, 
dest + 3*SPX_N, dest + 4*SPX_N, dest + 5*SPX_N, dest + 6*SPX_N, dest + 7*SPX_N, pk_buffer + 0*wots_offset, pk_buffer + 1*wots_offset, pk_buffer + 2*wots_offset, pk_buffer + 3*wots_offset, pk_buffer + 4*wots_offset, pk_buffer + 5*wots_offset, pk_buffer + 6*wots_offset, pk_buffer + 7*wots_offset, SPX_WOTS_LEN, ctx, pk_addr); } ================================================ FILE: sha2-avx2/wotsx8.h ================================================ #if !defined( WOTSX8_H_ ) #define WOTSX8_H_ #include #include "params.h" /* * This is here to provide an interface to the internal wots_gen_leafx8 * routine. While this routine is not referenced in the package outside of * wots.c, it is called from the stand-alone benchmark code to characterize * the performance */ struct leaf_info_x8 { unsigned char *wots_sig; uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */ uint32_t *wots_steps; uint32_t leaf_addr[8*8]; uint32_t pk_addr[8*8]; }; /* Macro to set the leaf_info to something 'benign', that is, it would */ /* run with the same time as it does during the real signing process */ /* Used only by the benchmark code */ #define INITIALIZE_LEAF_INFO_X8(info, addr, step_buffer) { \ info.wots_sig = 0; \ info.wots_sign_leaf = ~0; \ info.wots_steps = step_buffer; \ int i; \ for (i=0; i<8; i++) { \ memcpy( &info.leaf_addr[8*i], addr, 32 ); \ memcpy( &info.pk_addr[8*i], addr, 32 ); \ } \ } #define wots_gen_leafx8 SPX_NAMESPACE(wots_gen_leafx8) void wots_gen_leafx8(unsigned char *dest, const spx_ctx *ctx, uint32_t leaf_idx, void *v_info); #endif /* WOTSX8_H_ */ ================================================ FILE: shake-a64/.gitignore ================================================ test/* !test/*.c PQCsignKAT_*.rsp PQCsignKAT_*.req PQCgenKAT_sign ================================================ FILE: shake-a64/Makefile ================================================ PARAMS = sphincs-shake-128f THASH = robust CFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes 
-O3 -std=c99 -fomit-frame-pointer -flto -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS) SOURCES = hash_shake.c hash_shakex2.c thash_shake_$(THASH)x2.c address.c randombytes.c merkle.c wots.c utils.c utilsx2.c fors.c sign.c fips202.c fips202x2.c f1600x2_const.c f1600x2.s HEADERS = params.h hash.h hashx2.h thashx2.h address.h randombytes.h merkle.h wots.h utils.h utilsx2.h fors.h api.h fips202.h fips202x2.h f1600x2.h thash.h DET_SOURCES = $(SOURCES:randombytes.%=rng.%) DET_HEADERS = $(HEADERS:randombytes.%=rng.%) TESTS = test/fors \ test/spx \ test/thashx2 \ BENCHMARK = test/benchmark .PHONY: clean test benchmark default: PQCgenKAT_sign all: PQCgenKAT_sign tests benchmarks tests: $(TESTS) test: $(TESTS:=.exec) benchmarks: $(BENCHMARK) benchmark: $(BENCHMARK:=.exec) PQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS) $(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto test/benchmark: test/benchmark.c test/cycles.c $(SOURCES) $(HEADERS) $(CC) $(CFLAGS) -o $@ test/cycles.c $(SOURCES) $< $(LDLIBS) test/%: test/%.c $(SOURCES) $(HEADERS) $(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS) test/%.exec: test/% @$< clean: -$(RM) $(TESTS) -$(RM) $(BENCHMARK) -$(RM) PQCgenKAT_sign -$(RM) PQCsignKAT_*.rsp -$(RM) PQCsignKAT_*.req ================================================ FILE: shake-a64/context.h ================================================ #ifndef SPX_CONTEXT_H #define SPX_CONTEXT_H #include #include "params.h" typedef struct { uint8_t pub_seed[SPX_N]; uint8_t sk_seed[SPX_N]; } spx_ctx; #endif ================================================ FILE: shake-a64/f1600x2.h ================================================ #ifndef SPX_F1600X2_H #define SPX_F1600X2_H #include extern uint64_t f1600_RC[24]; extern void _f1600x2(uint64_t* a, uint64_t* rc); #define f1600x2(s) do {_f1600x2((s), f1600_RC);} while(0) #endif ================================================ FILE: shake-a64/f1600x2.s ================================================ # From https://github.com/bwesterb/armed-keccak 
.macro round # Execute theta, but without xoring into the state yet. # Compute parities p[i] = a[i] ^ a[5+i] ^ ... ^ a[20+i]. eor3.16b v25, v0, v5, v10 eor3.16b v26, v1, v6, v11 eor3.16b v27, v2, v7, v12 eor3.16b v28, v3, v8, v13 eor3.16b v29, v4, v9, v14 eor3.16b v25, v25, v15, v20 eor3.16b v26, v26, v16, v21 eor3.16b v27, v27, v17, v22 eor3.16b v28, v28, v18, v23 eor3.16b v29, v29, v19, v24 # d[0] = rotl(p[1], 1) ^ p[4] rax1.2d v30, v29, v26 # d[3] = rotl(p[4], 1) ^ p[2] rax1.2d v29, v27, v29 # d[1] = rotl(p[2], 1) ^ p[0] rax1.2d v27, v25, v27 # d[4] = rotl(p[0], 1) ^ p[3] rax1.2d v25, v28, v25 # d[2] = rotl(p[3], 1) ^ p[1] rax1.2d v28, v26, v28 # Xor parities from step theta into the state at the same time # as executing rho and pi. eor.16b v0, v0, v30 mov.16b v31, v1 xar.2d v1, v6, v27, 20 xar.2d v6, v9, v25, 44 xar.2d v9, v22, v28, 3 xar.2d v22, v14, v25, 25 xar.2d v14, v20, v30, 46 xar.2d v20, v2, v28, 2 xar.2d v2, v12, v28, 21 xar.2d v12, v13, v29, 39 xar.2d v13, v19, v25, 56 xar.2d v19, v23, v29, 8 xar.2d v23, v15, v30, 23 xar.2d v15, v4, v25, 37 xar.2d v4, v24, v25, 50 xar.2d v24, v21, v27, 62 xar.2d v21, v8, v29, 9 xar.2d v8, v16, v27, 19 xar.2d v16, v5, v30, 28 xar.2d v5, v3, v29, 36 xar.2d v3, v18, v29, 43 xar.2d v18, v17, v28, 49 xar.2d v17, v11, v27, 54 xar.2d v11, v7, v28, 58 xar.2d v7, v10, v30, 61 xar.2d v10, v31, v27, 63 # Chi bcax.16b v25, v0, v2, v1 bcax.16b v26, v1, v3, v2 bcax.16b v2, v2, v4, v3 bcax.16b v3, v3, v0, v4 bcax.16b v4, v4, v1, v0 mov.16b v0, v25 mov.16b v1, v26 bcax.16b v25, v5, v7, v6 bcax.16b v26, v6, v8, v7 bcax.16b v7, v7, v9, v8 bcax.16b v8, v8, v5, v9 bcax.16b v9, v9, v6, v5 mov.16b v5, v25 mov.16b v6, v26 bcax.16b v25, v10, v12, v11 bcax.16b v26, v11, v13, v12 bcax.16b v12, v12, v14, v13 bcax.16b v13, v13, v10, v14 bcax.16b v14, v14, v11, v10 mov.16b v10, v25 mov.16b v11, v26 bcax.16b v25, v15, v17, v16 bcax.16b v26, v16, v18, v17 bcax.16b v17, v17, v19, v18 bcax.16b v18, v18, v15, v19 bcax.16b v19, v19, v16, v15 mov.16b 
v15, v25 mov.16b v16, v26 bcax.16b v25, v20, v22, v21 bcax.16b v26, v21, v23, v22 bcax.16b v22, v22, v24, v23 bcax.16b v23, v23, v20, v24 bcax.16b v24, v24, v21, v20 mov.16b v20, v25 mov.16b v21, v26 # iota ld1r {v25.2d}, [x1], #8 eor.16b v0, v0, v25 .endm .align 4 .global __f1600x2 __f1600x2: stp d8, d9, [sp,#-16]! stp d10, d11, [sp,#-16]! stp d12, d13, [sp,#-16]! stp d14, d15, [sp,#-16]! mov x2, x0 mov x3, #24 ld1.2d {v0, v1, v2, v3}, [x0], #64 ld1.2d {v4, v5, v6, v7}, [x0], #64 ld1.2d {v8, v9, v10, v11}, [x0], #64 ld1.2d {v12, v13, v14, v15}, [x0], #64 ld1.2d {v16, v17, v18, v19}, [x0], #64 ld1.2d {v20, v21, v22, v23}, [x0], #64 ld1.2d {v24}, [x0] loop: round subs x3, x3, #1 cbnz x3, loop mov x0, x2 st1.2d {v0, v1, v2, v3}, [x0], #64 st1.2d {v4, v5, v6, v7}, [x0], #64 st1.2d {v8, v9, v10, v11}, [x0], #64 st1.2d {v12, v13, v14, v15}, [x0], #64 st1.2d {v16, v17, v18, v19}, [x0], #64 st1.2d {v20, v21, v22, v23}, [x0], #64 st1.2d {v24}, [x0] ldp d14, d15, [sp], #16 ldp d12, d13, [sp], #16 ldp d10, d11, [sp], #16 ldp d8, d9, [sp], #16 ret lr ================================================ FILE: shake-a64/f1600x2_const.c ================================================ #include "f1600x2.h" uint64_t f1600_RC[24] = { 0x0000000000000001, 0x0000000000008082, 0x800000000000808A, 0x8000000080008000, 0x000000000000808B, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009, 0x000000000000008A, 0x0000000000000088, 0x0000000080008009, 0x000000008000000A, 0x000000008000808B, 0x800000000000008B, 0x8000000000008089, 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 0x000000000000800A, 0x800000008000000A, 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008, }; ================================================ FILE: shake-a64/fips202x2.c ================================================ #include #include #include "fips202x2.h" #include "fips202.h" #include "f1600x2.h" uint64_t load64(const unsigned char *x) { unsigned long long r = 0, i; for 
/*
 * Absorb two equally long messages into the interleaved two-way state `s`
 * (lane i of message 0 lives in s[2*i], lane i of message 1 in s[2*i+1]).
 *
 * Every full block of `r` bytes is XORed into the state and followed by
 * a call to f1600x2.  The remaining bytes are then padded (suffix byte `p`
 * right after the data, 0x80 OR-ed into the last byte of the block) and
 * XORed in as well; no permutation is applied after this final block.
 */
static void keccak_absorb2x(uint64_t *s,
                            unsigned int r,
                            const unsigned char *m0,
                            const unsigned char *m1,
                            unsigned long long int mlen,
                            unsigned char p)
{
    const unsigned int lanes = r / 8;
    unsigned char last0[200];
    unsigned char last1[200];
    unsigned long long pos;
    unsigned int lane;

    /* Full blocks. */
    for (; mlen >= r; mlen -= r, m0 += r, m1 += r) {
        for (lane = 0; lane < lanes; ++lane) {
            s[2*lane + 0] ^= load64(m0 + 8 * lane);
            s[2*lane + 1] ^= load64(m1 + 8 * lane);
        }
        f1600x2(s);
    }

    /* Build the final, padded block: message tail first, zeros after. */
    for (pos = 0; pos < r; ++pos) {
        if (pos < mlen) {
            last0[pos] = m0[pos];
            last1[pos] = m1[pos];
        } else {
            last0[pos] = 0;
            last1[pos] = 0;
        }
    }
    last0[mlen] = p;
    last1[mlen] = p;
    last0[r - 1] |= 128;
    last1[r - 1] |= 128;

    for (lane = 0; lane < lanes; ++lane) {
        s[2*lane + 0] ^= load64(last0 + 8 * lane);
        s[2*lane + 1] ^= load64(last1 + 8 * lane);
    }
}
inlen); void shake256x2(unsigned char *out0, unsigned char *out1, unsigned long long outlen, unsigned char *in0, unsigned char *in1, unsigned long long inlen); #endif ================================================ FILE: shake-a64/fors.c ================================================ #include #include #include #include "thash.h" #include "fors.h" #include "utils.h" #include "utilsx2.h" #include "hash.h" #include "hashx2.h" #include "thashx2.h" #include "address.h" static void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx, uint32_t fors_leaf_addr[8]) { prf_addr(sk, ctx, fors_leaf_addr); } static void fors_gen_skx2(unsigned char *sk0, unsigned char *sk1, const spx_ctx *ctx, uint32_t fors_leaf_addrx2[2*8]) { prf_addrx2(sk0, sk1, ctx, fors_leaf_addrx2); } static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, const spx_ctx *ctx, uint32_t fors_leaf_addr[8]) { thash(leaf, sk, 1, ctx, fors_leaf_addr); } static void fors_sk_to_leafx2(unsigned char *leaf0, unsigned char *leaf1, const unsigned char *sk0, const unsigned char *sk1, const spx_ctx *ctx, uint32_t fors_leaf_addrx2[2*8]) { thashx2(leaf0, leaf1, sk0, sk1, 1, ctx, fors_leaf_addrx2); } struct fors_gen_leaf_info { uint32_t leaf_addrx[2*8]; }; static void fors_gen_leafx2(unsigned char *leaf, const spx_ctx *ctx, uint32_t addr_idx, void *info) { struct fors_gen_leaf_info *fors_info = info; uint32_t *fors_leaf_addrx2 = fors_info->leaf_addrx; unsigned int j; /* Only set the parts that the caller doesn't set */ for (j = 0; j < 2; j++) { set_tree_index(fors_leaf_addrx2 + j*8, addr_idx + j); set_type(fors_leaf_addrx2 + j*8, SPX_ADDR_TYPE_FORSPRF); } fors_gen_skx2(leaf + 0*SPX_N, leaf + 1*SPX_N, ctx, fors_leaf_addrx2); for (j = 0; j < 2; j++) { set_type(fors_leaf_addrx2 + j*8, SPX_ADDR_TYPE_FORSTREE); } fors_sk_to_leafx2(leaf + 0*SPX_N, leaf + 1*SPX_N, leaf + 0*SPX_N, leaf + 1*SPX_N, ctx, fors_leaf_addrx2); } /** * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers. 
* Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. * Assumes indices has space for SPX_FORS_TREES integers. */ static void message_to_indices(uint32_t *indices, const unsigned char *m) { unsigned int i, j; unsigned int offset = 0; for (i = 0; i < SPX_FORS_TREES; i++) { indices[i] = 0; for (j = 0; j < SPX_FORS_HEIGHT; j++) { indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; offset++; } } } /** * Signs a message m, deriving the secret key from sk_seed and the FTS address. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ void fors_sign(unsigned char *sig, unsigned char *pk, const unsigned char *m, const spx_ctx *ctx, const uint32_t fors_addr[8]) { uint32_t indices[SPX_FORS_TREES]; unsigned char roots[SPX_FORS_TREES * SPX_N]; uint32_t fors_tree_addr[2*8] = {0}; struct fors_gen_leaf_info fors_info = {0}; uint32_t *fors_leaf_addr = fors_info.leaf_addrx; uint32_t fors_pk_addr[8] = {0}; uint32_t idx_offset; unsigned int i; for (i=0; i<2; i++) { copy_keypair_addr(fors_tree_addr + 8*i, fors_addr); set_type(fors_tree_addr + 8*i, SPX_ADDR_TYPE_FORSTREE); copy_keypair_addr(fors_leaf_addr + 8*i, fors_addr); } copy_keypair_addr(fors_pk_addr, fors_addr); set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK); message_to_indices(indices, m); for (i = 0; i < SPX_FORS_TREES; i++) { idx_offset = i * (1 << SPX_FORS_HEIGHT); set_tree_height(fors_tree_addr, 0); set_tree_index(fors_tree_addr, indices[i] + idx_offset); /* Include the secret key part that produces the selected leaf node. */ set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF); fors_gen_sk(sig, ctx, fors_tree_addr); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE); sig += SPX_N; /* Compute the authentication path for this leaf node. */ treehashx2(roots + i*SPX_N, sig, ctx, indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx2, fors_tree_addr, &fors_info); sig += SPX_N * SPX_FORS_HEIGHT; } /* Hash horizontally across all tree roots to derive the public key. 
*/ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } /** * Derives the FORS public key from a signature. * This can be used for verification by comparing to a known public key, or to * subsequently verify a signature on the derived public key. The latter is the * typical use-case when used as an FTS below an OTS in a hypertree. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ void fors_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *m, const spx_ctx *ctx, const uint32_t fors_addr[8]) { uint32_t indices[SPX_FORS_TREES]; unsigned char roots[SPX_FORS_TREES * SPX_N]; unsigned char leaf[SPX_N]; uint32_t fors_tree_addr[8] = {0}; uint32_t fors_pk_addr[8] = {0}; uint32_t idx_offset; unsigned int i; copy_keypair_addr(fors_tree_addr, fors_addr); copy_keypair_addr(fors_pk_addr, fors_addr); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE); set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK); message_to_indices(indices, m); for (i = 0; i < SPX_FORS_TREES; i++) { idx_offset = i * (1 << SPX_FORS_HEIGHT); set_tree_height(fors_tree_addr, 0); set_tree_index(fors_tree_addr, indices[i] + idx_offset); /* Derive the leaf from the included secret key part. */ fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr); sig += SPX_N; /* Derive the corresponding root node of this tree. */ compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset, sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr); sig += SPX_N * SPX_FORS_HEIGHT; } /* Hash horizontally across all tree roots to derive the public key. 
*/ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } ================================================ FILE: shake-a64/hash_shakex2.c ================================================ #include #include #include "address.h" #include "params.h" #include "fips202x2.h" #include "f1600x2.h" #include "hashx2.h" /* * 2-way parallel version of prf_addr; takes 2x as much input and output */ void prf_addrx2(unsigned char *out0, unsigned char *out1, const spx_ctx *ctx, const uint32_t addrx2[2*8]) { /* As we write and read only a few quadwords, it is more efficient to * build and extract from the fourway SHAKE256 state by hand. */ uint64_t state[50] = {0}; for (int i = 0; i < SPX_N/8; i++) { uint64_t x = load64(ctx->pub_seed + 8*i); state[2*i] = x; state[2*i+1] = x; } for (int i = 0; i < 4; i++) { state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32) | (uint64_t)addrx2[2*i]; state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32) | (uint64_t)addrx2[8+2*i]; } for (int i = 0; i < SPX_N/8; i++) { uint64_t x = load64(ctx->sk_seed + 8*i); state[2*(SPX_N/8+i+4)] = x; state[2*(SPX_N/8+i+4)+1] = x; } /* SHAKE domain separator and padding. 
*/ state[2*(SPX_N/4+4)] = 0x1f; state[2*(SPX_N/4+4)+1] = 0x1f; state[2*16] = 0x80ULL << 56; state[2*16+1] = 0x80ULL << 56; f1600x2(state); for (int i = 0; i < SPX_N/8; i++) { store64(out0 + 8*i, state[2*i]); store64(out1 + 8*i, state[2*i+1]); } } ================================================ FILE: shake-a64/hashx2.h ================================================ #ifndef SPX_HASHX2_H #define SPX_HASHX2_H #include #include "context.h" #include "params.h" #define prf_addrx2 SPX_NAMESPACE(prf_addrx2) void prf_addrx2(unsigned char *out0, unsigned char *out1, const spx_ctx *ctx, const uint32_t addrx2[2*8]); #endif ================================================ FILE: shake-a64/merkle.c ================================================ #include #include #include "utils.h" #include "utilsx2.h" #include "wots.h" #include "wotsx2.h" #include "merkle.h" #include "address.h" #include "params.h" /* * This generates a Merkle signature (WOTS signature followed by the Merkle * authentication path). */ void merkle_sign(uint8_t *sig, unsigned char *root, const spx_ctx* ctx, uint32_t wots_addr[8], uint32_t tree_addr[8], uint32_t idx_leaf) { unsigned char *auth_path = sig + SPX_WOTS_BYTES; uint32_t tree_addrx2[2*8] = { 0 }; int j; struct leaf_info_x2 info = { 0 }; unsigned steps[ SPX_WOTS_LEN ]; info.wots_sig = sig; chain_lengths(steps, root); info.wots_steps = steps; for (j=0; j<2; j++) { set_type(&tree_addrx2[8*j], SPX_ADDR_TYPE_HASHTREE); set_type(&info.leaf_addr[8*j], SPX_ADDR_TYPE_WOTS); set_type(&info.pk_addr[8*j], SPX_ADDR_TYPE_WOTSPK); copy_subtree_addr(&tree_addrx2[8*j], tree_addr); copy_subtree_addr(&info.leaf_addr[8*j], wots_addr); copy_subtree_addr(&info.pk_addr[8*j], wots_addr); } info.wots_sign_leaf = idx_leaf; treehashx2(root, auth_path, ctx, idx_leaf, 0, SPX_TREE_HEIGHT, wots_gen_leafx2, tree_addrx2, &info); } /* Compute root node of the top-most subtree. 
/*
 * Replaces l[i] with l[i+1] - l[i] for i = 0 .. llen-2, turning llen
 * consecutive timestamps into llen-1 intervals.  The last element is left
 * untouched.
 *
 * With fewer than two entries there is nothing to difference; returning
 * early also keeps `llen - 1` from wrapping around when llen == 0.
 */
static void delta(unsigned long long *l, size_t llen)
{
    size_t i;

    if (llen < 2) {
        return;
    }
    /* size_t index: matches the type of llen, so no narrowing compare. */
    for (i = 0; i + 1 < llen; i++) {
        l[i] = l[i+1] - l[i];
    }
}
/*
 * Prints one benchmark line: average wall-clock time per run, the median
 * cycle count, and the median scaled by `mul`.
 *
 * result: total elapsed time in microseconds over all runs.
 * l:      llen + 1 consecutive cycle-counter readings; overwritten in place
 *         with their differences and then sorted by median().
 * llen:   number of measured runs.
 * mul:    multiplier shown next to the median.
 *
 * The run count is taken from llen rather than from the NTESTS macro, so
 * the averaging, the differencing and the median all agree on the same
 * length (every existing caller passes NTESTS, so output is unchanged).
 */
static void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul)
{
    unsigned long long med;

    result /= (double)llen;
    delta(l, llen + 1);
    med = median(l, llen);

    printf("avg. %11.2lf us (%2.2lf sec); median ", result, result / 1e6);
    printfalignedcomma(med, 12);
    printf(" cycles, %5llux: ", mul);
    printfalignedcomma(mul*med, 12);
    printf(" cycles\n");
}
*/ setbuf(stdout, NULL); spx_ctx ctx; unsigned char pk[SPX_PK_BYTES]; unsigned char sk[SPX_SK_BYTES]; unsigned char *m = malloc(SPX_MLEN); unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN); unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN); unsigned char fors_pk[SPX_FORS_PK_BYTES]; unsigned char fors_m[SPX_FORS_MSG_BYTES]; unsigned char fors_sig[SPX_FORS_BYTES]; unsigned char addr[SPX_ADDR_BYTES*2]; unsigned char wots_pk[4*SPX_WOTS_PK_BYTES]; unsigned char block[SPX_N]; unsigned long long smlen; unsigned long long mlen; unsigned long long t[NTESTS+1]; struct timespec start, stop; double result; int i; uint64_t statex2[50]; randombytes(m, SPX_MLEN); randombytes(addr, SPX_ADDR_BYTES*2); printf("Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\n", SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES, SPX_WOTS_W); printf("Running %d iterations.\n", NTESTS); MEASURT("thash ", 1, thash(block, block, 1, &ctx, (uint32_t*)addr)); MEASURT("f1600x2 ", 1, f1600x2(statex2)); MEASURT("thashx2 ", 1, thashx2(block, block, block, block, 1, &ctx, (uint32_t*)addr)); MEASURE("Generating keypair.. ", 1, crypto_sign_keypair(pk, sk)); MEASURE(" - WOTS pk gen 2x.. ", (1 << SPX_TREE_HEIGHT) / 2, wots_gen_pkx2(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Signing.. ", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk)); MEASURE(" - FORS signing.. ", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr)); MEASURE(" - WOTS pk gen x2.. ", SPX_D * (1 << SPX_TREE_HEIGHT) / 2, wots_gen_pkx2(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Verifying.. 
", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk)); printf("Signature size: %d (%.2f KiB)\n", SPX_BYTES, SPX_BYTES / 1024.0); printf("Public key size: %d (%.2f KiB)\n", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0); printf("Secret key size: %d (%.2f KiB)\n", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0); free(m); free(sm); free(mout); return 0; } static void wots_gen_pkx2(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) { struct leaf_info_x2 leaf; unsigned steps[ SPX_WOTS_LEN ] = { 0 }; INITIALIZE_LEAF_INFO_X2(leaf, addr, steps); wots_gen_leafx2(pk, ctx, 0, &leaf); } ================================================ FILE: shake-a64/test/thashx2.c ================================================ #include #include #include "../thashx2.h" #include "../thash.h" #include "../randombytes.h" #include "../params.h" int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); unsigned char input[2*SPX_N]; unsigned char output[2*SPX_N]; unsigned char out2[2*SPX_N]; uint32_t addr[2*8] = {0}; unsigned int j; spx_ctx ctx; randombytes(ctx.pub_seed, SPX_N); randombytes(input, 4*SPX_N); randombytes((unsigned char *)addr, 2 * 8 * sizeof(uint32_t)); printf("Testing if thash matches thashx2.. 
"); for (j = 0; j < 2; j++) { thash(out2 + j * SPX_N, input + j * SPX_N, 1, &ctx, addr + j*8); } thashx2(output + 0*SPX_N, output + 1*SPX_N, input + 0*SPX_N, input + 1*SPX_N, 1, &ctx, addr); if (memcmp(out2, output, 2 * SPX_N)) { printf("failed!\n"); return -1; } printf("successful.\n"); return 0; } ================================================ FILE: shake-a64/thash.h ================================================ #ifndef SPX_THASHX2_AS_ONE #define SPX_THASHX2_AS_ONE #include #include "context.h" void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks, const spx_ctx *ctx, uint32_t addr[8]); #endif ================================================ FILE: shake-a64/thash_shake_robustx2.c ================================================ #include #include #include "thash.h" #include "thashx2.h" #include "address.h" #include "params.h" #include "utils.h" #include "f1600x2.h" #include "fips202x2.h" void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks, const spx_ctx *ctx, uint32_t addr[8]) { uint32_t addrx2 [2*8] = { addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7], addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7] }; thashx2(out, out, in, in, inblocks, ctx, addrx2); } /** * 2-way parallel version of thash; takes 2x as much input and output */ void thashx2(unsigned char *out0, unsigned char *out1, const unsigned char *in0, const unsigned char *in1, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx2[2*8]) { if (inblocks == 1 || inblocks == 2) { /* As we write and read only a few quadwords, it is more efficient to * build and extract from the twoway SHAKE256 state by hand. 
*/ uint64_t state[50] = {0}; uint64_t state2[50]; for (int i = 0; i < SPX_N/8; i++) { uint64_t x = load64(ctx->pub_seed + 8*i); state[2*i] = x; state[2*i+1] = x; } for (int i = 0; i < 4; i++) { state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32) | (uint64_t)addrx2[2*i]; state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32) | (uint64_t)addrx2[8+2*i]; } /* Domain separator and padding. */ state[2*16] = 0x80ULL << 56; state[2*16+1] = 0x80ULL << 56; state[2*((SPX_N/8)+4)] ^= 0x1f; state[2*((SPX_N/8)+4)+1] ^= 0x1f; /* We will permutate state2 with f1600x2 to compute the bitmask, * but first we'll copy it to state2 which will be used to compute * the final output, as its input is almost identical. */ memcpy(state2, state, 400); f1600x2(state); /* By copying from state, state2 already contains the pub_seed * and address. We just need to copy in the input blocks xorred with * the bitmask we just computed. */ for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) { state2[2*(SPX_N/8+4+i)] = state[2*i] ^ load64(in0 + 8*i); state2[2*(SPX_N/8+4+i)+1] = state[2*i+1] ^ load64(in1 + 8*i); } /* Domain separator and start of padding. Note that the quadwords * around are already zeroed for state from which we copied. * We do a XOR instead of a set as this might be the 16th quadword * when N=32 and inblocks=2, which already contains the end * of the padding. 
*/ state2[2*((SPX_N/8)*(1+inblocks)+4)] ^= 0x1f; state2[2*((SPX_N/8)*(1+inblocks)+4)+1] ^= 0x1f; f1600x2(state2); for (int i = 0; i < SPX_N/8; i++) { store64(out0 + 8*i, state2[2*i]); store64(out1 + 8*i, state2[2*i+1]); } } else { SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N); SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N); SPX_VLA(unsigned char, bitmask0, inblocks * SPX_N); SPX_VLA(unsigned char, bitmask1, inblocks * SPX_N); unsigned int i; memcpy(buf0, ctx->pub_seed, SPX_N); memcpy(buf1, ctx->pub_seed, SPX_N); memcpy(buf0 + SPX_N, addrx2 + 0*8, SPX_ADDR_BYTES); memcpy(buf1 + SPX_N, addrx2 + 1*8, SPX_ADDR_BYTES); shake256x2(bitmask0, bitmask1, inblocks * SPX_N, buf0, buf1, SPX_N + SPX_ADDR_BYTES); for (i = 0; i < inblocks * SPX_N; i++) { buf0[SPX_N + SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; buf1[SPX_N + SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; } shake256x2(out0, out1, SPX_N, buf0, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N); } } ================================================ FILE: shake-a64/thash_shake_simplex2.c ================================================ #include #include #include "thash.h" #include "thashx2.h" #include "address.h" #include "params.h" #include "utils.h" #include "f1600x2.h" #include "fips202x2.h" void thash(unsigned char *out, const unsigned char *in, unsigned int inblocks, const spx_ctx *ctx, uint32_t addr[8]) { uint32_t addrx2 [2*8] = { addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7], addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7] }; thashx2(out, out, in, in, inblocks, ctx, addrx2); } /** * 2-way parallel version of thash; takes 2x as much input and output */ void thashx2(unsigned char *out0, unsigned char *out1, const unsigned char *in0, const unsigned char *in1, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx2[2*8]) { if (inblocks == 1 || inblocks == 2) { /* As we write and read only a few quadwords, it is more 
efficient to * build and extract from the twoway SHAKE256 state by hand. */ uint64_t state[50] = {0}; for (int i = 0; i < SPX_N/8; i++) { uint64_t x = load64(ctx->pub_seed + 8*i); state[2*i] = x; state[2*i+1] = x; } for (int i = 0; i < 4; i++) { state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32) | (uint64_t)addrx2[2*i]; state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32) | (uint64_t)addrx2[8+2*i]; } for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) { state[2*(SPX_N/8+4+i)] = load64(in0+8*i); state[2*(SPX_N/8+4+i)+1] = load64(in1+8*i); } /* Domain separator and padding. */ state[2*16] = 0x80ULL << 56; state[2*16+1] = 0x80ULL << 56; state[2*((SPX_N/8)*(1+inblocks)+4)] ^= 0x1f; state[2*((SPX_N/8)*(1+inblocks)+4)+1] ^= 0x1f; f1600x2(state); for (int i = 0; i < SPX_N/8; i++) { store64(out0 + 8*i, state[2*i]); store64(out1 + 8*i, state[2*i+1]); } } else { SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N); SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N); memcpy(buf0, ctx->pub_seed, SPX_N); memcpy(buf1, ctx->pub_seed, SPX_N); memcpy(buf0 + SPX_N, addrx2 + 0*8, SPX_ADDR_BYTES); memcpy(buf1 + SPX_N, addrx2 + 1*8, SPX_ADDR_BYTES); memcpy(buf0 + SPX_N + SPX_ADDR_BYTES, in0, inblocks * SPX_N); memcpy(buf1 + SPX_N + SPX_ADDR_BYTES, in1, inblocks * SPX_N); shake256x2(out0, out1, SPX_N, buf0, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N); } } ================================================ FILE: shake-a64/thashx2.h ================================================ #ifndef SPX_THASHX2_H #define SPX_THASHX2_H #include #include "context.h" #include "params.h" #define thashx2 SPX_NAMESPACE(thashx2) void thashx2(unsigned char *out0, unsigned char *out1, const unsigned char *in0, const unsigned char *in1, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx2[2*8]); #endif ================================================ FILE: shake-a64/utilsx2.c ================================================ #include 
#include "utils.h" #include "utilsx2.h" #include "params.h" #include "thashx2.h" #include "address.h" /* * Generate the entire Merkle tree, computing the authentication path for leaf_idx, * and the resulting root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE) * * This expects tree_addrx2 to be initialized to 2 parallel addr structures for * the Merkle tree nodes * * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. * * This works by using the standard Merkle tree building algorithm, except * that each 'node' tracked is actually 2 consecutive nodes in the real tree. * When we combine two logical nodes AB and WX, we perform the H * operation on adjacent real nodes, forming the parent logical node * (AB)(WX) * * When we get to the top level of the real tree (where there is only * one logical node), we continue this operation one more time; the right * most real node will by the actual root (and the other node will be * garbage). We follow the same thashx2 logic so that the 'extract * authentication path components' part of the loop is still executed (and * to simplify the code somewhat) */ void treehashx2(unsigned char *root, unsigned char *auth_path, const spx_ctx *ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leafx2)( unsigned char* /* Where to write the leaves */, const spx_ctx*, uint32_t idx, void *info), uint32_t tree_addrx2[2*8], void *info) { /* This is where we keep the intermediate nodes */ SPX_VLA(unsigned char, stackx2, 2 * tree_height * SPX_N); uint32_t left_adj = 0, prev_left_adj = 0; /* When we're doing the top */ /* level, the left-most part of the tree isn't at the beginning */ /* of current[]. 
These give the offset of the actual start */ uint32_t idx; uint32_t max_idx = (1 << (tree_height-1)) - 1; for (idx = 0;; idx++) { unsigned char current[2*SPX_N]; /* Current logical node */ gen_leafx2( current, ctx, 2*idx + idx_offset, info ); /* Now combine the freshly generated right node with previously */ /* generated left ones */ uint32_t internal_idx_offset = idx_offset; uint32_t internal_idx = idx; uint32_t internal_leaf = leaf_idx; uint32_t h; /* The height we are in the Merkle tree */ for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) { /* Special processing if we're at the top of the tree */ if (h >= tree_height - 1) { if (h == tree_height) { /* We hit the root; return it */ memcpy( root, ¤t[1*SPX_N], SPX_N ); return; } /* The tree indexing logic is a bit off in this case */ /* Adjust it so that the left-most node of the part of */ /* the tree that we're processing has index 0 */ prev_left_adj = left_adj; left_adj = 2 - (1 << (tree_height - h - 1)); } /* Check if we hit the top of the tree */ if (h == tree_height) { /* We hit the root; return it */ memcpy( root, ¤t[1*SPX_N], SPX_N ); return; } /* * Check if one of the nodes we have is a part of the * authentication path; if it is, write it out */ if ((((internal_idx << 1) ^ internal_leaf) & ~0x1) == 0) { memcpy( &auth_path[ h * SPX_N ], ¤t[(((internal_leaf&1)^1) + prev_left_adj) * SPX_N], SPX_N ); } /* * Check if we're at a left child; if so, stop going up the stack * Exception: if we've reached the end of the tree, keep on going * (so we combine the last 2 nodes into the one root node in two * more iterations) */ if ((internal_idx & 1) == 0 && idx < max_idx) { break; } /* Ok, we're at a right node (or doing the top 3 levels) */ /* Now combine the left and right logical nodes together */ /* Set the address of the node we're creating. 
*/ int j; internal_idx_offset >>= 1; for (j = 0; j < 2; j++) { set_tree_height(tree_addrx2 + j*8, h + 1); set_tree_index(tree_addrx2 + j*8, (2/2) * (internal_idx&~1) + j - left_adj + internal_idx_offset ); } unsigned char *left = &stackx2[h * 2 * SPX_N]; thashx2( ¤t[0 * SPX_N], ¤t[1 * SPX_N], &left [0 * SPX_N], ¤t[0 * SPX_N], 2, ctx, tree_addrx2); } /* We've hit a left child; save the current for when we get the */ /* corresponding right right */ memcpy( &stackx2[h * 2 * SPX_N], current, 2 * SPX_N); } } ================================================ FILE: shake-a64/utilsx2.h ================================================ #ifndef SPX_UTILSX2_H #define SPX_UTILSX2_H #include #include "params.h" /** * For a given leaf index, computes the authentication path and the resulting * root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE). * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. * * This implementation uses SIMD to compute internal nodes 2 at a time (in * parallel) */ #define treehashx2 SPX_NAMESPACE(treehashx2) void treehashx2(unsigned char *root, unsigned char *auth_path, const spx_ctx *ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leafx2)( unsigned char* /* Where to write the leaves */, const spx_ctx* /* ctx */, uint32_t addr_idx, void *info), uint32_t tree_addrx2[2*8], void *info); #endif ================================================ FILE: shake-a64/wots.c ================================================ #include #include #include "utils.h" #include "utilsx2.h" #include "hash.h" #include "hashx2.h" #include "thashx2.h" #include "wots.h" #include "wotsx2.h" #include "address.h" #include "params.h" // TODO clarify address expectations, and make them more uniform. // TODO i.e. 
/**
 * Computes up the chains.
 *
 * Copies the SPX_WOTS_LEN chain values from in to out and then advances
 * chain i, in place in out, by steps[i] applications of thashx2, using hash
 * addresses start[i], start[i] + 1, ...  Chains are processed two at a time
 * so that both lanes of thashx2 do useful work for as long as possible.
 *
 * addr is only read; the chain and hash address fields are set on a local
 * two-lane copy.
 */
static void gen_chains(
        unsigned char *out,
        const unsigned char *in,
        unsigned int start[SPX_WOTS_LEN],
        unsigned int steps[SPX_WOTS_LEN],
        const spx_ctx *ctx,
        uint32_t addr[8])
{
    uint32_t i, j, k, idx, watching;
    int done;
    /* Scratch block for a lane that has no (more) work: it is passed to
     * thashx2 as both input and output, and its contents are never used. */
    unsigned char empty[SPX_N];
    /* NOTE(review): only bufs[0] and bufs[1] are used below. */
    unsigned char *bufs[4];
    uint32_t addrs[8*2];

    int l;
    uint16_t counts[SPX_WOTS_W] = { 0 };
    uint16_t idxs[SPX_WOTS_LEN];
    uint16_t total, newTotal;

    /* set addrs = {addr, addr} */
    for (j = 0; j < 2; j++) {
        memcpy(addrs+j*8, addr, sizeof(uint32_t) * 8);
    }

    /* Initialize out with the value at position 'start'. */
    memcpy(out, in, SPX_WOTS_LEN*SPX_N);

    /* Sort the chains in reverse order by steps using counting sort. */
    /* Pass 1: histogram of the step counts. */
    for (i = 0; i < SPX_WOTS_LEN; i++) {
        counts[steps[i]]++;
    }
    /* Pass 2: turn the histogram into start offsets, largest step count
     * first, so that longer chains get the lower positions in idxs. */
    total = 0;
    for (l = SPX_WOTS_W - 1; l >= 0; l--) {
        newTotal = counts[l] + total;
        counts[l] = total;
        total = newTotal;
    }
    /* Pass 3: place every chain index at its sorted position. */
    for (i = 0; i < SPX_WOTS_LEN; i++) {
        idxs[counts[steps[i]]] = i;
        counts[steps[i]]++;
    }

    /* We got our work cut out for us: do it! */
    for (i = 0; i < SPX_WOTS_LEN; i += 2) {
        /* Bind the next (up to) two chains to the two lanes. */
        for (j = 0; j < 2 && i+j < SPX_WOTS_LEN; j++) {
            idx = idxs[i+j];
            set_chain_addr(addrs+j*8, idx);
            bufs[j] = out + SPX_N * idx;
        }

        /* As the chains are sorted in reverse order, we know that the first
         * chain is the longest and the last one is the shortest.  We keep
         * an eye on whether the last chain is done and then on the one before,
         * et cetera. */
        watching = 1;
        done = 0;
        /* Lanes beyond the last chain (odd SPX_WOTS_LEN) get the scratch
         * block from the start. */
        while (i + watching >= SPX_WOTS_LEN) {
            bufs[watching] = &empty[0];
            watching--;
        }

        for (k = 0;; k++) {
            /* Retire every watched lane whose chain has taken all its
             * steps; once lane 0 is retired this pair is finished. */
            while (k == steps[idxs[i+watching]]) {
                bufs[watching] = &empty[0];
                if (watching == 0) {
                    done = 1;
                    break;
                }
                watching--;
            }
            if (done) {
                break;
            }
            /* Only the lanes still being watched need a fresh hash
             * address. */
            for (j = 0; j < watching + 1; j++) {
                set_hash_addr(addrs+j*8, k + start[idxs[i+j]]);
            }

            thashx2(bufs[0], bufs[1],
                    bufs[0], bufs[1], 1, ctx, addrs);
        }
    }
}
* Interprets an array of bytes as integers in base w. * This only works when log_w is a divisor of 8. */ static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { int in = 0; int out = 0; unsigned char total; int bits = 0; int consumed; for (consumed = 0; consumed < out_len; consumed++) { if (bits == 0) { total = input[in]; in++; bits += 8; } bits -= SPX_WOTS_LOGW; output[out] = (total >> bits) & (SPX_WOTS_W - 1); out++; } } /* Computes the WOTS+ checksum over a message (in base_w). */ static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { unsigned int csum = 0; unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8]; unsigned int i; /* Compute checksum. */ for (i = 0; i < SPX_WOTS_LEN1; i++) { csum += SPX_WOTS_W - 1 - msg_base_w[i]; } /* Convert checksum to base_w. */ /* Make sure expected empty zero bits are the least significant bits. */ csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8); ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes); } /* Takes a message and derives the matching chain lengths. */ void chain_lengths(unsigned int *lengths, const unsigned char *msg) { base_w(lengths, SPX_WOTS_LEN1, msg); wots_checksum(lengths + SPX_WOTS_LEN1, lengths); } /** * Takes a WOTS signature and an n-byte message, computes a WOTS public key. * * Writes the computed public key to 'pk'. 
*/ void wots_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *msg, const spx_ctx *ctx, uint32_t addr[8]) { unsigned int steps[SPX_WOTS_LEN]; unsigned int start[SPX_WOTS_LEN]; uint32_t i; chain_lengths(start, msg); for (i = 0; i < SPX_WOTS_LEN; i++) { steps[i] = SPX_WOTS_W - 1 - start[i]; } gen_chains(pk, sig, start, steps, ctx, addr); } /* * This generates 2 sequential WOTS public keys * It also generates the WOTS signature if leaf_info indicates * that we're signing with one of these WOTS keys */ void wots_gen_leafx2(unsigned char *dest, const spx_ctx *ctx, uint32_t leaf_idx, void *v_info) { struct leaf_info_x2 *info = v_info; uint32_t *leaf_addr = info->leaf_addr; uint32_t *pk_addr = info->pk_addr; unsigned int i, j, k; unsigned char pk_buffer[ 2 * SPX_WOTS_BYTES ]; unsigned wots_offset = SPX_WOTS_BYTES; unsigned char *buffer; uint32_t wots_k_mask; unsigned wots_sign_index; if (((leaf_idx ^ info->wots_sign_leaf) & ~1) == 0) { /* We're traversing the leaf that's signing; generate the WOTS */ /* signature */ wots_k_mask = 0; wots_sign_index = info->wots_sign_leaf & 1; /* Which of of the 2 */ /* slots do the signatures come from */ } else { /* Nope, we're just generating pk's; turn off the signature logic */ wots_k_mask = ~0; wots_sign_index = 0; } for (j = 0; j < 2; j++) { set_keypair_addr( leaf_addr + j*8, leaf_idx + j ); set_keypair_addr( pk_addr + j*8, leaf_idx + j ); } for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) { uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k to */ /* the step if we're generating a signature, ~0 if we're not */ /* Start with the secret seed */ for (j = 0; j < 2; j++) { set_chain_addr(leaf_addr + j*8, i); set_hash_addr(leaf_addr + j*8, 0); set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTSPRF); } prf_addrx2(buffer + 0*wots_offset, buffer + 1*wots_offset, ctx, leaf_addr); for (j = 0; j < 2; j++) { set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTS); } /* Iterate down the WOTS chain 
*/ for (k=0;; k++) { /* Check if one of the values we have needs to be saved as a */ /* part of the WOTS signature */ if (k == wots_k) { memcpy( info->wots_sig + i * SPX_N, buffer + wots_sign_index*wots_offset, SPX_N ); } /* Check if we hit the top of the chain */ if (k == SPX_WOTS_W - 1) break; /* Iterate one step on all 4 chains */ for (j = 0; j < 2; j++) { set_hash_addr(leaf_addr + j*8, k); } thashx2(buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 0*wots_offset, buffer + 1*wots_offset, 1, ctx, leaf_addr); } } /* Do the final thash to generate the public keys */ thashx2(dest + 0*SPX_N, dest + 1*SPX_N, pk_buffer + 0*wots_offset, pk_buffer + 1*wots_offset, SPX_WOTS_LEN, ctx, pk_addr); } ================================================ FILE: shake-a64/wotsx2.h ================================================ #if !defined( WOTSX2_H_ ) #define WOTSX2_H_ #include #include "params.h" /* * This is here to provide an interface to the internal wots_gen_leafx2 * routine. While this routine is not referenced in the package outside of * wots.c, it is called from the stand-alone benchmark code to characterize * the performance */ struct leaf_info_x2 { unsigned char *wots_sig; uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */ uint32_t *wots_steps; uint32_t leaf_addr[2*8]; uint32_t pk_addr[2*8]; }; /* Macro to set the leaf_info to something 'benign', that is, it would */ /* run with the same time as it does during the real signing process */ /* Used only by the benchmark code */ #define INITIALIZE_LEAF_INFO_X2(info, addr, step_buffer) { \ info.wots_sig = 0; \ info.wots_sign_leaf = ~0; \ info.wots_steps = step_buffer; \ int i; \ for (i=0; i<2; i++) { \ memcpy( &info.leaf_addr[8*i], addr, 32 ); \ memcpy( &info.pk_addr[8*i], addr, 32 ); \ } \ } #define wots_gen_leafx2 SPX_NAMESPACE(wots_gen_leafx2) void wots_gen_leafx2(unsigned char *dest, const spx_ctx *ctx, uint32_t leaf_idx, void *v_info); #endif /* WOTSX2_H_ */ 
================================================ FILE: shake-avx2/.gitignore ================================================ test/* !test/*.c PQCsignKAT_*.rsp PQCsignKAT_*.req PQCgenKAT_sign keccak4x/KeccakP-1600-times4-SIMD256.o ================================================ FILE: shake-avx2/Makefile ================================================ PARAMS = sphincs-shake-128f THASH = robust CC = /usr/bin/gcc CFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -march=native -fomit-frame-pointer -flto -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS) SOURCES = hash_shake.c hash_shakex4.c thash_shake_$(THASH).c thash_shake_$(THASH)x4.c address.c randombytes.c merkle.c wots.c utils.c utilsx4.c fors.c sign.c fips202.c fips202x4.c keccak4x/KeccakP-1600-times4-SIMD256.o HEADERS = params.h hash.h hashx4.h thash.h thashx4.h address.h randombytes.h merkle.h wots.h utils.h utilsx4.h fors.h api.h fips202.h fips202x4.h DET_SOURCES = $(SOURCES:randombytes.%=rng.%) DET_HEADERS = $(HEADERS:randombytes.%=rng.%) TESTS = test/fors \ test/spx \ test/thashx4 \ BENCHMARK = test/benchmark .PHONY: clean test benchmark default: PQCgenKAT_sign all: PQCgenKAT_sign tests benchmarks tests: $(TESTS) test: $(TESTS:=.exec) benchmarks: $(BENCHMARK) benchmark: $(BENCHMARK:=.exec) PQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS) $(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto test/%: test/%.c $(SOURCES) $(HEADERS) $(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS) test/%.exec: test/% @$< keccak4x/KeccakP-1600-times4-SIMD256.o: keccak4x/align.h \ keccak4x/brg_endian.h \ keccak4x/KeccakP-1600-times4-SIMD256.c \ keccak4x/KeccakP-1600-times4-SnP.h \ keccak4x/KeccakP-1600-unrolling.macros \ keccak4x/SIMD256-config.h $(CC) $(CFLAGS) -c keccak4x/KeccakP-1600-times4-SIMD256.c -o $@ clean: -$(RM) keccak4x/KeccakP-1600-times4-SIMD256.o -$(RM) $(TESTS) -$(RM) $(BENCHMARK) -$(RM) PQCgenKAT_sign -$(RM) PQCsignKAT_*.rsp -$(RM) PQCsignKAT_*.req ================================================ FILE: 
shake-avx2/context.h ================================================ #ifndef SPX_CONTEXT_H #define SPX_CONTEXT_H #include #include "params.h" typedef struct { uint8_t pub_seed[SPX_N]; uint8_t sk_seed[SPX_N]; } spx_ctx; #endif ================================================ FILE: shake-avx2/fips202x4.c ================================================ #include #include #include #include "fips202.h" #include "fips202x4.h" #define NROUNDS 24 #define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) static uint64_t load64(const unsigned char *x) { unsigned long long r = 0, i; for (i = 0; i < 8; ++i) { r |= (unsigned long long)x[i] << 8 * i; } return r; } static void store64(uint8_t *x, uint64_t u) { unsigned int i; for(i=0; i<8; ++i) { x[i] = u; u >>= 8; } } /* Use implementation from the Keccak Code Package */ extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); #define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds static void keccak_absorb4x(__m256i *s, unsigned int r, const unsigned char *m0, const unsigned char *m1, const unsigned char *m2, const unsigned char *m3, unsigned long long int mlen, unsigned char p) { unsigned long long i; unsigned char t0[200]; unsigned char t1[200]; unsigned char t2[200]; unsigned char t3[200]; unsigned long long *ss = (unsigned long long *)s; while (mlen >= r) { for (i = 0; i < r / 8; ++i) { ss[4*i+0] ^= load64(m0 + 8 * i); ss[4*i+1] ^= load64(m1 + 8 * i); ss[4*i+2] ^= load64(m2 + 8 * i); ss[4*i+3] ^= load64(m3 + 8 * i); } KeccakF1600_StatePermute4x(s); mlen -= r; m0 += r; m1 += r; m2 += r; m3 += r; } for (i = 0; i < r; ++i) { t0[i] = 0; t1[i] = 0; t2[i] = 0; t3[i] = 0; } for (i = 0; i < mlen; ++i) { t0[i] = m0[i]; t1[i] = m1[i]; t2[i] = m2[i]; t3[i] = m3[i]; } t0[i] = p; t1[i] = p; t2[i] = p; t3[i] = p; t0[r - 1] |= 128; t1[r - 1] |= 128; t2[r - 1] |= 128; t3[r - 1] |= 128; for (i = 0; i < r / 8; ++i) { ss[4*i+0] ^= load64(t0 + 8 * i); ss[4*i+1] ^= load64(t1 + 8 * i); ss[4*i+2] ^= load64(t2 + 8 * i); 
ss[4*i+3] ^= load64(t3 + 8 * i); } } static void keccak_squeezeblocks4x(unsigned char *h0, unsigned char *h1, unsigned char *h2, unsigned char *h3, unsigned long long int nblocks, __m256i *s, unsigned int r) { unsigned int i; unsigned long long *ss = (unsigned long long *)s; while(nblocks > 0) { KeccakF1600_StatePermute4x(s); for(i=0;i<(r>>3);i++) { store64(h0+8*i, ss[4*i+0]); store64(h1+8*i, ss[4*i+1]); store64(h2+8*i, ss[4*i+2]); store64(h3+8*i, ss[4*i+3]); } h0 += r; h1 += r; h2 += r; h3 += r; nblocks--; } } void shake128x4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned long long outlen, unsigned char *in0, unsigned char *in1, unsigned char *in2, unsigned char *in3, unsigned long long inlen) { __m256i s[25]; unsigned char t0[SHAKE128_RATE]; unsigned char t1[SHAKE128_RATE]; unsigned char t2[SHAKE128_RATE]; unsigned char t3[SHAKE128_RATE]; unsigned int i; /* zero state */ for(i=0;i<25;i++) s[i] = _mm256_xor_si256(s[i], s[i]); /* absorb 4 message of identical length in parallel */ keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F); /* Squeeze output */ keccak_squeezeblocks4x(out0, out1, out2, out3, outlen/SHAKE128_RATE, s, SHAKE128_RATE); out0 += (outlen/SHAKE128_RATE)*SHAKE128_RATE; out1 += (outlen/SHAKE128_RATE)*SHAKE128_RATE; out2 += (outlen/SHAKE128_RATE)*SHAKE128_RATE; out3 += (outlen/SHAKE128_RATE)*SHAKE128_RATE; if(outlen%SHAKE128_RATE) { keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); for(i=0;i void shake128x4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned long long outlen, unsigned char *in0, unsigned char *in1, unsigned char *in2, unsigned char *in3, unsigned long long inlen); void shake256x4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, unsigned long long outlen, unsigned char *in0, unsigned char *in1, unsigned char *in2, unsigned char *in3, unsigned long long inlen); #endif 
================================================ FILE: shake-avx2/fors.c ================================================ #include #include #include #include "fors.h" #include "utils.h" #include "utilsx4.h" #include "hash.h" #include "hashx4.h" #include "thash.h" #include "thashx4.h" #include "address.h" static void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx, uint32_t fors_leaf_addr[8]) { prf_addr(sk, ctx, fors_leaf_addr); } static void fors_gen_skx4(unsigned char *sk0, unsigned char *sk1, unsigned char *sk2, unsigned char *sk3, const spx_ctx *ctx, uint32_t fors_leaf_addrx4[4*8]) { prf_addrx4(sk0, sk1, sk2, sk3, ctx, fors_leaf_addrx4); } static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, const spx_ctx *ctx, uint32_t fors_leaf_addr[8]) { thash(leaf, sk, 1, ctx, fors_leaf_addr); } static void fors_sk_to_leafx4(unsigned char *leaf0, unsigned char *leaf1, unsigned char *leaf2, unsigned char *leaf3, const unsigned char *sk0, const unsigned char *sk1, const unsigned char *sk2, const unsigned char *sk3, const spx_ctx *ctx, uint32_t fors_leaf_addrx4[4*8]) { thashx4(leaf0, leaf1, leaf2, leaf3, sk0, sk1, sk2, sk3, 1, ctx, fors_leaf_addrx4); } struct fors_gen_leaf_info { uint32_t leaf_addrx[4*8]; }; static void fors_gen_leafx4(unsigned char *leaf, const spx_ctx *ctx, uint32_t addr_idx, void *info) { struct fors_gen_leaf_info *fors_info = info; uint32_t *fors_leaf_addrx4 = fors_info->leaf_addrx; unsigned int j; /* Only set the parts that the caller doesn't set */ for (j = 0; j < 4; j++) { set_tree_index(fors_leaf_addrx4 + j*8, addr_idx + j); set_type(fors_leaf_addrx4 + j*8, SPX_ADDR_TYPE_FORSPRF); } fors_gen_skx4(leaf + 0*SPX_N, leaf + 1*SPX_N, leaf + 2*SPX_N, leaf + 3*SPX_N, ctx, fors_leaf_addrx4); for (j = 0; j < 4; j++) { set_type(fors_leaf_addrx4 + j*8, SPX_ADDR_TYPE_FORSTREE); } fors_sk_to_leafx4(leaf + 0*SPX_N, leaf + 1*SPX_N, leaf + 2*SPX_N, leaf + 3*SPX_N, leaf + 0*SPX_N, leaf + 1*SPX_N, leaf + 2*SPX_N, leaf + 3*SPX_N, ctx, 
fors_leaf_addrx4); } /** * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. * Assumes indices has space for SPX_FORS_TREES integers. */ static void message_to_indices(uint32_t *indices, const unsigned char *m) { unsigned int i, j; unsigned int offset = 0; for (i = 0; i < SPX_FORS_TREES; i++) { indices[i] = 0; for (j = 0; j < SPX_FORS_HEIGHT; j++) { indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; offset++; } } } /** * Signs a message m, deriving the secret key from sk_seed and the FTS address. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ void fors_sign(unsigned char *sig, unsigned char *pk, const unsigned char *m, const spx_ctx *ctx, const uint32_t fors_addr[8]) { uint32_t indices[SPX_FORS_TREES]; unsigned char roots[SPX_FORS_TREES * SPX_N]; uint32_t fors_tree_addr[4*8] = {0}; struct fors_gen_leaf_info fors_info = {0}; uint32_t *fors_leaf_addr = fors_info.leaf_addrx; uint32_t fors_pk_addr[8] = {0}; uint32_t idx_offset; unsigned int i; for (i=0; i<4; i++) { copy_keypair_addr(fors_tree_addr + 8*i, fors_addr); set_type(fors_tree_addr + 8*i, SPX_ADDR_TYPE_FORSTREE); copy_keypair_addr(fors_leaf_addr + 8*i, fors_addr); } copy_keypair_addr(fors_pk_addr, fors_addr); set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK); message_to_indices(indices, m); for (i = 0; i < SPX_FORS_TREES; i++) { idx_offset = i * (1 << SPX_FORS_HEIGHT); set_tree_height(fors_tree_addr, 0); set_tree_index(fors_tree_addr, indices[i] + idx_offset); /* Include the secret key part that produces the selected leaf node. */ set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF); fors_gen_sk(sig, ctx, fors_tree_addr); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE); sig += SPX_N; /* Compute the authentication path for this leaf node. 
*/ treehashx4(roots + i*SPX_N, sig, ctx, indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx4, fors_tree_addr, &fors_info); sig += SPX_N * SPX_FORS_HEIGHT; } /* Hash horizontally across all tree roots to derive the public key. */ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } /** * Derives the FORS public key from a signature. * This can be used for verification by comparing to a known public key, or to * subsequently verify a signature on the derived public key. The latter is the * typical use-case when used as an FTS below an OTS in a hypertree. * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits. */ void fors_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *m, const spx_ctx *ctx, const uint32_t fors_addr[8]) { uint32_t indices[SPX_FORS_TREES]; unsigned char roots[SPX_FORS_TREES * SPX_N]; unsigned char leaf[SPX_N]; uint32_t fors_tree_addr[8] = {0}; uint32_t fors_pk_addr[8] = {0}; uint32_t idx_offset; unsigned int i; copy_keypair_addr(fors_tree_addr, fors_addr); copy_keypair_addr(fors_pk_addr, fors_addr); set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE); set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK); message_to_indices(indices, m); for (i = 0; i < SPX_FORS_TREES; i++) { idx_offset = i * (1 << SPX_FORS_HEIGHT); set_tree_height(fors_tree_addr, 0); set_tree_index(fors_tree_addr, indices[i] + idx_offset); /* Derive the leaf from the included secret key part. */ fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr); sig += SPX_N; /* Derive the corresponding root node of this tree. */ compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset, sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr); sig += SPX_N * SPX_FORS_HEIGHT; } /* Hash horizontally across all tree roots to derive the public key. 
*/ thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr); } ================================================ FILE: shake-avx2/hash_shakex4.c ================================================ #include #include #include "address.h" #include "params.h" #include "fips202x4.h" #include "hashx4.h" extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); /* * 4-way parallel version of prf_addr; takes 4x as much input and output */ void prf_addrx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const spx_ctx *ctx, const uint32_t addrx4[4*8]) { /* As we write and read only a few quadwords, it is more efficient to * build and extract from the fourway SHAKE256 state by hand. */ __m256i state[25]; for (int i = 0; i < SPX_N/8; i++) { state[i] = _mm256_set1_epi64x(((int64_t*)ctx->pub_seed)[i]); } for (int i = 0; i < 4; i++) { state[SPX_N/8+i] = _mm256_set_epi32( addrx4[3*8+1+2*i], addrx4[3*8+2*i], addrx4[2*8+1+2*i], addrx4[2*8+2*i], addrx4[8+1+2*i], addrx4[8+2*i], addrx4[1+2*i], addrx4[2*i] ); } for (int i = 0; i < SPX_N/8; i++) { state[SPX_N/8+i+4] = _mm256_set1_epi64x(((int64_t*)ctx->sk_seed)[i]); } /* SHAKE domain separator and padding. 
*/ state[SPX_N/4+4] = _mm256_set1_epi64x(0x1f); for (int i = SPX_N/4+5; i < 16; i++) { state[i] = _mm256_set1_epi64x(0); } // shift unsigned and then cast to avoid UB state[16] = _mm256_set1_epi64x((long long)(0x80ULL << 56)); for (int i = 17; i < 25; i++) { state[i] = _mm256_set1_epi64x(0); } KeccakP1600times4_PermuteAll_24rounds(&state[0]); for (int i = 0; i < SPX_N/8; i++) { ((int64_t*)out0)[i] = _mm256_extract_epi64(state[i], 0); ((int64_t*)out1)[i] = _mm256_extract_epi64(state[i], 1); ((int64_t*)out2)[i] = _mm256_extract_epi64(state[i], 2); ((int64_t*)out3)[i] = _mm256_extract_epi64(state[i], 3); } } ================================================ FILE: shake-avx2/hashx4.h ================================================ #ifndef SPX_HASHX4_H #define SPX_HASHX4_H #include #include "context.h" #include "params.h" #define prf_addrx4 SPX_NAMESPACE(prf_addrx4) void prf_addrx4(unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, const spx_ctx *ctx, const uint32_t addrx4[4*8]); #endif ================================================ FILE: shake-avx2/keccak4x/KeccakP-1600-times4-SIMD256.c ================================================ /* Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". For more information, feedback or questions, please refer to our websites: http://keccak.noekeon.org/ http://keyak.noekeon.org/ http://ketje.noekeon.org/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. 
http://creativecommons.org/publicdomain/zero/1.0/ */ #include #include #include #include #include #include #include #include "align.h" #include "KeccakP-1600-times4-SnP.h" #include "SIMD256-config.h" #include "brg_endian.h" #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) #error Expecting a little-endian platform #endif typedef unsigned char UINT8; typedef unsigned long long int UINT64; typedef __m128i V128; typedef __m256i V256; #define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + instanceIndex) #if defined(KeccakP1600times4_useAVX2) #define ANDnu256(a, b) _mm256_andnot_si256(a, b) #define CONST256(a) _mm256_load_si256((const V256 *)&(a)) #define CONST256_64(a) (V256)_mm256_broadcast_sd((const double*)(&a)) #define LOAD256(a) _mm256_load_si256((const V256 *)&(a)) #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a)) #define LOAD4_64(a, b, c, d) _mm256_set_epi64x((UINT64)(a), (UINT64)(b), (UINT64)(c), (UINT64)(d)) #define ROL64in256(d, a, o) d = _mm256_or_si256(_mm256_slli_epi64(a, o), _mm256_srli_epi64(a, 64-(o))) #define ROL64in256_8(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho8)) #define ROL64in256_56(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho56)) static const UINT64 rho8[4] = {0x0605040302010007, 0x0E0D0C0B0A09080F, 0x1615141312111017, 0x1E1D1C1B1A19181F}; static const UINT64 rho56[4] = {0x0007060504030201, 0x080F0E0D0C0B0A09, 0x1017161514131211, 0x181F1E1D1C1B1A19}; #define STORE256(a, b) _mm256_store_si256((V256 *)&(a), b) #define STORE256u(a, b) _mm256_storeu_si256((V256 *)&(a), b) #define STORE2_128(ah, al, v) _mm256_storeu2_m128d((V128*)&(ah), (V128*)&(al), v) #define XOR256(a, b) _mm256_xor_si256(a, b) #define XOReq256(a, b) a = _mm256_xor_si256(a, b) #define UNPACKL( a, b ) _mm256_unpacklo_epi64((a), (b)) #define UNPACKH( a, b ) _mm256_unpackhi_epi64((a), (b)) #define PERM128( a, b, c ) (V256)_mm256_permute2f128_ps((__m256)(a), (__m256)(b), c) #define SHUFFLE64( a, b, c ) (V256)_mm256_shuffle_pd((__m256d)(a), (__m256d)(b), c) #define 
UNINTLEAVE() lanesL01 = UNPACKL( lanes0, lanes1 ), \ lanesH01 = UNPACKH( lanes0, lanes1 ), \ lanesL23 = UNPACKL( lanes2, lanes3 ), \ lanesH23 = UNPACKH( lanes2, lanes3 ), \ lanes0 = PERM128( lanesL01, lanesL23, 0x20 ), \ lanes2 = PERM128( lanesL01, lanesL23, 0x31 ), \ lanes1 = PERM128( lanesH01, lanesH23, 0x20 ), \ lanes3 = PERM128( lanesH01, lanesH23, 0x31 ) #define INTLEAVE() lanesL01 = PERM128( lanes0, lanes2, 0x20 ), \ lanesH01 = PERM128( lanes1, lanes3, 0x20 ), \ lanesL23 = PERM128( lanes0, lanes2, 0x31 ), \ lanesH23 = PERM128( lanes1, lanes3, 0x31 ), \ lanes0 = SHUFFLE64( lanesL01, lanesH01, 0x00 ), \ lanes1 = SHUFFLE64( lanesL01, lanesH01, 0x0F ), \ lanes2 = SHUFFLE64( lanesL23, lanesH23, 0x00 ), \ lanes3 = SHUFFLE64( lanesL23, lanesH23, 0x0F ) #endif #define SnP_laneLengthInBytes 8 void KeccakP1600times4_InitializeAll(void *states) { memset(states, 0, KeccakP1600times4_statesSizeInBytes); } void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) { unsigned int sizeLeft = length; unsigned int lanePosition = offset/SnP_laneLengthInBytes; unsigned int offsetInLane = offset%SnP_laneLengthInBytes; const unsigned char *curData = data; UINT64 *statesAsLanes = (UINT64 *)states; if ((sizeLeft > 0) && (offsetInLane != 0)) { unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; UINT64 lane = 0; if (bytesInLane > sizeLeft) bytesInLane = sizeLeft; memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane); statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane; sizeLeft -= bytesInLane; lanePosition++; curData += bytesInLane; } while(sizeLeft >= SnP_laneLengthInBytes) { UINT64 lane = *((const UINT64*)curData); statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane; sizeLeft -= SnP_laneLengthInBytes; lanePosition++; curData += SnP_laneLengthInBytes; } if (sizeLeft > 0) { UINT64 lane = 0; memcpy(&lane, curData, sizeLeft); 
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane; } } void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) { V256 *stateAsLanes = (V256 *)states; unsigned int i; const UINT64 *curData0 = (const UINT64 *)data; const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes); const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; #define Xor_In( argIndex ) XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\ lanes1 = LOAD256u( curData1[argIndex]),\ lanes2 = LOAD256u( curData2[argIndex]),\ lanes3 = LOAD256u( curData3[argIndex]),\ INTLEAVE(),\ XOReq256( stateAsLanes[argIndex+0], lanes0 ),\ XOReq256( stateAsLanes[argIndex+1], lanes1 ),\ XOReq256( stateAsLanes[argIndex+2], lanes2 ),\ XOReq256( stateAsLanes[argIndex+3], lanes3 ) if ( laneCount >= 16 ) { Xor_In4( 0 ); Xor_In4( 4 ); Xor_In4( 8 ); Xor_In4( 12 ); if ( laneCount >= 20 ) { Xor_In4( 16 ); for(i=20; i 0) && (offsetInLane != 0)) { unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; if (bytesInLane > sizeLeft) bytesInLane = sizeLeft; memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane); sizeLeft -= bytesInLane; lanePosition++; curData += bytesInLane; } while(sizeLeft >= SnP_laneLengthInBytes) { UINT64 lane = *((const UINT64*)curData); statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane; sizeLeft -= SnP_laneLengthInBytes; lanePosition++; curData += SnP_laneLengthInBytes; } if (sizeLeft > 0) { memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft); } } void 
KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) { V256 *stateAsLanes = (V256 *)states; unsigned int i; const UINT64 *curData0 = (const UINT64 *)data; const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes); const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; #define OverWr( argIndex ) STORE256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) #define OverWr4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\ lanes1 = LOAD256u( curData1[argIndex]),\ lanes2 = LOAD256u( curData2[argIndex]),\ lanes3 = LOAD256u( curData3[argIndex]),\ INTLEAVE(),\ STORE256( stateAsLanes[argIndex+0], lanes0 ),\ STORE256( stateAsLanes[argIndex+1], lanes1 ),\ STORE256( stateAsLanes[argIndex+2], lanes2 ),\ STORE256( stateAsLanes[argIndex+3], lanes3 ) if ( laneCount >= 16 ) { OverWr4( 0 ); OverWr4( 4 ); OverWr4( 8 ); OverWr4( 12 ); if ( laneCount >= 20 ) { OverWr4( 16 ); for(i=20; i= SnP_laneLengthInBytes) { statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0; sizeLeft -= SnP_laneLengthInBytes; lanePosition++; } if (sizeLeft > 0) { memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft); } } void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length) { unsigned int sizeLeft = length; unsigned int lanePosition = offset/SnP_laneLengthInBytes; unsigned int offsetInLane = offset%SnP_laneLengthInBytes; unsigned char *curData = data; const UINT64 *statesAsLanes = (const UINT64 *)states; if ((sizeLeft > 0) && (offsetInLane != 0)) { unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; if (bytesInLane > sizeLeft) 
bytesInLane = sizeLeft; memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane); sizeLeft -= bytesInLane; lanePosition++; curData += bytesInLane; } while(sizeLeft >= SnP_laneLengthInBytes) { *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)]; sizeLeft -= SnP_laneLengthInBytes; lanePosition++; curData += SnP_laneLengthInBytes; } if (sizeLeft > 0) { memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft); } } void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset) { UINT64 *curData0 = (UINT64 *)data; UINT64 *curData1 = (UINT64 *)(data+laneOffset*1*SnP_laneLengthInBytes); UINT64 *curData2 = (UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); UINT64 *curData3 = (UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); const V256 *stateAsLanes = (const V256 *)states; const UINT64 *stateAsLanes64 = (const UINT64*)states; V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; unsigned int i; #define Extr( argIndex ) curData0[argIndex] = stateAsLanes64[4*(argIndex)], \ curData1[argIndex] = stateAsLanes64[4*(argIndex)+1], \ curData2[argIndex] = stateAsLanes64[4*(argIndex)+2], \ curData3[argIndex] = stateAsLanes64[4*(argIndex)+3] #define Extr4( argIndex ) lanes0 = LOAD256( stateAsLanes[argIndex+0] ), \ lanes1 = LOAD256( stateAsLanes[argIndex+1] ), \ lanes2 = LOAD256( stateAsLanes[argIndex+2] ), \ lanes3 = LOAD256( stateAsLanes[argIndex+3] ), \ UNINTLEAVE(), \ STORE256u( curData0[argIndex], lanes0 ), \ STORE256u( curData1[argIndex], lanes1 ), \ STORE256u( curData2[argIndex], lanes2 ), \ STORE256u( curData3[argIndex], lanes3 ) if ( laneCount >= 16 ) { Extr4( 0 ); Extr4( 4 ); Extr4( 8 ); Extr4( 12 ); if ( laneCount >= 20 ) { Extr4( 16 ); for(i=20; i 0) && (offsetInLane != 0)) { unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; UINT64 lane = 
statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane); if (bytesInLane > sizeLeft) bytesInLane = sizeLeft; sizeLeft -= bytesInLane; do { *(curOutput++) = *(curInput++) ^ (unsigned char)lane; lane >>= 8; } while ( --bytesInLane != 0); lanePosition++; } while(sizeLeft >= SnP_laneLengthInBytes) { *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)]; sizeLeft -= SnP_laneLengthInBytes; lanePosition++; curInput += SnP_laneLengthInBytes; curOutput += SnP_laneLengthInBytes; } if (sizeLeft != 0) { UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)]; do { *(curOutput++) = *(curInput++) ^ (unsigned char)lane; lane >>= 8; } while ( --sizeLeft != 0); } } void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset) { const UINT64 *curInput0 = (UINT64 *)input; const UINT64 *curInput1 = (UINT64 *)(input+laneOffset*1*SnP_laneLengthInBytes); const UINT64 *curInput2 = (UINT64 *)(input+laneOffset*2*SnP_laneLengthInBytes); const UINT64 *curInput3 = (UINT64 *)(input+laneOffset*3*SnP_laneLengthInBytes); UINT64 *curOutput0 = (UINT64 *)output; UINT64 *curOutput1 = (UINT64 *)(output+laneOffset*1*SnP_laneLengthInBytes); UINT64 *curOutput2 = (UINT64 *)(output+laneOffset*2*SnP_laneLengthInBytes); UINT64 *curOutput3 = (UINT64 *)(output+laneOffset*3*SnP_laneLengthInBytes); const V256 *stateAsLanes = (const V256 *)states; const UINT64 *stateAsLanes64 = (const UINT64*)states; V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; unsigned int i; #define ExtrXor( argIndex ) \ curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes64[4*(argIndex)],\ curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes64[4*(argIndex)+1],\ curOutput2[argIndex] = curInput2[argIndex] ^ stateAsLanes64[4*(argIndex)+2],\ curOutput3[argIndex] = curInput3[argIndex] ^ stateAsLanes64[4*(argIndex)+3] #define 
ExtrXor4( argIndex ) \ lanes0 = LOAD256( stateAsLanes[argIndex+0] ),\ lanes1 = LOAD256( stateAsLanes[argIndex+1] ),\ lanes2 = LOAD256( stateAsLanes[argIndex+2] ),\ lanes3 = LOAD256( stateAsLanes[argIndex+3] ),\ UNINTLEAVE(),\ lanesL01 = LOAD256u( curInput0[argIndex]),\ lanesH01 = LOAD256u( curInput1[argIndex]),\ lanesL23 = LOAD256u( curInput2[argIndex]),\ lanesH23 = LOAD256u( curInput3[argIndex]),\ XOReq256( lanes0, lanesL01 ),\ XOReq256( lanes1, lanesH01 ),\ XOReq256( lanes2, lanesL23 ),\ XOReq256( lanes3, lanesH23 ),\ STORE256u( curOutput0[argIndex], lanes0 ),\ STORE256u( curOutput1[argIndex], lanes1 ),\ STORE256u( curOutput2[argIndex], lanes2 ),\ STORE256u( curOutput3[argIndex], lanes3 ) if ( laneCount >= 16 ) { ExtrXor4( 0 ); ExtrXor4( 4 ); ExtrXor4( 8 ); ExtrXor4( 12 ); if ( laneCount >= 20 ) { ExtrXor4( 16 ); for(i=20; i= (laneOffsetParallel*3 + laneCount)*8) { V256 *stateAsLanes = (V256 *)states; V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; #define Xor_In( argIndex ) \ XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) #define Xor_In4( argIndex ) \ lanes0 = LOAD256u( curData0[argIndex]),\ lanes1 = LOAD256u( curData1[argIndex]),\ lanes2 = LOAD256u( curData2[argIndex]),\ lanes3 = LOAD256u( curData3[argIndex]),\ INTLEAVE(),\ XOReq256( stateAsLanes[argIndex+0], lanes0 ),\ XOReq256( stateAsLanes[argIndex+1], lanes1 ),\ XOReq256( stateAsLanes[argIndex+2], lanes2 ),\ XOReq256( stateAsLanes[argIndex+3], lanes3 ) Xor_In4( 0 ); Xor_In4( 4 ); Xor_In4( 8 ); Xor_In4( 12 ); Xor_In4( 16 ); Xor_In( 20 ); #undef Xor_In #undef Xor_In4 KeccakP1600times4_PermuteAll_24rounds(states); curData0 += laneOffsetSerial; curData1 += laneOffsetSerial; curData2 += laneOffsetSerial; curData3 += laneOffsetSerial; dataByteLen -= laneOffsetSerial*8; } return (const unsigned char *)curData0 - dataStart; #else // unsigned int i; const unsigned char *dataStart = data; const UINT64 *curData0 = 
/*
 * Fast absorb loop for four interleaved Keccak-p[1600] instances using the
 * 12-round permutation.
 *
 * While at least (laneOffsetParallel*3 + laneCount)*8 bytes remain, one block
 * of laneCount lanes is XORed into each of the four instances and the
 * 12-round permutation is applied.  The block for instance k is read starting
 * laneOffsetParallel*k lanes after the block for instance 0; after each block
 * every input position advances by laneOffsetSerial lanes.
 *
 * states             the four interleaved states
 * laneCount          number of 64-bit lanes absorbed per block and instance
 * laneOffsetParallel distance, in lanes, between the inputs of two
 *                    consecutive instances
 * laneOffsetSerial   distance, in lanes, between two consecutive blocks of
 *                    the same instance
 * data, dataByteLen  input buffer and its length in bytes
 *
 * Returns the number of bytes by which the instance-0 input position has
 * advanced.
 */
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
{
    if (laneCount == 21) {
        /* Specialised path for blocks of exactly 21 lanes. */
#if 0
        /* Disabled alternative: XOR directly into the in-memory state, four
         * lanes at a time, and call the out-of-line 12-round permutation. */
        const unsigned char *dataStart = data;
        const UINT64 *curData0 = (const UINT64 *)data;
        const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes);
        const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes);
        const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes);

        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {
            V256 *stateAsLanes = states;
            V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
            #define Xor_In( argIndex ) \
                XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
            #define Xor_In4( argIndex ) \
                lanes0 = LOAD256u( curData0[argIndex]),\
                lanes1 = LOAD256u( curData1[argIndex]),\
                lanes2 = LOAD256u( curData2[argIndex]),\
                lanes3 = LOAD256u( curData3[argIndex]),\
                INTLEAVE(),\
                XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
                XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
                XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
                XOReq256( stateAsLanes[argIndex+3], lanes3 )
            /* 5 groups of 4 lanes plus one single lane = 21 lanes */
            Xor_In4( 0 );
            Xor_In4( 4 );
            Xor_In4( 8 );
            Xor_In4( 12 );
            Xor_In4( 16 );
            Xor_In( 20 );
            #undef Xor_In
            #undef Xor_In4
            KeccakP1600times4_PermuteAll_12rounds(states);
            curData0 += laneOffsetSerial;
            curData1 += laneOffsetSerial;
            curData2 += laneOffsetSerial;
            curData3 += laneOffsetSerial;
            dataByteLen -= laneOffsetSerial*8;
        }
        return (const unsigned char *)curData0 - dataStart;
#else
        /* Active path: the state is copied into the local A.. variables once
         * (copyFromState), every block is absorbed and permuted on those
         * variables via the inlined rounds12 macro, and the result is written
         * back once after the loop (copyToState). */
//        unsigned int i;
        const unsigned char *dataStart = data;
        /* One read cursor per instance, laneOffsetParallel lanes apart. */
        const UINT64 *curData0 = (const UINT64 *)data;
        const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes);
        const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes);
        const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes);
        V256 *statesAsLanes = states;
        declareABCDE

        copyFromState(A, statesAsLanes)
        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {
            /* XOR lane argIndex of all four inputs into state variable Xxx.
             * NOTE(review): LOAD4_64 presumably packs its first argument into
             * the highest 64-bit slot, so instance 3 is passed first -
             * confirm against its definition. */
            #define XOR_In( Xxx, argIndex ) \
                XOReq256(Xxx, LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))
            XOR_In( Aba, 0 );
            XOR_In( Abe, 1 );
            XOR_In( Abi, 2 );
            XOR_In( Abo, 3 );
            XOR_In( Abu, 4 );
            XOR_In( Aga, 5 );
            XOR_In( Age, 6 );
            XOR_In( Agi, 7 );
            XOR_In( Ago, 8 );
            XOR_In( Agu, 9 );
            XOR_In( Aka, 10 );
            XOR_In( Ake, 11 );
            XOR_In( Aki, 12 );
            XOR_In( Ako, 13 );
            XOR_In( Aku, 14 );
            XOR_In( Ama, 15 );
            XOR_In( Ame, 16 );
            XOR_In( Ami, 17 );
            XOR_In( Amo, 18 );
            XOR_In( Amu, 19 );
            XOR_In( Asa, 20 );
            #undef XOR_In
            rounds12
            curData0 += laneOffsetSerial;
            curData1 += laneOffsetSerial;
            curData2 += laneOffsetSerial;
            curData3 += laneOffsetSerial;
            dataByteLen -= laneOffsetSerial*8;
        }
        copyToState(statesAsLanes, A)
        return (const unsigned char *)curData0 - dataStart;
#endif
    }
    else {
        /* Generic path for any other lane count: use the out-of-line
         * add-lanes and permutation routines for each block. */
//        unsigned int i;
        const unsigned char *dataStart = data;

        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {
            KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
            KeccakP1600times4_PermuteAll_12rounds(states);
            data += laneOffsetSerial*8;
            dataByteLen -= laneOffsetSerial*8;
        }
        return data - dataStart;
    }
}
*/ #include "SIMD256-config.h" #define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")" #define KeccakP1600times4_statesSizeInBytes 800 #define KeccakP1600times4_statesAlignment 32 #define KeccakF1600times4_FastLoop_supported #define KeccakP1600times4_12rounds_FastLoop_supported #include #define KeccakP1600times4_StaticInitialize() void KeccakP1600times4_InitializeAll(void *states); #define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \ ((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte) void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount); void KeccakP1600times4_PermuteAll_12rounds(void *states); void KeccakP1600times4_PermuteAll_24rounds(void *states); void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset); void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int 
laneOffset); size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); #endif ================================================ FILE: shake-avx2/keccak4x/KeccakP-1600-unrolling.macros ================================================ /* Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". For more information, feedback or questions, please refer to our websites: http://keccak.noekeon.org/ http://keyak.noekeon.org/ http://ketje.noekeon.org/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. 
http://creativecommons.org/publicdomain/zero/1.0/ */ #if (defined(FullUnrolling)) #define rounds24 \ prepareTheta \ thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ thetaRhoPiChiIotaPrepareTheta(10, A, E) \ thetaRhoPiChiIotaPrepareTheta(11, E, A) \ thetaRhoPiChiIotaPrepareTheta(12, A, E) \ thetaRhoPiChiIotaPrepareTheta(13, E, A) \ thetaRhoPiChiIotaPrepareTheta(14, A, E) \ thetaRhoPiChiIotaPrepareTheta(15, E, A) \ thetaRhoPiChiIotaPrepareTheta(16, A, E) \ thetaRhoPiChiIotaPrepareTheta(17, E, A) \ thetaRhoPiChiIotaPrepareTheta(18, A, E) \ thetaRhoPiChiIotaPrepareTheta(19, E, A) \ thetaRhoPiChiIotaPrepareTheta(20, A, E) \ thetaRhoPiChiIotaPrepareTheta(21, E, A) \ thetaRhoPiChiIotaPrepareTheta(22, A, E) \ thetaRhoPiChiIota(23, E, A) \ #define rounds12 \ prepareTheta \ thetaRhoPiChiIotaPrepareTheta(12, A, E) \ thetaRhoPiChiIotaPrepareTheta(13, E, A) \ thetaRhoPiChiIotaPrepareTheta(14, A, E) \ thetaRhoPiChiIotaPrepareTheta(15, E, A) \ thetaRhoPiChiIotaPrepareTheta(16, A, E) \ thetaRhoPiChiIotaPrepareTheta(17, E, A) \ thetaRhoPiChiIotaPrepareTheta(18, A, E) \ thetaRhoPiChiIotaPrepareTheta(19, E, A) \ thetaRhoPiChiIotaPrepareTheta(20, A, E) \ thetaRhoPiChiIotaPrepareTheta(21, E, A) \ thetaRhoPiChiIotaPrepareTheta(22, A, E) \ thetaRhoPiChiIota(23, E, A) \ #elif (Unrolling == 12) #define rounds24 \ prepareTheta \ for(i=0; i<24; i+=12) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ 
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ } \ #define rounds12 \ prepareTheta \ thetaRhoPiChiIotaPrepareTheta(12, A, E) \ thetaRhoPiChiIotaPrepareTheta(13, E, A) \ thetaRhoPiChiIotaPrepareTheta(14, A, E) \ thetaRhoPiChiIotaPrepareTheta(15, E, A) \ thetaRhoPiChiIotaPrepareTheta(16, A, E) \ thetaRhoPiChiIotaPrepareTheta(17, E, A) \ thetaRhoPiChiIotaPrepareTheta(18, A, E) \ thetaRhoPiChiIotaPrepareTheta(19, E, A) \ thetaRhoPiChiIotaPrepareTheta(20, A, E) \ thetaRhoPiChiIotaPrepareTheta(21, E, A) \ thetaRhoPiChiIotaPrepareTheta(22, A, E) \ thetaRhoPiChiIota(23, E, A) \ #elif (Unrolling == 6) #define rounds24 \ prepareTheta \ for(i=0; i<24; i+=6) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ } \ #define rounds12 \ prepareTheta \ for(i=12; i<24; i+=6) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ } \ #elif (Unrolling == 4) #define rounds24 \ prepareTheta \ for(i=0; i<24; i+=4) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ } \ #define rounds12 \ prepareTheta \ for(i=12; i<24; i+=4) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ } \ #elif (Unrolling == 3) #define rounds24 \ 
prepareTheta \ for(i=0; i<24; i+=3) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ copyStateVariables(A, E) \ } \ #define rounds12 \ prepareTheta \ for(i=12; i<24; i+=3) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ copyStateVariables(A, E) \ } \ #elif (Unrolling == 2) #define rounds24 \ prepareTheta \ for(i=0; i<24; i+=2) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ } \ #define rounds12 \ prepareTheta \ for(i=12; i<24; i+=2) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ } \ #elif (Unrolling == 1) #define rounds24 \ prepareTheta \ for(i=0; i<24; i++) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ copyStateVariables(A, E) \ } \ #define rounds12 \ prepareTheta \ for(i=12; i<24; i++) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ copyStateVariables(A, E) \ } \ #else #error "Unrolling is not correctly specified!" #endif #define roundsN(__nrounds) \ prepareTheta \ i = 24 - (__nrounds); \ if ((i&1) != 0) { \ thetaRhoPiChiIotaPrepareTheta(i, A, E) \ copyStateVariables(A, E) \ ++i; \ } \ for( /* empty */; i<24; i+=2) { \ thetaRhoPiChiIotaPrepareTheta(i , A, E) \ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ } ================================================ FILE: shake-avx2/keccak4x/SIMD256-config.h ================================================ #define KeccakP1600times4_implementation_config "AVX2, all rounds unrolled" #define KeccakP1600times4_fullUnrolling #define KeccakP1600times4_useAVX2 ================================================ FILE: shake-avx2/keccak4x/align.h ================================================ /* Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". 
For more information, feedback or questions, please refer to our websites: http://keccak.noekeon.org/ http://keyak.noekeon.org/ http://ketje.noekeon.org/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ */ #ifndef _align_h_ #define _align_h_ /* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ #ifdef ALIGN #undef ALIGN #endif #if defined(__GNUC__) #define ALIGN(x) __attribute__ ((aligned(x))) #elif defined(_MSC_VER) #define ALIGN(x) __declspec(align(x)) #elif defined(__ARMCC_VERSION) #define ALIGN(x) __align(x) #else #define ALIGN(x) #endif #endif ================================================ FILE: shake-avx2/keccak4x/brg_endian.h ================================================ /* --------------------------------------------------------------------------- Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. LICENSE TERMS The redistribution and use of this software (with or without changes) is allowed without the payment of fees or royalties provided that: 1. source code distributions include the above copyright notice, this list of conditions and the following disclaimer; 2. binary distributions include the above copyright notice, this list of conditions and the following disclaimer in their documentation; 3. the name of the copyright holder is not used to endorse products built using this software without specific written permission. DISCLAIMER This software is provided 'as is' with no explicit or implied warranties in respect of its properties, including, but not limited to, correctness and/or fitness for purpose. 
--------------------------------------------------------------------------- Issue Date: 20/12/2007 Changes for ARM 9/9/2010 */ #ifndef _BRG_ENDIAN_H #define _BRG_ENDIAN_H #define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ #define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ #if 0 /* Include files where endian defines and byteswap functions may reside */ #if defined( __sun ) # include #elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) # include #elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) # include #elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) # if !defined( __MINGW32__ ) && !defined( _AIX ) # include # if !defined( __BEOS__ ) # include # endif # endif #endif #endif /* Now attempt to set the define for platform byte order using any */ /* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ /* seem to encompass most endian symbol definitions */ #if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) # if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN # elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN # endif #elif defined( BIG_ENDIAN ) # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #elif defined( LITTLE_ENDIAN ) # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN #endif #if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) # if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN # elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN # endif #elif defined( _BIG_ENDIAN ) # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #elif defined( _LITTLE_ENDIAN ) # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN #endif #if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) # if defined( __BYTE_ORDER 
) && __BYTE_ORDER == __BIG_ENDIAN # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN # elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN # endif #elif defined( __BIG_ENDIAN ) # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #elif defined( __LITTLE_ENDIAN ) # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN #endif #if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) # if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN # elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN # endif #elif defined( __BIG_ENDIAN__ ) # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #elif defined( __LITTLE_ENDIAN__ ) # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN #endif /* if the platform byte order could not be determined, then try to */ /* set this define using common machine defines */ #if !defined(PLATFORM_BYTE_ORDER) #if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ defined( vax ) || defined( vms ) || defined( VMS ) || \ defined( __VMS ) || defined( _M_X64 ) # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN #elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #elif defined(__arm__) # ifdef __BIG_ENDIAN # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN # else # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN # endif #elif 1 /* 
**** EDIT HERE IF NECESSARY **** */ # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN #elif 0 /* **** EDIT HERE IF NECESSARY **** */ # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #else # error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order #endif #endif #endif ================================================ FILE: shake-avx2/merkle.c ================================================ #include #include #include "utils.h" #include "utilsx4.h" #include "wots.h" #include "wotsx4.h" #include "merkle.h" #include "address.h" #include "params.h" /* * This generates a Merkle signature (WOTS signature followed by the Merkle * authentication path). */ void merkle_sign(uint8_t *sig, unsigned char *root, const spx_ctx* ctx, uint32_t wots_addr[8], uint32_t tree_addr[8], uint32_t idx_leaf) { unsigned char *auth_path = sig + SPX_WOTS_BYTES; uint32_t tree_addrx4[4*8] = { 0 }; int j; struct leaf_info_x4 info = { 0 }; unsigned steps[ SPX_WOTS_LEN ]; info.wots_sig = sig; chain_lengths(steps, root); info.wots_steps = steps; for (j=0; j<4; j++) { set_type(&tree_addrx4[8*j], SPX_ADDR_TYPE_HASHTREE); set_type(&info.leaf_addr[8*j], SPX_ADDR_TYPE_WOTS); set_type(&info.pk_addr[8*j], SPX_ADDR_TYPE_WOTSPK); copy_subtree_addr(&tree_addrx4[8*j], tree_addr); copy_subtree_addr(&info.leaf_addr[8*j], wots_addr); copy_subtree_addr(&info.pk_addr[8*j], wots_addr); } info.wots_sign_leaf = idx_leaf; treehashx4(root, auth_path, ctx, idx_leaf, 0, SPX_TREE_HEIGHT, wots_gen_leafx4, tree_addrx4, &info); } /* Compute root node of the top-most subtree. */ void merkle_gen_root(unsigned char *root, const spx_ctx *ctx) { /* We do not need the auth path in key generation, but it simplifies the code to have just one treehash routine that computes both root and path in one function. 
/* qsort comparator ordering unsigned long long values ascending. */
static int cmp_llu(const void *a, const void*b)
{
    const unsigned long long lhs = *(const unsigned long long *)a;
    const unsigned long long rhs = *(const unsigned long long *)b;

    if (lhs < rhs) return -1;
    if (lhs > rhs) return 1;
    return 0;
}

/*
 * Return the median of the llen values in l.
 *
 * The array is sorted in place as a side effect.  For an even number of
 * elements the (truncated) mean of the two middle values is returned.
 * An empty array yields 0 instead of reading before the start of l.
 */
static unsigned long long median(unsigned long long *l, size_t llen)
{
    unsigned long long lo, hi;

    if (llen == 0) return 0;

    qsort(l,llen,sizeof(unsigned long long),cmp_llu);

    if (llen%2) return l[llen/2];

    /* The array is sorted, so hi >= lo; lo + (hi-lo)/2 equals (lo+hi)/2
     * without the risk of wrapping around for very large values. */
    lo = l[llen/2-1];
    hi = l[llen/2];
    return lo + (hi - lo)/2;
}

/*
 * Replace l[i] by l[i+1] - l[i] for the first llen-1 entries, turning a list
 * of timestamps into a list of durations.  The last entry is left untouched.
 * Arrays with fewer than two elements are left as they are.
 */
static void delta(unsigned long long *l, size_t llen)
{
    size_t i;

    /* "i + 1 < llen" rather than "i < llen - 1": the latter wraps around
     * when llen is 0. */
    for(i = 0; i + 1 < llen; i++) {
        l[i] = l[i+1] - l[i];
    }
}
long mul) { unsigned long long med; result /= NTESTS; delta(l, NTESTS + 1); med = median(l, llen); printf("avg. %11.2lf us (%2.2lf sec); median ", result, result / 1e6); printfalignedcomma(med, 12); printf(" cycles, %5llux: ", mul); printfalignedcomma(mul*med, 12); printf(" cycles\n"); } #define MEASURE(TEXT, MUL, FNCALL)\ printf(TEXT);\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\ for(i = 0; i < NTESTS; i++) {\ t[i] = cpucycles();\ FNCALL;\ }\ t[NTESTS] = cpucycles();\ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\ result = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) / 1e3;\ display_result(result, t, NTESTS, MUL); int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); spx_ctx ctx; unsigned char pk[SPX_PK_BYTES]; unsigned char sk[SPX_SK_BYTES]; unsigned char *m = malloc(SPX_MLEN); unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN); unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN); unsigned char fors_pk[SPX_FORS_PK_BYTES]; unsigned char fors_m[SPX_FORS_MSG_BYTES]; unsigned char fors_sig[SPX_FORS_BYTES]; unsigned char addr[SPX_ADDR_BYTES]; unsigned char wots_pk[4*SPX_WOTS_PK_BYTES]; unsigned long long smlen; unsigned long long mlen; unsigned long long t[NTESTS+1]; struct timespec start, stop; double result; int i; randombytes(m, SPX_MLEN); randombytes(addr, SPX_ADDR_BYTES); printf("Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\n", SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES, SPX_WOTS_W); printf("Running %d iterations.\n", NTESTS); MEASURE("Generating keypair.. ", 1, crypto_sign_keypair(pk, sk)); MEASURE(" - WOTS pk gen 4x.. ", (1 << SPX_TREE_HEIGHT) / 4, wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Signing.. ", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk)); MEASURE(" - FORS signing.. ", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr)); MEASURE(" - WOTS pk gen x4.. 
", SPX_D * (1 << SPX_TREE_HEIGHT) / 4, wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr)); MEASURE("Verifying.. ", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk)); printf("Signature size: %d (%.2f KiB)\n", SPX_BYTES, SPX_BYTES / 1024.0); printf("Public key size: %d (%.2f KiB)\n", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0); printf("Secret key size: %d (%.2f KiB)\n", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0); free(m); free(sm); free(mout); return 0; } static void wots_gen_pkx4(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) { struct leaf_info_x4 leaf; unsigned steps[ SPX_WOTS_LEN ] = { 0 }; INITIALIZE_LEAF_INFO_X4(leaf, addr, steps); wots_gen_leafx4(pk, ctx, 0, &leaf); } ================================================ FILE: shake-avx2/test/thashx4.c ================================================ #include #include #include "../thashx4.h" #include "../thash.h" #include "../randombytes.h" #include "../params.h" int main(void) { /* Make stdout buffer more responsive. */ setbuf(stdout, NULL); unsigned char input[4*SPX_N]; unsigned char output[4*SPX_N]; unsigned char out4[4*SPX_N]; uint32_t addr[4*8] = {0}; unsigned int j; spx_ctx ctx; randombytes(ctx.pub_seed, SPX_N); randombytes(input, 4*SPX_N); randombytes((unsigned char *)addr, 4 * 8 * sizeof(uint32_t)); printf("Testing if thash matches thashx4.. 
/**
 * 4-way parallel version of thash (robust variant); takes 4x as much input
 * and output.
 *
 * For each instance k, a bitmask is derived by hashing pub_seed || addr_k,
 * the input is XORed with that bitmask, and pub_seed || addr_k || masked
 * input is hashed to SPX_N output bytes.
 *
 * out0..out3  receive SPX_N bytes each
 * in0..in3    inblocks * SPX_N input bytes each
 * inblocks    number of SPX_N-byte blocks per input
 * ctx         provides pub_seed
 * addrx4      four consecutive 8-word addresses, one per instance
 */
void thashx4(unsigned char *out0,
             unsigned char *out1,
             unsigned char *out2,
             unsigned char *out3,
             const unsigned char *in0,
             const unsigned char *in1,
             const unsigned char *in2,
             const unsigned char *in3, unsigned int inblocks,
             const spx_ctx *ctx, uint32_t addrx4[4*8])
{
    if (inblocks == 1 || inblocks == 2) {
        /* As we write and read only a few quadwords, it is more efficient to
         * build and extract from the four-way SHAKE256 state by hand. */
        __m256i state[25];
        /* The public seed is identical for all four instances. */
        for (int i = 0; i < SPX_N/8; i++) {
            state[i] = _mm256_set1_epi64x(((int64_t*)ctx->pub_seed)[i]);
        }
        /* Address words 2i and 2i+1 of instance k go into 64-bit slot k;
         * _mm256_set_epi32 takes the highest element first, hence instance 3
         * is listed first. */
        for (int i = 0; i < 4; i++) {
            state[SPX_N/8+i] = _mm256_set_epi32(
                addrx4[3*8+1+2*i],
                addrx4[3*8+2*i],
                addrx4[2*8+1+2*i],
                addrx4[2*8+2*i],
                addrx4[8+1+2*i],
                addrx4[8+2*i],
                addrx4[1+2*i],
                addrx4[2*i]
            );
        }

        /* SHAKE domain separator and padding: 0x1f directly after the
         * message, 0x80 in the top byte of quadword 16, zeroes elsewhere. */
        state[SPX_N/8+4] = _mm256_set1_epi64x(0x1f);
        for (int i = SPX_N/8+5; i < 16; i++) {
            state[i] = _mm256_set1_epi64x(0);
        }
        state[16] = _mm256_set1_epi64x((long long)(0x80ULL << 56));

        for (int i = 17; i < 25; i++) {
            state[i] = _mm256_set1_epi64x(0);
        }

        /* We will permute state with f1600x4 to compute the bitmask,
         * but first we copy it to state2, which will be used to compute
         * the final output, as its input is almost identical. */
        __m256i state2[25];
        memcpy(state2, state, 800); /* 25 quadwords * 32 bytes */

        KeccakP1600times4_PermuteAll_24rounds(&state[0]);

        /* By copying from state, state2 already contains the pub_seed
         * and address.  We just need to copy in the input blocks XORed with
         * the bitmask we just computed (the first quadwords of the permuted
         * state).  The first of these writes replaces the 0x1f separator
         * that was copied over from state. */
        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {
            state2[SPX_N/8+4+i] = _mm256_xor_si256(
                    state[i],
                    _mm256_set_epi64x(
                        ((int64_t*)in3)[i],
                        ((int64_t*)in2)[i],
                        ((int64_t*)in1)[i],
                        ((int64_t*)in0)[i]
                    )
                );
        }

        /* Domain separator and start of padding.  Note that the surrounding
         * quadwords are already zero in state, from which we copied.
         * We do a XOR instead of a set as this might be the 16th quadword
         * when N=32 and inblocks=2, which already contains the end
         * of the padding. */
        state2[(SPX_N/8)*(1+inblocks)+4] = _mm256_xor_si256(
            state2[(SPX_N/8)*(1+inblocks)+4],
            _mm256_set1_epi64x(0x1f)
        );

        KeccakP1600times4_PermuteAll_24rounds(&state2[0]);

        /* 64-bit slot k of every quadword belongs to instance k. */
        for (int i = 0; i < SPX_N/8; i++) {
            ((int64_t*)out0)[i] = _mm256_extract_epi64(state2[i], 0);
            ((int64_t*)out1)[i] = _mm256_extract_epi64(state2[i], 1);
            ((int64_t*)out2)[i] = _mm256_extract_epi64(state2[i], 2);
            ((int64_t*)out3)[i] = _mm256_extract_epi64(state2[i], 3);
        }
    } else {
        /* Generic path for other block counts: build the four messages in
         * byte buffers and use the generic shake256x4 routine. */
        SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, buf2, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, bitmask0, inblocks * SPX_N);
        SPX_VLA(unsigned char, bitmask1, inblocks * SPX_N);
        SPX_VLA(unsigned char, bitmask2, inblocks * SPX_N);
        SPX_VLA(unsigned char, bitmask3, inblocks * SPX_N);
        unsigned int i;

        memcpy(buf0, ctx->pub_seed, SPX_N);
        memcpy(buf1, ctx->pub_seed, SPX_N);
        memcpy(buf2, ctx->pub_seed, SPX_N);
        memcpy(buf3, ctx->pub_seed, SPX_N);
        memcpy(buf0 + SPX_N, addrx4 + 0*8, SPX_ADDR_BYTES);
        memcpy(buf1 + SPX_N, addrx4 + 1*8, SPX_ADDR_BYTES);
        memcpy(buf2 + SPX_N, addrx4 + 2*8, SPX_ADDR_BYTES);
        memcpy(buf3 + SPX_N, addrx4 + 3*8, SPX_ADDR_BYTES);

        /* Bitmasks: hash of pub_seed || addr, as long as the input. */
        shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, inblocks * SPX_N,
                   buf0, buf1, buf2, buf3, SPX_N + SPX_ADDR_BYTES);

        /* Append the masked input after pub_seed || addr. */
        for (i = 0; i < inblocks * SPX_N; i++) {
            buf0[SPX_N + SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i];
            buf1[SPX_N + SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i];
            buf2[SPX_N + SPX_ADDR_BYTES + i] = in2[i] ^ bitmask2[i];
            buf3[SPX_N + SPX_ADDR_BYTES + i] = in3[i] ^ bitmask3[i];
        }

        shake256x4(out0, out1, out2, out3, SPX_N,
                   buf0, buf1, buf2, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
    }
}
/**
 * 4-way parallel version of thash (simple variant); takes 4x as much input
 * and output.
 *
 * For each instance k, pub_seed || addr_k || in_k is hashed to SPX_N output
 * bytes; no bitmask is applied to the input.
 *
 * out0..out3  receive SPX_N bytes each
 * in0..in3    inblocks * SPX_N input bytes each
 * inblocks    number of SPX_N-byte blocks per input
 * ctx         provides pub_seed
 * addrx4      four consecutive 8-word addresses, one per instance
 */
void thashx4(unsigned char *out0,
             unsigned char *out1,
             unsigned char *out2,
             unsigned char *out3,
             const unsigned char *in0,
             const unsigned char *in1,
             const unsigned char *in2,
             const unsigned char *in3, unsigned int inblocks,
             const spx_ctx *ctx, uint32_t addrx4[4*8])
{
    if (inblocks == 1 || inblocks == 2) {
        /* As we write and read only a few quadwords, it is more efficient to
         * build and extract from the four-way SHAKE256 state by hand. */
        __m256i state[25];
        /* The public seed is identical for all four instances. */
        for (int i = 0; i < SPX_N/8; i++) {
            state[i] = _mm256_set1_epi64x(((int64_t*)ctx->pub_seed)[i]);
        }
        /* Address words 2i and 2i+1 of instance k go into 64-bit slot k;
         * _mm256_set_epi32 takes the highest element first, hence instance 3
         * is listed first. */
        for (int i = 0; i < 4; i++) {
            state[SPX_N/8+i] = _mm256_set_epi32(
                addrx4[3*8+1+2*i],
                addrx4[3*8+2*i],
                addrx4[2*8+1+2*i],
                addrx4[2*8+2*i],
                addrx4[8+1+2*i],
                addrx4[8+2*i],
                addrx4[1+2*i],
                addrx4[2*i]
            );
        }

        /* Input quadword i of instance k goes into 64-bit slot k. */
        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {
            state[SPX_N/8+4+i] = _mm256_set_epi64x(
                        ((int64_t*)in3)[i],
                        ((int64_t*)in2)[i],
                        ((int64_t*)in1)[i],
                        ((int64_t*)in0)[i]
                    );
        }

        /* Domain separator and padding.  Everything after the message up to
         * quadword 15 is zeroed, then the end of the padding (0x80 in the top
         * byte of quadword 16) is set, and only afterwards the 0x1f separator
         * is XORed in directly after the message.  The XOR must come last:
         * for N=32 and inblocks=2 the separator lands in quadword 16 itself,
         * which the zeroing loop does not touch. */
        for (int i = (SPX_N/8)*(1+inblocks)+4; i < 16; i++) {
            state[i] = _mm256_set1_epi64x(0);
        }
        state[16] = _mm256_set1_epi64x((long long)(0x80ULL << 56));
        state[(SPX_N/8)*(1+inblocks)+4] = _mm256_xor_si256(
            state[(SPX_N/8)*(1+inblocks)+4],
            _mm256_set1_epi64x(0x1f)
        );
        for (int i = 17; i < 25; i++) {
            state[i] = _mm256_set1_epi64x(0);
        }

        KeccakP1600times4_PermuteAll_24rounds(&state[0]);

        /* 64-bit slot k of every quadword belongs to instance k. */
        for (int i = 0; i < SPX_N/8; i++) {
            ((int64_t*)out0)[i] = _mm256_extract_epi64(state[i], 0);
            ((int64_t*)out1)[i] = _mm256_extract_epi64(state[i], 1);
            ((int64_t*)out2)[i] = _mm256_extract_epi64(state[i], 2);
            ((int64_t*)out3)[i] = _mm256_extract_epi64(state[i], 3);
        }
    } else {
        /* Generic path for other block counts: build the four messages
         * pub_seed || addr || input in byte buffers and use the generic
         * shake256x4 routine. */
        SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, buf2, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
        SPX_VLA(unsigned char, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);

        memcpy(buf0, ctx->pub_seed, SPX_N);
        memcpy(buf1, ctx->pub_seed, SPX_N);
        memcpy(buf2, ctx->pub_seed, SPX_N);
        memcpy(buf3, ctx->pub_seed, SPX_N);
        memcpy(buf0 + SPX_N, addrx4 + 0*8, SPX_ADDR_BYTES);
        memcpy(buf1 + SPX_N, addrx4 + 1*8, SPX_ADDR_BYTES);
        memcpy(buf2 + SPX_N, addrx4 + 2*8, SPX_ADDR_BYTES);
        memcpy(buf3 + SPX_N, addrx4 + 3*8, SPX_ADDR_BYTES);
        memcpy(buf0 + SPX_N + SPX_ADDR_BYTES, in0, inblocks * SPX_N);
        memcpy(buf1 + SPX_N + SPX_ADDR_BYTES, in1, inblocks * SPX_N);
        memcpy(buf2 + SPX_N + SPX_ADDR_BYTES, in2, inblocks * SPX_N);
        memcpy(buf3 + SPX_N + SPX_ADDR_BYTES, in3, inblocks * SPX_N);

        shake256x4(out0, out1, out2, out3, SPX_N,
                   buf0, buf1, buf2, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
    }
}
char *out3, const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, const unsigned char *in3, unsigned int inblocks, const spx_ctx *ctx, uint32_t addrx4[4*8]); #endif ================================================ FILE: shake-avx2/utilsx4.c ================================================ #include #include "utils.h" #include "utilsx4.h" #include "params.h" #include "thashx4.h" #include "address.h" /* * Generate the entire Merkle tree, computing the authentication path for leaf_idx, * and the resulting root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE) * * This expects tree_addrx4 to be initialized to 4 parallel addr structures for * the Merkle tree nodes * * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. * * This works by using the standard Merkle tree building algorithm, except * that each 'node' tracked is actually 4 consecutive nodes in the real tree. * When we combine two logical nodes ABCD and WXYZ, we perform the H * operation on adjacent real nodes, forming the parent logical node * (AB)(CD)(WX)(YZ) * * When we get to the top two levels of the real tree (where there is only * one logical node), we continue this operation two more times; the right * most real node will by the actual root (and the other 3 nodes will be * garbage). 
We follow the same thashx4 logic so that the 'extract * authentication path components' part of the loop is still executed (and * to simplify the code somewhat) * * This currently assumes tree_height >= 2; I suspect that doing an adjusting * idx, addr_idx on the gen_leafx4 call if tree_height < 2 would fix it; since * we don't actually use such short trees, I haven't bothered */ void treehashx4(unsigned char *root, unsigned char *auth_path, const spx_ctx *ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leafx4)( unsigned char* /* Where to write the leaves */, const spx_ctx*, uint32_t idx, void *info), uint32_t tree_addrx4[4*8], void *info) { /* This is where we keep the intermediate nodes */ SPX_VLA(unsigned char, stackx4, tree_height * 4 * SPX_N); uint32_t left_adj = 0, prev_left_adj = 0; /* When we're doing the top 3 */ /* levels, the left-most part of the tree isn't at the beginning */ /* of current[]. These give the offset of the actual start */ uint32_t idx; uint32_t max_idx = (1 << (tree_height-2)) - 1; for (idx = 0;; idx++) { unsigned char current[4*SPX_N]; /* Current logical node */ gen_leafx4( current, ctx, 4*idx + idx_offset, info ); /* Now combine the freshly generated right node with previously */ /* generated left ones */ uint32_t internal_idx_offset = idx_offset; uint32_t internal_idx = idx; uint32_t internal_leaf = leaf_idx; uint32_t h; /* The height we are in the Merkle tree */ for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) { /* Special processing if we're at the top of the tree */ if (h >= tree_height - 2) { if (h == tree_height) { /* We hit the root; return it */ memcpy( root, ¤t[3*SPX_N], SPX_N ); return; } /* The tree indexing logic is a bit off in this case */ /* Adjust it so that the left-most node of the part of */ /* the tree that we're processing has index 0 */ prev_left_adj = left_adj; left_adj = 4 - (1 << (tree_height - h - 1)); } /* Check if we hit the top of the tree */ if (h == tree_height) { /* 
We hit the root; return it */ memcpy( root, ¤t[3*SPX_N], SPX_N ); return; } /* * Check if one of the nodes we have is a part of the * authentication path; if it is, write it out */ if ((((internal_idx << 2) ^ internal_leaf) & ~0x3) == 0) { memcpy( &auth_path[ h * SPX_N ], ¤t[(((internal_leaf&3)^1) + prev_left_adj) * SPX_N], SPX_N ); } /* * Check if we're at a left child; if so, stop going up the stack * Exception: if we've reached the end of the tree, keep on going * (so we combine the last 4 nodes into the one root node in two * more iterations) */ if ((internal_idx & 1) == 0 && idx < max_idx) { break; } /* Ok, we're at a right node (or doing the top 3 levels) */ /* Now combine the left and right logical nodes together */ /* Set the address of the node we're creating. */ int j; internal_idx_offset >>= 1; for (j = 0; j < 4; j++) { set_tree_height(tree_addrx4 + j*8, h + 1); set_tree_index(tree_addrx4 + j*8, (4/2) * (internal_idx&~1) + j - left_adj + internal_idx_offset ); } unsigned char *left = &stackx4[h * 4 * SPX_N]; thashx4( ¤t[0 * SPX_N], ¤t[1 * SPX_N], ¤t[2 * SPX_N], ¤t[3 * SPX_N], &left [0 * SPX_N], &left [2 * SPX_N], ¤t[0 * SPX_N], ¤t[2 * SPX_N], 2, ctx, tree_addrx4); } /* We've hit a left child; save the current for when we get the */ /* corresponding right right */ memcpy( &stackx4[h * 4 * SPX_N], current, 4 * SPX_N); } } ================================================ FILE: shake-avx2/utilsx4.h ================================================ #ifndef SPX_UTILSX4_H #define SPX_UTILSX4_H #include #include "params.h" /** * For a given leaf index, computes the authentication path and the resulting * root node using Merkle's TreeHash algorithm. * Expects the layer and tree parts of the tree_addr to be set, as well as the * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE). * Applies the offset idx_offset to indices before building addresses, so that * it is possible to continue counting indices across trees. 
* * This implementation uses AVX to compute internal nodes 4 at a time (in * parallel) */ #define treehashx4 SPX_NAMESPACE(treehashx4) void treehashx4(unsigned char *root, unsigned char *auth_path, const spx_ctx *ctx, uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, void (*gen_leafx4)( unsigned char* /* Where to write the leaves */, const spx_ctx* /* ctx */, uint32_t addr_idx, void *info), uint32_t tree_addrx4[4*8], void *info); #endif ================================================ FILE: shake-avx2/wots.c ================================================ #include #include #include "utils.h" #include "utilsx4.h" #include "hash.h" #include "hashx4.h" #include "thash.h" #include "thashx4.h" #include "wots.h" #include "wotsx4.h" #include "address.h" #include "params.h" // TODO clarify address expectations, and make them more uniform. // TODO i.e. do we expect types to be set already? // TODO and do we expect modifications or copies? /** * Computes up the chains */ static void gen_chains( unsigned char *out, const unsigned char *in, unsigned int start[SPX_WOTS_LEN], unsigned int steps[SPX_WOTS_LEN], const spx_ctx *ctx, uint32_t addr[8]) { uint32_t i, j, k, idx, watching; int done; unsigned char empty[SPX_N]; unsigned char *bufs[4]; uint32_t addrs[8*4]; int l; uint16_t counts[SPX_WOTS_W] = { 0 }; uint16_t idxs[SPX_WOTS_LEN]; uint16_t total, newTotal; /* set addrs = {addr, addr, addr, addr} */ for (j = 0; j < 4; j++) { memcpy(addrs+j*8, addr, sizeof(uint32_t) * 8); } /* Initialize out with the value at position 'start'. */ memcpy(out, in, SPX_WOTS_LEN*SPX_N); /* Sort the chains in reverse order by steps using counting sort. */ for (i = 0; i < SPX_WOTS_LEN; i++) { counts[steps[i]]++; } total = 0; for (l = SPX_WOTS_W - 1; l >= 0; l--) { newTotal = counts[l] + total; counts[l] = total; total = newTotal; } for (i = 0; i < SPX_WOTS_LEN; i++) { idxs[counts[steps[i]]] = i; counts[steps[i]]++; } /* We got our work cut out for us: do it! 
*/ for (i = 0; i < SPX_WOTS_LEN; i += 4) { for (j = 0; j < 4 && i+j < SPX_WOTS_LEN; j++) { idx = idxs[i+j]; set_chain_addr(addrs+j*8, idx); bufs[j] = out + SPX_N * idx; } /* As the chains are sorted in reverse order, we know that the first * chain is the longest and the last one is the shortest. We keep * an eye on whether the last chain is done and then on the one before, * et cetera. */ watching = 3; done = 0; while (i + watching >= SPX_WOTS_LEN) { bufs[watching] = &empty[0]; watching--; } for (k = 0;; k++) { while (k == steps[idxs[i+watching]]) { bufs[watching] = &empty[0]; if (watching == 0) { done = 1; break; } watching--; } if (done) { break; } for (j = 0; j < watching + 1; j++) { set_hash_addr(addrs+j*8, k + start[idxs[i+j]]); } thashx4(bufs[0], bufs[1], bufs[2], bufs[3], bufs[0], bufs[1], bufs[2], bufs[3], 1, ctx, addrs); } } } /** * base_w algorithm as described in draft. * Interprets an array of bytes as integers in base w. * This only works when log_w is a divisor of 8. */ static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { int in = 0; int out = 0; unsigned char total; int bits = 0; int consumed; for (consumed = 0; consumed < out_len; consumed++) { if (bits == 0) { total = input[in]; in++; bits += 8; } bits -= SPX_WOTS_LOGW; output[out] = (total >> bits) & (SPX_WOTS_W - 1); out++; } } /* Computes the WOTS+ checksum over a message (in base_w). */ static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { unsigned int csum = 0; unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8]; unsigned int i; /* Compute checksum. */ for (i = 0; i < SPX_WOTS_LEN1; i++) { csum += SPX_WOTS_W - 1 - msg_base_w[i]; } /* Convert checksum to base_w. */ /* Make sure expected empty zero bits are the least significant bits. 
*/ csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8); ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes); } /* Takes a message and derives the matching chain lengths. */ void chain_lengths(unsigned int *lengths, const unsigned char *msg) { base_w(lengths, SPX_WOTS_LEN1, msg); wots_checksum(lengths + SPX_WOTS_LEN1, lengths); } /** * Takes a WOTS signature and an n-byte message, computes a WOTS public key. * * Writes the computed public key to 'pk'. */ void wots_pk_from_sig(unsigned char *pk, const unsigned char *sig, const unsigned char *msg, const spx_ctx *ctx, uint32_t addr[8]) { unsigned int steps[SPX_WOTS_LEN]; unsigned int start[SPX_WOTS_LEN]; uint32_t i; chain_lengths(start, msg); for (i = 0; i < SPX_WOTS_LEN; i++) { steps[i] = SPX_WOTS_W - 1 - start[i]; } gen_chains(pk, sig, start, steps, ctx, addr); } /* * This generates 4 sequential WOTS public keys * It also generates the WOTS signature if leaf_info indicates * that we're signing with one of these WOTS keys */ void wots_gen_leafx4(unsigned char *dest, const spx_ctx *ctx, uint32_t leaf_idx, void *v_info) { struct leaf_info_x4 *info = v_info; uint32_t *leaf_addr = info->leaf_addr; uint32_t *pk_addr = info->pk_addr; unsigned int i, j, k; unsigned char pk_buffer[ 4 * SPX_WOTS_BYTES ]; unsigned wots_offset = SPX_WOTS_BYTES; unsigned char *buffer; uint32_t wots_k_mask; unsigned wots_sign_index; if (((leaf_idx ^ info->wots_sign_leaf) & ~3) == 0) { /* We're traversing the leaf that's signing; generate the WOTS */ /* signature */ wots_k_mask = 0; wots_sign_index = info->wots_sign_leaf & 3; /* Which of of the 4 */ /* 4 slots do the signatures come from */ } else { /* Nope, we're just generating pk's; turn off the signature logic */ wots_k_mask = (uint32_t)~0; wots_sign_index = 0; } for (j = 0; j < 4; j++) { set_keypair_addr( leaf_addr + j*8, leaf_idx + j ); set_keypair_addr( pk_addr + j*8, leaf_idx + j ); } for (i = 0, buffer = pk_buffer; i < 
SPX_WOTS_LEN; i++, buffer += SPX_N) { uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k to */ /* the step if we're generating a signature, ~0 if we're not */ /* Start with the secret seed */ for (j = 0; j < 4; j++) { set_chain_addr(leaf_addr + j*8, i); set_hash_addr(leaf_addr + j*8, 0); set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTSPRF); } prf_addrx4(buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 2*wots_offset, buffer + 3*wots_offset, ctx, leaf_addr); for (j = 0; j < 4; j++) { set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTS); } /* Iterate down the WOTS chain */ for (k=0;; k++) { /* Check if one of the values we have needs to be saved as a */ /* part of the WOTS signature */ if (k == wots_k) { memcpy( info->wots_sig + i * SPX_N, buffer + wots_sign_index*wots_offset, SPX_N ); } /* Check if we hit the top of the chain */ if (k == SPX_WOTS_W - 1) break; /* Iterate one step on all 4 chains */ for (j = 0; j < 4; j++) { set_hash_addr(leaf_addr + j*8, k); } thashx4(buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 2*wots_offset, buffer + 3*wots_offset, buffer + 0*wots_offset, buffer + 1*wots_offset, buffer + 2*wots_offset, buffer + 3*wots_offset, 1, ctx, leaf_addr); } } /* Do the final thash to generate the public keys */ thashx4(dest + 0*SPX_N, dest + 1*SPX_N, dest + 2*SPX_N, dest + 3*SPX_N, pk_buffer + 0*wots_offset, pk_buffer + 1*wots_offset, pk_buffer + 2*wots_offset, pk_buffer + 3*wots_offset, SPX_WOTS_LEN, ctx, pk_addr); } ================================================ FILE: shake-avx2/wotsx4.h ================================================ #if !defined( WOTSX4_H_ ) #define WOTSX4_H_ #include #include "params.h" /* * This is here to provide an interface to the internal wots_gen_leafx4 * routine. 
While this routine is not referenced in the package outside of
 * wots.c, it is called from the stand-alone benchmark code to characterize
 * the performance
 */
struct leaf_info_x4 {
    /* Destination for the WOTS signature; wots_gen_leafx4 writes SPX_N */
    /* bytes per chain at offset i * SPX_N */
    unsigned char *wots_sig;
    uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */
    /* Per-chain step at which the chain value is saved into wots_sig */
    uint32_t *wots_steps;
    /* Four 8-word addresses (one per lane) updated while walking chains */
    uint32_t leaf_addr[4*8];
    /* Four 8-word addresses (one per lane) used for the final pk hash */
    uint32_t pk_addr[4*8];
};

/* Macro to set the leaf_info to something 'benign', that is, it would */
/* run with the same time as it does during the real signing process */
/* Used only by the benchmark code */
/* Expands to a brace block that declares its own 'int i'; 'info' must be */
/* an lvalue struct leaf_info_x4 and 'addr' must point to 32 readable bytes */
#define INITIALIZE_LEAF_INFO_X4(info, addr, step_buffer) { \
    info.wots_sig = 0;             \
    info.wots_sign_leaf = ~0;      \
    info.wots_steps = step_buffer; \
    int i;                         \
    for (i=0; i<4; i++) {          \
        memcpy( &info.leaf_addr[8*i], addr, 32 ); \
        memcpy( &info.pk_addr[8*i], addr, 32 ); \
    }                              \
}

#define wots_gen_leafx4 SPX_NAMESPACE(wots_gen_leafx4)
/* Generates 4 sequential WOTS public keys into dest; v_info is used as a */
/* struct leaf_info_x4 pointer by the implementation in wots.c */
void wots_gen_leafx4(unsigned char *dest,
                   const spx_ctx *ctx,
                   uint32_t leaf_idx, void *v_info);

#endif /* WOTSX4_H_ */

================================================
FILE: vectors.py
================================================
#! /usr/bin/env python3

# Without arguments, generates sha256 sums of NIST KAT response files
# for each of the instances (which should match SHA256SUMS.)
#
# With two arguments, checks whether the sha256 sum of the given
# generated NIST KAT response file is correct, e.g.:
#
#  ./vectors.py sphincs-shake-128s-simple shake-avx2

import multiprocessing
import subprocess
import itertools
import tempfile
import hashlib
import shutil
import os
import sys

# Parameter axes; every combination of the four names one instance.
fns = ['shake', 'sha2', 'haraka']
options = ["f", "s"]
sizes = [128, 192, 256]
thashes = ['robust', 'simple']


def nameFor(fn, opt, size, thash):
    """Return the instance name, e.g. ``sphincs-shake-128s-simple``."""
    return f"sphincs-{fn}-{size}{opt}-{thash}"


def make(fn, opt, size, thash, bindir, impl):
    """Build ``PQCgenKAT_sign`` for one instance and move it into *bindir*.

    Runs ``make clean`` and then ``make PQCgenKAT_sign`` in the *impl*
    directory with the matching ``PARAMS``/``THASH`` overrides.

    Returns:
        ``(name, size)``, the pair that :func:`run` expects.

    Raises:
        subprocess.CalledProcessError: if either ``make`` invocation fails.
    """
    name = nameFor(fn, opt, size, thash)
    overrides = [f'PARAMS=sphincs-{fn}-{size}{opt}', 'THASH='+thash]
    sys.stderr.write(f"Compiling {name} …\n")
    sys.stderr.flush()
    subprocess.run(["make", "-C", impl, "clean"] + overrides,
                   stdout=subprocess.DEVNULL, stderr=sys.stderr, check=True)
    subprocess.run(["make", '-j', "-C", impl, "PQCgenKAT_sign"] + overrides,
                   stdout=subprocess.DEVNULL, stderr=sys.stderr, check=True)
    shutil.move(
        os.path.join(impl, 'PQCgenKAT_sign'),
        os.path.join(bindir, name),
    )
    return (name, size)


def run(name_size, bindir):
    """Run a built KAT generator and return its ``"<sha256> <name>"`` line.

    The binary is executed in a fresh temporary directory and the response
    file ``PQCsignKAT_<size//2>.rsp`` it leaves there is hashed.

    Raises:
        subprocess.CalledProcessError: if the generator exits non-zero.
    """
    name, size = name_size
    rsp = f'PQCsignKAT_{size//2}.rsp'
    with tempfile.TemporaryDirectory() as rundir:
        sys.stderr.write(f"Running {name} …\n")
        sys.stderr.flush()
        subprocess.run([os.path.join(bindir, name)],
                       stdout=subprocess.DEVNULL, stderr=sys.stderr,
                       cwd=rundir, check=True)
        with open(os.path.join(rundir, rsp), 'rb') as f:
            h = hashlib.sha256(f.read()).hexdigest()
    return f"{h} {name}"


def generate_sums():
    """Build every instance from ``ref`` and print the sorted checksum lines."""
    with tempfile.TemporaryDirectory() as bindir:
        with multiprocessing.Pool() as pool:
            # Builds share one source tree, so they run one after another;
            # only the KAT runs are spread over the pool.
            name_sizes = [
                make(fn, opt, size, thash, bindir, 'ref')
                for fn, opt, size, thash
                in itertools.product(fns, options, sizes, thashes)
            ]
            res = pool.starmap(run, zip(name_sizes, [bindir]*len(name_sizes)))
        res.sort()
        print('\n'.join(res))


def check_sum(name, impl):
    """Build and run instance *name* from *impl*; compare with SHA256SUMS.

    Exits with status 1 if *name* is not a known instance and with status 2
    if the computed line is not found in ``SHA256SUMS``; writes ``ok`` to
    stderr otherwise.
    """
    line = None
    with tempfile.TemporaryDirectory() as bindir:
        # A single flat loop, so that ``break`` really ends the search.
        for fn, opt, size, thash in itertools.product(
                fns, options, sizes, thashes):
            if nameFor(fn, opt, size, thash) != name:
                continue
            name_size = make(fn, opt, size, thash, bindir, impl)
            line = run(name_size, bindir)
            break
    if not line:
        sys.stderr.write("No such instance\n")
        sys.exit(1)
    with open('SHA256SUMS', 'r') as f:
        if f.read().find(line + '\n') == -1:
            sys.stderr.write(f"Test vector mismatch: {line}\n")
            sys.exit(2)
    sys.stderr.write("ok\n")


if __name__ == '__main__':
    if len(sys.argv) <= 1:
        generate_sums()
    elif len(sys.argv) == 3:
        check_sum(sys.argv[1], sys.argv[2])
    else:
        sys.stderr.write("Expect two or no arguments\n")
        sys.exit(3)