[
  {
    "path": ".github/workflows/test-haraka-aesni.yml",
    "content": "name: Tests for haraka-aesni implementation\n\non:\n  - push\n  - pull_request\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        size:\n          - 128\n          - 192\n          - 256\n        option:\n          - s\n          - f\n        thash:\n          - simple\n          - robust\n    steps:\n      - uses: actions/checkout@v1\n      - name: Run make\n        run: |\n          make -C haraka-aesni THASH=${{ matrix.thash }} clean\n          make -C haraka-aesni THASH=${{ matrix.thash }} PARAMS=sphincs-haraka-${{ matrix.size }}${{ matrix.option }} tests\n          make -C haraka-aesni THASH=${{ matrix.thash }} PARAMS=sphincs-haraka-${{ matrix.size }}${{ matrix.option }} test\n          make -C haraka-aesni THASH=${{ matrix.thash }} PARAMS=sphincs-haraka-${{ matrix.size }}${{ matrix.option }} PQCgenKAT_sign\n      - name: Run PQCgenKAT_sign\n        run: python3 vectors.py sphincs-haraka-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} haraka-aesni\n\n#  vim: set ft=yaml ts=2 sw=2 et :\n"
  },
  {
    "path": ".github/workflows/test-ref.yml",
    "content": "name: Tests for ref implementation\n\non:\n  - push\n  - pull_request\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        hash:\n          - sha2\n          - shake\n          - haraka\n        size:\n          - 128\n          - 192\n          - 256\n        option:\n          - s\n          - f\n        thash:\n          - simple\n          - robust\n    steps:\n      - uses: actions/checkout@v1\n      - name: Run make\n        run: |\n          make -C ref HASH=${{ matrix.hash }} THASH=${{ matrix.thash }} clean\n          make -C ref HASH=${{ matrix.hash }} THASH=${{ matrix.thash }} PARAMS=sphincs-${{ matrix.hash }}-${{ matrix.size }}${{ matrix.option }} tests\n          make -C ref HASH=${{ matrix.hash }} THASH=${{ matrix.thash }} PARAMS=sphincs-${{ matrix.hash }}-${{ matrix.size }}${{ matrix.option }} test\n          make -C ref THASH=${{ matrix.thash }} PQCgenKAT_sign\n      - name: Run PQCgenKAT_sign\n        run: python3 vectors.py sphincs-${{ matrix.hash }}-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} ref\n\n#  vim: set ft=yaml ts=2 sw=2 et :\n"
  },
  {
    "path": ".github/workflows/test-sha2-avx2.yml",
    "content": "name: Tests for sha2-avx2 implementation\n\non:\n  - push\n  - pull_request\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        size:\n          - 128\n          - 192\n          - 256\n        option:\n          - s\n          - f\n        thash:\n          - simple\n          - robust\n    steps:\n      - uses: actions/checkout@v1\n      - name: Run make\n        run: |\n          make -C sha2-avx2 THASH=${{ matrix.thash }} clean\n          make -C sha2-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-sha2-${{ matrix.size }}${{ matrix.option }} tests\n          make -C sha2-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-sha2-${{ matrix.size }}${{ matrix.option }} test\n          make -C sha2-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-sha2-${{ matrix.size }}${{ matrix.option }} PQCgenKAT_sign\n      - name: Run PQCgenKAT_sign\n        run: python3 vectors.py sphincs-sha2-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} sha2-avx2\n\n#  vim: set ft=yaml ts=2 sw=2 et :\n"
  },
  {
    "path": ".github/workflows/test-shake-avx2.yml",
    "content": "name: Tests for shake-avx2 implementation\n\non:\n  - push\n  - pull_request\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        size:\n          - 128\n          - 192\n          - 256\n        option:\n          - s\n          - f\n        thash:\n          - simple\n          - robust\n    steps:\n      - uses: actions/checkout@v1\n      - name: Run make\n        run: |\n          make -C shake-avx2 THASH=${{ matrix.thash }} clean\n          make -C shake-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-shake-${{ matrix.size }}${{ matrix.option }} tests\n          make -C shake-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-shake-${{ matrix.size }}${{ matrix.option }} test\n          make -C shake-avx2 THASH=${{ matrix.thash }} PARAMS=sphincs-shake-${{ matrix.size }}${{ matrix.option }} PQCgenKAT_sign\n      - name: Run PQCgenKAT_sign\n        run: python3 vectors.py sphincs-shake-${{ matrix.size }}${{ matrix.option }}-${{ matrix.thash }} shake-avx2\n\n#  vim: set ft=yaml ts=2 sw=2 et :\n"
  },
  {
    "path": ".reuse/dep5",
    "content": "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: SPHINCS+\nUpstream-Contact: contact@sphincs.org\nSource: https://github.com/sphincs/sphincsplus\n\nFiles: *\nLicense: LicenseRef-SPHINCS-PLUS-Public-Domain OR CC0-1.0 OR 0BSD OR MIT-0\n\nFiles: ref/haraka.c\nCopyright: 2016 Thomas Pornin <pornin@bolet.org> and SPHINCS+ team\nLicense: (LicenseRef-SPHINCS-PLUS-Public-Domain OR CC0-1.0 OR 0BSD OR MIT-0) AND MIT\n\nFiles: ref/PQCgenKAT_sign.c ref/rng.c ref/rng.h\nCopyright: 2017 Bassham, Lawrence E (Fed).\nLicense: All rights reserved.\n"
  },
  {
    "path": "LICENSE",
    "content": "SPDX-License-Identifier: (LicenseRef-SPHINCS-PLUS-Public-Domain OR CC0-1.0 OR 0BSD OR MIT-0) AND MIT\n\n"
  },
  {
    "path": "LICENSES/0BSD.txt",
    "content": "BSD Zero Clause License\n\nPermission to use, copy, modify, and/or distribute this software for\nany purpose with or without fee is hereby granted.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL\nWARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES\nOF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE\nFOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY\nDAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN\nAN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT\nOF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.\n"
  },
  {
    "path": "LICENSES/CC0-1.0.txt",
    "content": "Creative Commons Legal Code\n\nCC0 1.0 Universal\n\n    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE\n    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN\n    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS\n    INFORMATION ON AN \"AS-IS\" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES\n    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS\n    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM\n    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED\n    HEREUNDER.\n\nStatement of Purpose\n\nThe laws of most jurisdictions throughout the world automatically confer\nexclusive Copyright and Related Rights (defined below) upon the creator\nand subsequent owner(s) (each and all, an \"owner\") of an original work of\nauthorship and/or a database (each, a \"Work\").\n\nCertain owners wish to permanently relinquish those rights to a Work for\nthe purpose of contributing to a commons of creative, cultural and\nscientific works (\"Commons\") that the public can reliably and without fear\nof later claims of infringement build upon, modify, incorporate in other\nworks, reuse and redistribute as freely as possible in any form whatsoever\nand for any purposes, including without limitation commercial purposes.\nThese owners may contribute to the Commons to promote the ideal of a free\nculture and the further production of creative, cultural and scientific\nworks, or to gain reputation or greater distribution for their Work in\npart through the use and efforts of others.\n\nFor these and/or other purposes and motivations, and without any\nexpectation of additional consideration or compensation, the person\nassociating CC0 with a Work (the \"Affirmer\"), to the extent that he or she\nis an owner of Copyright and Related Rights in the Work, voluntarily\nelects to apply CC0 to the Work and publicly distribute the Work under its\nterms, with knowledge of his or her Copyright 
and Related Rights in the\nWork and the meaning and intended legal effect of CC0 on those rights.\n\n1. Copyright and Related Rights. A Work made available under CC0 may be\nprotected by copyright and related or neighboring rights (\"Copyright and\nRelated Rights\"). Copyright and Related Rights include, but are not\nlimited to, the following:\n\n  i. the right to reproduce, adapt, distribute, perform, display,\n     communicate, and translate a Work;\n ii. moral rights retained by the original author(s) and/or performer(s);\niii. publicity and privacy rights pertaining to a person's image or\n     likeness depicted in a Work;\n iv. rights protecting against unfair competition in regards to a Work,\n     subject to the limitations in paragraph 4(a), below;\n  v. rights protecting the extraction, dissemination, use and reuse of data\n     in a Work;\n vi. database rights (such as those arising under Directive 96/9/EC of the\n     European Parliament and of the Council of 11 March 1996 on the legal\n     protection of databases, and under any national implementation\n     thereof, including any amended or successor version of such\n     directive); and\nvii. other similar, equivalent or corresponding rights throughout the\n     world based on applicable law or treaty, and any national\n     implementations thereof.\n\n2. Waiver. 
To the greatest extent permitted by, but not in contravention\nof, applicable law, Affirmer hereby overtly, fully, permanently,\nirrevocably and unconditionally waives, abandons, and surrenders all of\nAffirmer's Copyright and Related Rights and associated claims and causes\nof action, whether now known or unknown (including existing as well as\nfuture claims and causes of action), in the Work (i) in all territories\nworldwide, (ii) for the maximum duration provided by applicable law or\ntreaty (including future time extensions), (iii) in any current or future\nmedium and for any number of copies, and (iv) for any purpose whatsoever,\nincluding without limitation commercial, advertising or promotional\npurposes (the \"Waiver\"). Affirmer makes the Waiver for the benefit of each\nmember of the public at large and to the detriment of Affirmer's heirs and\nsuccessors, fully intending that such Waiver shall not be subject to\nrevocation, rescission, cancellation, termination, or any other legal or\nequitable action to disrupt the quiet enjoyment of the Work by the public\nas contemplated by Affirmer's express Statement of Purpose.\n\n3. Public License Fallback. Should any part of the Waiver for any reason\nbe judged legally invalid or ineffective under applicable law, then the\nWaiver shall be preserved to the maximum extent permitted taking into\naccount Affirmer's express Statement of Purpose. 
In addition, to the\nextent the Waiver is so judged Affirmer hereby grants to each affected\nperson a royalty-free, non transferable, non sublicensable, non exclusive,\nirrevocable and unconditional license to exercise Affirmer's Copyright and\nRelated Rights in the Work (i) in all territories worldwide, (ii) for the\nmaximum duration provided by applicable law or treaty (including future\ntime extensions), (iii) in any current or future medium and for any number\nof copies, and (iv) for any purpose whatsoever, including without\nlimitation commercial, advertising or promotional purposes (the\n\"License\"). The License shall be deemed effective as of the date CC0 was\napplied by Affirmer to the Work. Should any part of the License for any\nreason be judged legally invalid or ineffective under applicable law, such\npartial invalidity or ineffectiveness shall not invalidate the remainder\nof the License, and in such case Affirmer hereby affirms that he or she\nwill not (i) exercise any of his or her remaining Copyright and Related\nRights in the Work or (ii) assert any associated claims and causes of\naction with respect to the Work, in either case contrary to Affirmer's\nexpress Statement of Purpose.\n\n4. Limitations and Disclaimers.\n\n a. No trademark or patent rights held by Affirmer are waived, abandoned,\n    surrendered, licensed or otherwise affected by this document.\n b. Affirmer offers the Work as-is and makes no representations or\n    warranties of any kind concerning the Work, express, implied,\n    statutory or otherwise, including without limitation warranties of\n    title, merchantability, fitness for a particular purpose, non\n    infringement, or the absence of latent or other defects, accuracy, or\n    the present or absence of errors, whether or not discoverable, all to\n    the greatest extent permissible under applicable law.\n c. 
Affirmer disclaims responsibility for clearing rights of other persons\n    that may apply to the Work or any use thereof, including without\n    limitation any person's Copyright and Related Rights in the Work.\n    Further, Affirmer disclaims responsibility for obtaining any necessary\n    consents, permissions or other rights required for any use of the\n    Work.\n d. Affirmer understands and acknowledges that Creative Commons is not a\n    party to this document and has no duty or obligation with respect to\n    this CC0 or use of the Work.\n\n"
  },
  {
    "path": "LICENSES/LicenseRef-SPHINCS-PLUS-Public-Domain.txt",
    "content": "This work is hereby placed into the public domain.\n\n"
  },
  {
    "path": "LICENSES/MIT-0.txt",
    "content": "MIT No Attribution\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this\nsoftware and associated documentation files (the \"Software\"), to deal in the Software\nwithout restriction, including without limitation the rights to use, copy, modify,\nmerge, publish, distribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\nINCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\nPARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\nHOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n\n"
  },
  {
    "path": "LICENSES/MIT.txt",
    "content": "MIT License\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n\n"
  },
  {
    "path": "README.md",
    "content": "## SPHINCS+\n\nThis repository contains the software that accompanies the [SPHINCS+ submission](https://sphincs.org/) to [NIST's Post-Quantum Cryptography](https://csrc.nist.gov/Projects/Post-Quantum-Cryptography) project.\n\n![][test-ref]\n![][test-sha256-avx2]\n![][test-shake256-avx2]\n![][test-haraka-aesni]\n\n### Parameters\n\nThe [SPHINCS+ specification](https://sphincs.org/data/sphincs+-specification.pdf) proposed a set of 36 named instances, specifying hash functions and concrete parameters for the security level, tree dimensions, WOTS+ and FORS. This reference implementation allows for more flexibility, as parameters can be specified in a `params.h` file. The proposed parameter sets have been predefined in `ref/params/params-*.h`, and the hash function can be varied by linking with the different implementations of `hash.h`, i.e., `hash_haraka.c`, `hash_sha2.c` and `hash_shake.c`, as well as different implementations of `thash.h`, i.e., `*_robust.c` and `*_simple.c`. This is demonstrated in the `Makefile`. See the table below for a summary of the parameter sets. 
These parameters target the NIST security categories 1, 3 and 5; for each category, there is a parameter set geared towards either small signatures or fast signature generation.\n\n|               | n  | h  | d  | log(t) | k  |  w  | bit security | pk bytes | sk bytes | sig bytes |\n| :------------ | -: | -: | -: | -----: | -: | --: | -----------: | -------: | -------: | --------: |\n| SPHINCS+-128s | 16 | 63 |  7 |     12 | 14 |  16 |          133 |       32 |       64 |     7,856 |\n| SPHINCS+-128f | 16 | 66 | 22 |      6 | 33 |  16 |          128 |       32 |       64 |    17,088 |\n| SPHINCS+-192s | 24 | 63 |  7 |     14 | 17 |  16 |          193 |       48 |       96 |    16,224 |\n| SPHINCS+-192f | 24 | 66 | 22 |      8 | 33 |  16 |          194 |       48 |       96 |    35,664 |\n| SPHINCS+-256s | 32 | 64 |  8 |     14 | 22 |  16 |          255 |       64 |      128 |    29,792 |\n| SPHINCS+-256f | 32 | 68 | 17 |      9 | 35 |  16 |          255 |       64 |      128 |    49,856 |\n\n### License\n\nAll included code has been placed into\n[Public Domain](LICENSES/LicenseRef-SPHINCS-PLUS-Public-Domain.txt)\nand is available under various open source licenses\n([Creative Commons Zero v1.0 Universal (CC0-1.0)](LICENSES/CC0-1.0.txt),\n[BSD Zero Clause License (0BSD)](LICENSES/0BSD.txt), and\n[MIT No Attribution (MIT-0)](LICENSES/MIT-0.txt),\nsee the [LICENSE file](LICENSE) and the licenses in the [LICENSES folder](LICENSES)), with the exception of `rng.c`, `rng.h` and `PQCgenKAT_sign.c`, which were provided by NIST, and parts of `ref/haraka.c`, which are under\n[MIT license (MIT)](LICENSES/MIT.txt).\n\n[test-ref]: https://github.com/sphincs/sphincsplus/actions/workflows/test-ref.yml/badge.svg\n[test-sha256-avx2]: https://github.com/sphincs/sphincsplus/actions/workflows/test-sha2-avx2.yml/badge.svg\n[test-shake256-avx2]: https://github.com/sphincs/sphincsplus/actions/workflows/test-shake-avx2.yml/badge.svg\n[test-haraka-aesni]: 
https://github.com/sphincs/sphincsplus/actions/workflows/test-haraka-aesni.yml/badge.svg\n"
  },
  {
    "path": "SHA256SUMS",
    "content": "9e1b3168520c917b6de676caa7a5799ec972e55caa150090e8452c80c299545e sphincs-haraka-128f-robust\nc6a28dcf0667bd91c7bb46814ac7408c0375727fe5fec7d41332149006d3f9d7 sphincs-haraka-128f-simple\nf93f4a554322080545a70f85ce936a12acc2fe928a243e3d13546ffe87872a9e sphincs-haraka-128s-robust\n3c9b181d3d96c066039b77e9accd926745fe1ecb010039d3579140b877da6f33 sphincs-haraka-128s-simple\n8876bfae8924983db27acfeaee6252d37cea86f05fcc4b16ea2c902d717e6a6e sphincs-haraka-192f-robust\ndf26bd02796f5ad9d6ff412793960e79ec911cbf4521656814895e6ef5a1db83 sphincs-haraka-192f-simple\n6cfde6cb5f9ce93eb3f7b0845e1149f661f92000f54e9d340c0bff504920ec7e sphincs-haraka-192s-robust\n64037177e1524f2b2d3ea4a79fdaf9352eb39a3aa6e68bc9d3316b7c2b835820 sphincs-haraka-192s-simple\na838509fa6ec49ade2638efc35d9e29fdb56bd9b843d5e1f48210a2cab218332 sphincs-haraka-256f-robust\ne1e3258be6b4467bcea81392363f657a58278a5b99fe240f29e388b0fe72f5da sphincs-haraka-256f-simple\nb5c5cc535f03789c25c018c009615ac62ba5b64188e4db5e3ede5513e3704dcc sphincs-haraka-256s-robust\n9428a566a2c2ee03665fc0eb2dbf208deb1b28716dc8c2d5e7c036a9f83d31da sphincs-haraka-256s-simple\nb6c82007bbce794f9fd67de708cd4d959319c744b918ddb28795fd491b713aa9 sphincs-sha2-128f-robust\n708f6ab77f8026361e975f7be7b9b5d1cd8aca56e4a3604c85ef3f9fe6618549 sphincs-sha2-128f-simple\nf4c2f31082fc8ad15419edc4f24c34a83d909f75eb37ea5ffe53df0fb5ef5306 sphincs-sha2-128s-robust\n65942fac8e225fde77dd277d297e68c94c2e25a2a4089f88be4b56fa92b18a84 sphincs-sha2-128s-simple\nb8e617db2099e617dfc372ff732eead88872aea791e2fe82628568d75dd03c78 sphincs-sha2-192f-robust\n84b1a342683bcad658efb6c65f7367c6b30623e74e3a24c2238d19eaf74722ab sphincs-sha2-192f-simple\n50c4b94dc788446077b48af1d8fa0170dc2114b4cb72a19f1d8c7628f9dadfd6 sphincs-sha2-192s-robust\n13efa67b9297afa051b9b30e2686266350c8b4000caa49aa432516e2a86d0b68 sphincs-sha2-192s-simple\ndc3330f8f19c816f45ee9a1127bf2b8a8c900e05df9a964bb760f0adf8f9b1b3 
sphincs-sha2-256f-robust\n46e286dc1a20012789c1bf4793a8eb2043dd0c11df729fa36d9f96b0aeffdac6 sphincs-sha2-256f-simple\n1f42b407e1e351861ba23e520b1974f399e349fcb66c614d727a38fb4e646634 sphincs-sha2-256s-robust\nc816ca365a667e4d6564a95ac576bc9d7be0de7e66eff93e6f05dd4f134a183f sphincs-sha2-256s-simple\n4be71430814589ce7c861030c7cdce0aa73f75885b693b41fdb7c34d8f32fa79 sphincs-shake-128f-robust\n5167df2ce46f33b76ccf0688f7769217d91878bd7d9b431080a3032eba51da10 sphincs-shake-128f-simple\nfbe6c99d6ccc42fc9af5babbac532f28288d4164b182515dffeb1cd47f351d12 sphincs-shake-128s-robust\ne7d5caee1941be99b6dfe46a95fc4535a34792f429e61d1cdc7fd3bbafe9ff02 sphincs-shake-128s-simple\n243d0e25de08fea547b0beae5f778a48bd55e56066435f9cdb9afc60a722699e sphincs-shake-192f-robust\nf204fd1cd5dce187441d104ae7159b64322b6a4afae708d48dc9966fe418ec4e sphincs-shake-192f-simple\ncb13eaa2b1c074f53c87f1025e6bb1b356ad8de3bea9388b90a058a6460766bb sphincs-shake-192s-robust\n4cc01c4a562d738ac54f5abfead35ecc4f46a1e2531fa12b4bc2819f4560c351 sphincs-shake-192s-simple\n5a736aeba47f8d84e3ca47126715affcb4ce6cef13e3c9f6af220827973aa383 sphincs-shake-256f-robust\n127f7ab83c740344546fe30777b221e8cb39f30fc4242d07d7608dc31a9835d4 sphincs-shake-256f-simple\n4d2ca7d10f2206c3cb9a26c6b00a0361601a1fe2dddf102fbfd6d3dac0be10fe sphincs-shake-256s-robust\n4ce4552e2e9b009a9016eb6dbcbefae3da2de151d61e2f392d4b9517eaeab91d sphincs-shake-256s-simple\n"
  },
  {
    "path": "benchmark.py",
    "content": "#! /usr/bin/env python3\nimport fileinput\nimport itertools\nimport os\nimport sys\nfrom subprocess import DEVNULL, run\n\nimplementations = [\n                   ('ref', ['shake', 'sha2', 'haraka']),\n                   ('haraka-aesni', ['haraka']),\n                   ('shake-avx2', ['shake']),\n                   ('sha2-avx2', ['sha2']),\n                   ]\n\noptions = [\"f\", \"s\"]\nsizes = [128, 192, 256]\nthashes = ['robust', 'simple']\n\nfor impl, fns in implementations:\n    params = os.path.join(impl, \"params.h\")\n    for fn in fns:\n        for opt, size, thash in itertools.product(options, sizes, thashes):\n            paramset = \"sphincs-{}-{}{}\".format(fn, size, opt)\n            paramfile = \"params-{}.h\".format(paramset)\n\n            print(\"Benchmarking\", paramset, thash, \"using\", impl, flush=True)\n\n            params = 'PARAMS={}'.format(paramset)  # overrides Makefile var\n            thash = 'THASH={}'.format(thash)  # overrides Makefile var\n\n            run([\"make\", \"-C\", impl, \"clean\", thash, params],\n                stdout=DEVNULL, stderr=sys.stderr)\n            run([\"make\", \"-C\", impl, \"benchmarks\", thash, params],\n                stdout=DEVNULL, stderr=sys.stderr)\n            run([\"make\", \"-C\", impl, \"benchmark\", thash, params],\n                stdout=sys.stdout, stderr=sys.stderr)\n\n            print(flush=True)\n\n"
  },
  {
    "path": "haraka-aesni/.gitignore",
    "content": "test/*\n!test/*.c\nPQCsignKAT_*.rsp\nPQCsignKAT_*.req\nPQCgenKAT_sign\n"
  },
  {
    "path": "haraka-aesni/Makefile",
    "content": "PARAMS = sphincs-haraka-128f\nTHASH = robust\n\nCC = /usr/bin/gcc\nCFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -march=native -fomit-frame-pointer -flto -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS)\n\n\nSOURCES = hash_haraka.c hash_harakax4.c thash_haraka_$(THASH).c thash_haraka_$(THASH)x4.c address.c randombytes.c merkle.c wots.c utils.c utilsx4.c fors.c sign.c haraka.c\nHEADERS = params.h hash.h hashx4.h thash.h thashx4.h address.h randombytes.h merkle.c wots.h utils.h utilsx4.h fors.h api.h haraka.h harakax4.h\n\nDET_SOURCES = $(SOURCES:randombytes.%=rng.%)\nDET_HEADERS = $(HEADERS:randombytes.%=rng.%)\n\nTESTS = test/fors \\\n\t\ttest/spx \\\n\nBENCHMARK = test/benchmark\n\n.PHONY: clean test benchmark\n\ndefault: PQCgenKAT_sign\n\nall: PQCgenKAT_sign tests benchmarks\n\ntests: $(TESTS)\n\ntest: $(TESTS:=.exec)\n\nbenchmarks: $(BENCHMARK)\n\nbenchmark: $(BENCHMARK:=.exec)\n\nPQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto\n\ntest/%: test/%.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)\n\ntest/%.exec: test/%\n\t@$<\n\nclean:\n\t-$(RM) $(TESTS)\n\t-$(RM) $(BENCHMARK)\n\t-$(RM) PQCgenKAT_sign\n\t-$(RM) PQCsignKAT_*.rsp\n\t-$(RM) PQCsignKAT_*.req\n"
  },
  {
    "path": "haraka-aesni/context.h",
    "content": "#ifndef SPX_CONTEXT_H\n#define SPX_CONTEXT_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n#include \"immintrin.h\"\n\ntypedef struct {\n    uint8_t pub_seed[SPX_N];\n    uint8_t sk_seed[SPX_N];\n\n    __m128i rc[40];\n} spx_ctx;\n\n#endif\n"
  },
  {
    "path": "haraka-aesni/haraka.c",
    "content": "/*\nPlain C implementation of the Haraka256 and Haraka512 permutations.\n*/\n#include <immintrin.h>\n#include <stdio.h>\n#include <string.h>\n#include <stdlib.h>\n#include <stdint.h>\n\n#include \"haraka.h\"\n#include \"harakax4.h\"\n#include \"utils.h\"\n\n#define HARAKAS_RATE 32\n\n#define u64 unsigned long\n#define u128 __m128i\n\n#define LOAD(src) _mm_loadu_si128((u128 *)(src))\n#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src)\n\n#define XOR128(a, b) _mm_xor_si128(a, b)\n\n#define AES2(s0, s1, rci) \\\n  s0 = _mm_aesenc_si128(s0, *(rci)); \\\n  s1 = _mm_aesenc_si128(s1, *(rci + 1)); \\\n  s0 = _mm_aesenc_si128(s0, *(rci + 2)); \\\n  s1 = _mm_aesenc_si128(s1, *(rci + 3));\n\n#define AES2_4x(s0, s1, s2, s3, rci) \\\n  AES2(s0[0], s0[1], rci); \\\n  AES2(s1[0], s1[1], rci); \\\n  AES2(s2[0], s2[1], rci); \\\n  AES2(s3[0], s3[1], rci);\n\n#define AES4(s0, s1, s2, s3, rci) \\\n  s0 = _mm_aesenc_si128(s0, *(rci)); \\\n  s1 = _mm_aesenc_si128(s1, *(rci + 1)); \\\n  s2 = _mm_aesenc_si128(s2, *(rci + 2)); \\\n  s3 = _mm_aesenc_si128(s3, *(rci + 3)); \\\n  s0 = _mm_aesenc_si128(s0, *(rci + 4)); \\\n  s1 = _mm_aesenc_si128(s1, *(rci + 5)); \\\n  s2 = _mm_aesenc_si128(s2, *(rci + 6)); \\\n  s3 = _mm_aesenc_si128(s3, *(rci + 7));\n\n#define AES4_4x(s0, s1, s2, s3, rci) \\\n  AES4(s0[0], s0[1], s0[2], s0[3], rci); \\\n  AES4(s1[0], s1[1], s1[2], s1[3], rci); \\\n  AES4(s2[0], s2[1], s2[2], s2[3], rci); \\\n  AES4(s3[0], s3[1], s3[2], s3[3], rci);\n\n#define MIX2(s0, s1) \\\n  tmp = _mm_unpacklo_epi32(s0, s1); \\\n  s1 = _mm_unpackhi_epi32(s0, s1); \\\n  s0 = tmp;\n\n#define MIX4(s0, s1, s2, s3) \\\n  tmp  = _mm_unpacklo_epi32(s0, s1); \\\n  s0 = _mm_unpackhi_epi32(s0, s1); \\\n  s1 = _mm_unpacklo_epi32(s2, s3); \\\n  s2 = _mm_unpackhi_epi32(s2, s3); \\\n  s3 = _mm_unpacklo_epi32(s0, s2); \\\n  s0 = _mm_unpackhi_epi32(s0, s2); \\\n  s2 = _mm_unpackhi_epi32(s1, tmp); \\\n  s1 = _mm_unpacklo_epi32(s1, tmp);\n\n#define TRUNCSTORE(out, s0, s1, s2, s3) 
\\\n  _mm_storeu_si128((u128 *)out, \\\n                   _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(s0), _mm_castsi128_pd(s1), 3))); \\\n  _mm_storeu_si128((u128 *)(out + 16), \\\n                   _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(s2), _mm_castsi128_pd(s3), 0)));\n\nstatic void load_haraka_constants(u128 *rc)\n{\n    rc[0] = _mm_set_epi32(0x0684704c,0xe620c00a,0xb2c5fef0,0x75817b9d);\n    rc[1] = _mm_set_epi32(0x8b66b4e1,0x88f3a06b,0x640f6ba4,0x2f08f717);\n    rc[2] = _mm_set_epi32(0x3402de2d,0x53f28498,0xcf029d60,0x9f029114);\n    rc[3] = _mm_set_epi32(0x0ed6eae6,0x2e7b4f08,0xbbf3bcaf,0xfd5b4f79);\n    rc[4] = _mm_set_epi32(0xcbcfb0cb,0x4872448b,0x79eecd1c,0xbe397044);\n    rc[5] = _mm_set_epi32(0x7eeacdee,0x6e9032b7,0x8d5335ed,0x2b8a057b);\n    rc[6] = _mm_set_epi32(0x67c28f43,0x5e2e7cd0,0xe2412761,0xda4fef1b);\n    rc[7] = _mm_set_epi32(0x2924d9b0,0xafcacc07,0x675ffde2,0x1fc70b3b);\n    rc[8] = _mm_set_epi32(0xab4d63f1,0xe6867fe9,0xecdb8fca,0xb9d465ee);\n    rc[9] = _mm_set_epi32(0x1c30bf84,0xd4b7cd64,0x5b2a404f,0xad037e33);\n    rc[10] = _mm_set_epi32(0xb2cc0bb9,0x941723bf,0x69028b2e,0x8df69800);\n    rc[11] = _mm_set_epi32(0xfa0478a6,0xde6f5572,0x4aaa9ec8,0x5c9d2d8a);\n    rc[12] = _mm_set_epi32(0xdfb49f2b,0x6b772a12,0x0efa4f2e,0x29129fd4);\n    rc[13] = _mm_set_epi32(0x1ea10344,0xf449a236,0x32d611ae,0xbb6a12ee);\n    rc[14] = _mm_set_epi32(0xaf044988,0x4b050084,0x5f9600c9,0x9ca8eca6);\n    rc[15] = _mm_set_epi32(0x21025ed8,0x9d199c4f,0x78a2c7e3,0x27e593ec);\n    rc[16] = _mm_set_epi32(0xbf3aaaf8,0xa759c9b7,0xb9282ecd,0x82d40173);\n    rc[17] = _mm_set_epi32(0x6260700d,0x6186b017,0x37f2efd9,0x10307d6b);\n    rc[18] = _mm_set_epi32(0x5aca45c2,0x21300443,0x81c29153,0xf6fc9ac6);\n    rc[19] = _mm_set_epi32(0x9223973c,0x226b68bb,0x2caf92e8,0x36d1943a);\n    rc[20] = _mm_set_epi32(0xd3bf9238,0x225886eb,0x6cbab958,0xe51071b4);\n    rc[21] = _mm_set_epi32(0xdb863ce5,0xaef0c677,0x933dfddd,0x24e1128d);\n    rc[22] = 
_mm_set_epi32(0xbb606268,0xffeba09c,0x83e48de3,0xcb2212b1);\n    rc[23] = _mm_set_epi32(0x734bd3dc,0xe2e4d19c,0x2db91a4e,0xc72bf77d);\n    rc[24] = _mm_set_epi32(0x43bb47c3,0x61301b43,0x4b1415c4,0x2cb3924e);\n    rc[25] = _mm_set_epi32(0xdba775a8,0xe707eff6,0x03b231dd,0x16eb6899);\n    rc[26] = _mm_set_epi32(0x6df3614b,0x3c755977,0x8e5e2302,0x7eca472c);\n    rc[27] = _mm_set_epi32(0xcda75a17,0xd6de7d77,0x6d1be5b9,0xb88617f9);\n    rc[28] = _mm_set_epi32(0xec6b43f0,0x6ba8e9aa,0x9d6c069d,0xa946ee5d);\n    rc[29] = _mm_set_epi32(0xcb1e6950,0xf957332b,0xa2531159,0x3bf327c1);\n    rc[30] = _mm_set_epi32(0x2cee0c75,0x00da619c,0xe4ed0353,0x600ed0d9);\n    rc[31] = _mm_set_epi32(0xf0b1a5a1,0x96e90cab,0x80bbbabc,0x63a4a350);\n    rc[32] = _mm_set_epi32(0xae3db102,0x5e962988,0xab0dde30,0x938dca39);\n    rc[33] = _mm_set_epi32(0x17bb8f38,0xd554a40b,0x8814f3a8,0x2e75b442);\n    rc[34] = _mm_set_epi32(0x34bb8a5b,0x5f427fd7,0xaeb6b779,0x360a16f6);\n    rc[35] = _mm_set_epi32(0x26f65241,0xcbe55438,0x43ce5918,0xffbaafde);\n    rc[36] = _mm_set_epi32(0x4ce99a54,0xb9f3026a,0xa2ca9cf7,0x839ec978);\n    rc[37] = _mm_set_epi32(0xae51a51a,0x1bdff7be,0x40c06e28,0x22901235);\n    rc[38] = _mm_set_epi32(0xa0c1613c,0xba7ed22b,0xc173bc0f,0x48a659cf);\n    rc[39] = _mm_set_epi32(0x756acc03,0x02288288,0x4ad6bdfd,0xe9c59da1);\n}\n\nvoid tweak_constants(spx_ctx *ctx)\n{\n    int i;\n    unsigned char buf[40*16];\n\n    /* Use the standard constants to generate tweaked ones. 
*/\n    load_haraka_constants(ctx->rc);\n\n    /* Constants for pk.seed */\n    haraka_S(buf, 40*16, ctx->pub_seed, SPX_N, ctx);\n\n    /* Tweak constants with the pub_seed */\n    for (i = 0; i < 40; i++) {\n        ctx->rc[i] = LOAD(buf + i*16);\n    }\n}\n\nstatic void haraka_S_absorb(unsigned char *s, unsigned int r,\n                            const unsigned char *m, unsigned long long mlen,\n                            unsigned char p, const spx_ctx *ctx)\n{\n    unsigned long long i;\n    SPX_VLA(unsigned char, t, r);\n\n    while (mlen >= r) {\n        // XOR block to state\n        STORE(s, XOR128(LOAD(s), LOAD(m)));\n        STORE(s + 16, XOR128(LOAD(s + 16), LOAD(m + 16)));\n        haraka512_perm(s, s, ctx);\n        mlen -= r;\n        m += r;\n    }\n\n    for (i = 0; i < r; ++i) {\n        t[i] = 0;\n    }\n    for (i = 0; i < mlen; ++i) {\n        t[i] = m[i];\n    }\n    t[i] = p;\n    t[r - 1] |= 128;\n    STORE(s, XOR128(LOAD(s), LOAD(t)));\n    STORE(s + 16, XOR128(LOAD(s + 16), LOAD(t + 16)));\n}\n\nstatic void haraka_S_absorb4x(unsigned char *s,\n                              unsigned int r,\n                              const unsigned char *m0,\n                              const unsigned char *m1,\n                              const unsigned char *m2,\n                              const unsigned char *m3,\n                              unsigned long long int mlen,\n                              unsigned char p,\n                              const spx_ctx *ctx)\n{\n    unsigned long long i;\n    SPX_VLA(unsigned char, t0, r);\n    SPX_VLA(unsigned char, t1, r);\n    SPX_VLA(unsigned char, t2, r);\n    SPX_VLA(unsigned char, t3, r);\n\n    while (mlen >= r) {\n        // XOR block to state\n        STORE(s, XOR128(LOAD(s), LOAD(m0)));\n        STORE(s + 16, XOR128(LOAD(s + 16), LOAD(m0 + 16)));\n        STORE(s + 64, XOR128(LOAD(s + 64), LOAD(m1)));\n        STORE(s + 80, XOR128(LOAD(s + 80), LOAD(m1 + 16)));\n        STORE(s + 128, 
XOR128(LOAD(s + 128), LOAD(m2)));\n        STORE(s + 144, XOR128(LOAD(s + 144), LOAD(m2 + 16)));\n        STORE(s + 192, XOR128(LOAD(s + 192), LOAD(m3)));\n        STORE(s + 208, XOR128(LOAD(s + 208), LOAD(m3 + 16)));\n\n        haraka512_perm_x4(s, s, ctx);\n        mlen -= r;\n        m0 += r;\n        m1 += r;\n        m2 += r;\n        m3 += r;\n    }\n\n    for (i = 0; i < r; ++i) {\n        t0[i] = 0;\n        t1[i] = 0;\n        t2[i] = 0;\n        t3[i] = 0;\n    }\n    for (i = 0; i < mlen; ++i) {\n        t0[i] = m0[i];\n        t1[i] = m1[i];\n        t2[i] = m2[i];\n        t3[i] = m3[i];\n    }\n\n    t0[i] = p;\n    t1[i] = p;\n    t2[i] = p;\n    t3[i] = p;\n\n    t0[r - 1] |= 128;\n    t1[r - 1] |= 128;\n    t2[r - 1] |= 128;\n    t3[r - 1] |= 128;\n\n    STORE(s, XOR128(LOAD(s), LOAD(t0)));\n    STORE(s + 16, XOR128(LOAD(s + 16), LOAD(t0 + 16)));\n    STORE(s + 64, XOR128(LOAD(s + 64), LOAD(t1)));\n    STORE(s + 80, XOR128(LOAD(s + 80), LOAD(t1 + 16)));\n    STORE(s + 128, XOR128(LOAD(s + 128), LOAD(t2)));\n    STORE(s + 144, XOR128(LOAD(s + 144), LOAD(t2 + 16)));\n    STORE(s + 192, XOR128(LOAD(s + 192), LOAD(t3)));\n    STORE(s + 208, XOR128(LOAD(s + 208), LOAD(t3 + 16)));            \n}\n\nstatic void haraka_S_squeezeblocks(unsigned char *h, unsigned long long nblocks,\n                                   unsigned char *s, unsigned int r,\n                                   const spx_ctx *ctx)\n{\n    while (nblocks > 0) {\n        haraka512_perm(s, s, ctx);\n        STORE(h, LOAD(s));\n        STORE(h + 16, LOAD(s + 16));\n        h += r;\n        nblocks--;\n    }\n}\n\nstatic void haraka_S_squeezeblocks4x(unsigned char *h0,\n                                     unsigned char *h1,\n                                     unsigned char *h2,\n                                     unsigned char *h3,\n                                     unsigned long long nblocks,\n                                     unsigned char *s, \n                               
      unsigned int r,\n                                     const spx_ctx *ctx)\n{\n    while (nblocks > 0) {\n        haraka512_perm_x4(s, s, ctx);\n        STORE(h0, LOAD(s));\n        STORE(h0 + 16, LOAD(s + 16));\n        STORE(h1, LOAD(s + 64));\n        STORE(h1 + 16, LOAD(s + 80));\n        STORE(h2, LOAD(s + 128));\n        STORE(h2 + 16, LOAD(s + 144));\n        STORE(h3, LOAD(s + 192));\n        STORE(h3 + 16, LOAD(s + 208));                        \n        h0 += r;\n        h1 += r;\n        h2 += r;\n        h3 += r;\n        nblocks--;\n    }\n}\n\nvoid haraka_S_inc_init(uint8_t *s_inc)\n{\n    size_t i;\n\n    for (i = 0; i < 64; i++) {\n        s_inc[i] = 0;\n    }\n    s_inc[64] = 0;\n}\n\nvoid haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen,\n        const spx_ctx *ctx)\n{\n    size_t i;\n\n    /* Recall that s_inc[64] is the non-absorbed bytes xored into the state */\n    while (mlen + s_inc[64] >= HARAKAS_RATE) {\n        for (i = 0; i < (size_t)(HARAKAS_RATE - s_inc[64]); i++) {\n            /* Take the i'th byte from message\n               xor with the s_inc[64] + i'th byte of the state */\n            s_inc[s_inc[64] + i] ^= m[i];\n        }\n        mlen -= (size_t)(HARAKAS_RATE - s_inc[64]);\n        m += HARAKAS_RATE - s_inc[64];\n        s_inc[64] = 0;\n\n        haraka512_perm(s_inc, s_inc, ctx);\n    }\n\n    for (i = 0; i < mlen; i++) {\n        s_inc[s_inc[64] + i] ^= m[i];\n    }\n    s_inc[64] += mlen;\n}\n\nvoid haraka_S_inc_finalize(uint8_t *s_inc)\n{\n    /* After haraka_S_inc_absorb, we are guaranteed that s_inc[64] < HARAKAS_RATE,\n       so we can always use one more byte for p in the current state. 
*/\n    s_inc[s_inc[64]] ^= 0x1F;\n    s_inc[HARAKAS_RATE - 1] ^= 128;\n    s_inc[64] = 0;\n}\n\nvoid haraka_S_inc_squeeze(uint8_t *out, size_t outlen, uint8_t *s_inc,\n        const spx_ctx *ctx)\n{\n    size_t i;\n\n    /* First consume any bytes we still have sitting around */\n    for (i = 0; i < outlen && i < s_inc[64]; i++) {\n        /* There are s_inc[64] bytes left, so r - s_inc[64] is the first\n           available byte. We consume from there, i.e., up to r. */\n        out[i] = (uint8_t)s_inc[(HARAKAS_RATE - s_inc[64] + i)];\n    }\n    out += i;\n    outlen -= i;\n    s_inc[64] -= i;\n\n    /* Then squeeze the remaining necessary blocks */\n    while (outlen > 0) {\n        haraka512_perm(s_inc, s_inc, ctx);\n\n        for (i = 0; i < outlen && i < HARAKAS_RATE; i++) {\n            out[i] = s_inc[i];\n        }\n        out += i;\n        outlen -= i;\n        s_inc[64] = HARAKAS_RATE - i;\n    }\n}\n\nvoid haraka_S(unsigned char *out, unsigned long long outlen,\n              const unsigned char *in, unsigned long long inlen,\n              const spx_ctx *ctx)\n{\n    unsigned long long i;\n    unsigned char s[64];\n    unsigned char d[32];\n\n    for (i = 0; i < 64; i++) {\n        s[i] = 0;\n    }\n    haraka_S_absorb(s, HARAKAS_RATE, in, inlen, 0x1F, ctx);\n\n    haraka_S_squeezeblocks(out, outlen / HARAKAS_RATE, s, HARAKAS_RATE, ctx);\n    out += (outlen / HARAKAS_RATE) * HARAKAS_RATE;\n\n    if (outlen % HARAKAS_RATE) {\n        haraka_S_squeezeblocks(d, 1, s, HARAKAS_RATE, ctx);\n        for (i = 0; i < outlen % HARAKAS_RATE; i++) {\n            out[i] = d[i];\n        }\n    }\n}\n\nvoid haraka_Sx4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3, \n                unsigned long long outlen,\n                const unsigned char *in0,\n                const unsigned char *in1,\n                const unsigned char *in2,\n                const unsigned char *in3, 
\n                unsigned long long inlen,\n                const spx_ctx *ctx)\n{\n    unsigned long long i;\n    unsigned char s[64 * 4];\n    unsigned char d0[32];\n    unsigned char d1[32];\n    unsigned char d2[32];\n    unsigned char d3[32];\n\n    for (i = 0; i < 64 * 4; i++) {\n        s[i] = 0;\n    }\n    haraka_S_absorb4x(s, HARAKAS_RATE, in0, in1, in2, in3, inlen, 0x1F, ctx);\n\n    haraka_S_squeezeblocks4x(out0, out1, out2, out3, outlen / HARAKAS_RATE, s,\n            HARAKAS_RATE, ctx);\n    out0 += (outlen / HARAKAS_RATE) * HARAKAS_RATE;\n    out1 += (outlen / HARAKAS_RATE) * HARAKAS_RATE;\n    out2 += (outlen / HARAKAS_RATE) * HARAKAS_RATE;\n    out3 += (outlen / HARAKAS_RATE) * HARAKAS_RATE;\n\n    if (outlen % HARAKAS_RATE) {\n        haraka_S_squeezeblocks4x(d0, d1, d2, d3, 1, s, HARAKAS_RATE, ctx);\n        for (i = 0; i < outlen % HARAKAS_RATE; i++) {\n            out0[i] = d0[i];\n            out1[i] = d1[i];\n            out2[i] = d2[i];\n            out3[i] = d3[i];\n        }\n    }\n}\n\nvoid haraka512_perm(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx)\n{\n    u128 s[4], tmp;\n  \n    s[0] = LOAD(in);\n    s[1] = LOAD(in + 16);\n    s[2] = LOAD(in + 32);\n    s[3] = LOAD(in + 48);\n  \n    AES4(s[0], s[1], s[2], s[3], ctx->rc);\n    MIX4(s[0], s[1], s[2], s[3]);\n  \n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 8);\n    MIX4(s[0], s[1], s[2], s[3]);\n  \n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 16);\n    MIX4(s[0], s[1], s[2], s[3]);\n  \n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 24);\n    MIX4(s[0], s[1], s[2], s[3]);\n  \n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 32);\n    MIX4(s[0], s[1], s[2], s[3]);\n  \n    STORE(out, s[0]);\n    STORE(out + 16, s[1]);\n    STORE(out + 32, s[2]);\n    STORE(out + 48, s[3]);\n}\n\nvoid haraka512_perm_x4(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx) \n{\n    u128 s[4][4], tmp;\n    \n    s[0][0] = LOAD(in);\n    s[0][1] = LOAD(in + 16);\n    
s[0][2] = LOAD(in + 32);\n    s[0][3] = LOAD(in + 48);\n    s[1][0] = LOAD(in + 64);\n    s[1][1] = LOAD(in + 80);\n    s[1][2] = LOAD(in + 96);\n    s[1][3] = LOAD(in + 112);\n    s[2][0] = LOAD(in + 128);\n    s[2][1] = LOAD(in + 144);\n    s[2][2] = LOAD(in + 160);\n    s[2][3] = LOAD(in + 176);\n    s[3][0] = LOAD(in + 192);\n    s[3][1] = LOAD(in + 208);\n    s[3][2] = LOAD(in + 224);\n    s[3][3] = LOAD(in + 240);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 8);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 16);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 24);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 32);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    STORE(out, s[0][0]);\n    STORE(out + 16, s[0][1]);\n    STORE(out + 32, s[0][2]);\n    STORE(out + 48, s[0][3]);\n    STORE(out + 64, s[1][0]);\n    STORE(out + 80, s[1][1]);\n    STORE(out + 96, s[1][2]);\n    STORE(out + 112, s[1][3]);\n    STORE(out + 128, s[2][0]);\n    STORE(out + 144, s[2][1]);\n    STORE(out + 160, s[2][2]);\n    
STORE(out + 176, s[2][3]);\n    STORE(out + 192, s[3][0]);\n    STORE(out + 208, s[3][1]);\n    STORE(out + 224, s[3][2]);\n    STORE(out + 240, s[3][3]);\n}\n\nvoid haraka512(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx)\n{\n    u128 s[4], tmp;\n\n    s[0] = LOAD(in);\n    s[1] = LOAD(in + 16);\n    s[2] = LOAD(in + 32);\n    s[3] = LOAD(in + 48); \n\n    AES4(s[0], s[1], s[2], s[3], ctx->rc);\n    MIX4(s[0], s[1], s[2], s[3]);\n\n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 8);\n    MIX4(s[0], s[1], s[2], s[3]);\n\n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 16);\n    MIX4(s[0], s[1], s[2], s[3]);\n\n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 24);\n    MIX4(s[0], s[1], s[2], s[3]);\n\n    AES4(s[0], s[1], s[2], s[3], ctx->rc + 32);\n    MIX4(s[0], s[1], s[2], s[3]);   \n\n    s[0] = XOR128(s[0], LOAD(in));\n    s[1] = XOR128(s[1], LOAD(in + 16));\n    s[2] = XOR128(s[2], LOAD(in + 32));\n    s[3] = XOR128(s[3], LOAD(in + 48));\n\n    // truncate and store result\n    TRUNCSTORE(out, s[0], s[1], s[2], s[3]);  \n}\n\nvoid haraka512x4(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx) \n{  \n    u128 s[4][4], tmp;\n    \n    s[0][0] = LOAD(in);\n    s[0][1] = LOAD(in + 16);\n    s[0][2] = LOAD(in + 32);\n    s[0][3] = LOAD(in + 48);\n    s[1][0] = LOAD(in + 64);\n    s[1][1] = LOAD(in + 80);\n    s[1][2] = LOAD(in + 96);\n    s[1][3] = LOAD(in + 112);\n    s[2][0] = LOAD(in + 128);\n    s[2][1] = LOAD(in + 144);\n    s[2][2] = LOAD(in + 160);\n    s[2][3] = LOAD(in + 176);\n    s[3][0] = LOAD(in + 192);\n    s[3][1] = LOAD(in + 208);\n    s[3][2] = LOAD(in + 224);\n    s[3][3] = LOAD(in + 240);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 8);\n    MIX4(s[0][0], s[0][1], 
s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 16);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 24);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);\n    \n    AES4_4x(s[0], s[1], s[2], s[3], ctx->rc + 32);\n    MIX4(s[0][0], s[0][1], s[0][2], s[0][3]);\n    MIX4(s[1][0], s[1][1], s[1][2], s[1][3]);\n    MIX4(s[2][0], s[2][1], s[2][2], s[2][3]);\n    MIX4(s[3][0], s[3][1], s[3][2], s[3][3]);   \n\n    s[0][0] = XOR128(s[0][0], LOAD(in));\n    s[0][1] = XOR128(s[0][1], LOAD(in + 16));\n    s[0][2] = XOR128(s[0][2], LOAD(in + 32));\n    s[0][3] = XOR128(s[0][3], LOAD(in + 48));\n    s[1][0] = XOR128(s[1][0], LOAD(in + 64));\n    s[1][1] = XOR128(s[1][1], LOAD(in + 80));\n    s[1][2] = XOR128(s[1][2], LOAD(in + 96));\n    s[1][3] = XOR128(s[1][3], LOAD(in + 112));\n    s[2][0] = XOR128(s[2][0], LOAD(in + 128));\n    s[2][1] = XOR128(s[2][1], LOAD(in + 144));\n    s[2][2] = XOR128(s[2][2], LOAD(in + 160));\n    s[2][3] = XOR128(s[2][3], LOAD(in + 176));\n    s[3][0] = XOR128(s[3][0], LOAD(in + 192));\n    s[3][1] = XOR128(s[3][1], LOAD(in + 208));\n    s[3][2] = XOR128(s[3][2], LOAD(in + 224));\n    s[3][3] = XOR128(s[3][3], LOAD(in + 240));\n    \n    TRUNCSTORE(out, s[0][0], s[0][1], s[0][2], s[0][3]);\n    TRUNCSTORE((out + 32), s[1][0], s[1][1], s[1][2], s[1][3]);\n    TRUNCSTORE((out + 64), s[2][0], s[2][1], s[2][2], s[2][3]);\n    TRUNCSTORE((out + 96), s[3][0], s[3][1], s[3][2], s[3][3]);    \n}\n\nvoid haraka256(unsigned char *out, const unsigned char *in,\n        
const spx_ctx *ctx) \n{\n    u128 s[2], tmp;\n  \n    s[0] = LOAD(in);\n    s[1] = LOAD(in + 16);\n  \n    AES2(s[0], s[1], ctx->rc);\n    MIX2(s[0], s[1]);\n  \n    AES2(s[0], s[1], ctx->rc + 4);\n    MIX2(s[0], s[1]);\n  \n    AES2(s[0], s[1], ctx->rc + 8);\n    MIX2(s[0], s[1]);\n  \n    AES2(s[0], s[1], ctx->rc + 12);\n    MIX2(s[0], s[1]);\n  \n    AES2(s[0], s[1], ctx->rc + 16);\n    MIX2(s[0], s[1]);\n  \n    s[0] = XOR128(s[0], LOAD(in));\n    s[1] = XOR128(s[1], LOAD(in + 16));\n  \n    STORE(out, s[0]);\n    STORE(out + 16, s[1]);\n}\n\nvoid haraka256x4(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx) \n{\n    u128 s[4][2], tmp;\n\n    s[0][0] = LOAD(in);\n    s[0][1] = LOAD(in + 16);\n    s[1][0] = LOAD(in + 32);\n    s[1][1] = LOAD(in + 48);\n    s[2][0] = LOAD(in + 64);\n    s[2][1] = LOAD(in + 80);\n    s[3][0] = LOAD(in + 96);\n    s[3][1] = LOAD(in + 112);\n\n    // Round 1\n    AES2_4x(s[0], s[1], s[2], s[3], ctx->rc);\n\n    MIX2(s[0][0], s[0][1]);\n    MIX2(s[1][0], s[1][1]);\n    MIX2(s[2][0], s[2][1]);\n    MIX2(s[3][0], s[3][1]);\n\n    // Round 2\n    AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 4);\n\n    MIX2(s[0][0], s[0][1]);\n    MIX2(s[1][0], s[1][1]);\n    MIX2(s[2][0], s[2][1]);\n    MIX2(s[3][0], s[3][1]);\n\n    // Round 3\n    AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 8);\n\n    MIX2(s[0][0], s[0][1]);\n    MIX2(s[1][0], s[1][1]);\n    MIX2(s[2][0], s[2][1]);\n    MIX2(s[3][0], s[3][1]);\n\n    // Round 4\n    AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 12);\n\n    MIX2(s[0][0], s[0][1]);\n    MIX2(s[1][0], s[1][1]);\n    MIX2(s[2][0], s[2][1]);\n    MIX2(s[3][0], s[3][1]);\n    \n    // Round 5\n    AES2_4x(s[0], s[1], s[2], s[3], ctx->rc + 16);\n    \n    MIX2(s[0][0], s[0][1]);\n    MIX2(s[1][0], s[1][1]);\n    MIX2(s[2][0], s[2][1]);\n    MIX2(s[3][0], s[3][1]);\n    \n    // Feed Forward\n    s[0][0] = _mm_xor_si128(s[0][0], LOAD(in));\n    s[0][1] = _mm_xor_si128(s[0][1], LOAD(in + 16));\n    s[1][0] = 
_mm_xor_si128(s[1][0], LOAD(in + 32));\n    s[1][1] = _mm_xor_si128(s[1][1], LOAD(in + 48));\n    s[2][0] = _mm_xor_si128(s[2][0], LOAD(in + 64));\n    s[2][1] = _mm_xor_si128(s[2][1], LOAD(in + 80));\n    s[3][0] = _mm_xor_si128(s[3][0], LOAD(in + 96));\n    s[3][1] = _mm_xor_si128(s[3][1], LOAD(in + 112));\n    \n    STORE(out, s[0][0]);\n    STORE(out + 16, s[0][1]);\n    STORE(out + 32, s[1][0]);\n    STORE(out + 48, s[1][1]);\n    STORE(out + 64, s[2][0]);\n    STORE(out + 80, s[2][1]);\n    STORE(out + 96, s[3][0]);\n    STORE(out + 112, s[3][1]);\n}\n"
  },
  {
    "path": "haraka-aesni/harakax4.h",
    "content": "#ifndef SPX_HARAKAX4_H\n#define SPX_HARAKAX4_H\n\n#include \"context.h\"\n#include \"params.h\"\n\n/* Haraka Sponge */\n#define haraka_Sx4 SPX_NAMESPACE(haraka_Sx4)\nvoid haraka_Sx4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3,\n                unsigned long long outlen,\n                const unsigned char *in0,\n                const unsigned char *in1,\n                const unsigned char *in2,\n                const unsigned char *in3,\n                unsigned long long inlen,\n                const spx_ctx *ctx);\n\n/* Applies the 512-bit Haraka permutation x4 to in. */\n#define haraka512_perm_x4 SPX_NAMESPACE(haraka512_perm_x4)\nvoid haraka512_perm_x4(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx);\n\n/* Implementation of Haraka-512 x4 */\n#define haraka512x4 SPX_NAMESPACE(haraka512x4)\nvoid haraka512x4(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx);\n\n/* Implementation of Haraka-256 x4 */\n#define haraka256x4 SPX_NAMESPACE(haraka256x4)\nvoid haraka256x4(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx);\n\n#endif\n"
  },
  {
    "path": "haraka-aesni/hash_harakax4.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"params.h\"\n#include \"harakax4.h\"\n#include \"hashx4.h\"\n\n/*\n * 4-way parallel version of prf_addr; takes 4x as much input and output\n */\n#define prf_addrx4 SPX_NAMESPACE(prf_addrx4)\nvoid prf_addrx4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3,\n                const spx_ctx *ctx,\n                const uint32_t addrx4[4*8])\n{\n    unsigned char bufx4[4 * 64] = {0};\n    /* Since SPX_N may be smaller than 32, we need temporary buffers. */\n    unsigned char outbuf[4 * 32];\n    unsigned int i;\n\n    for (i = 0; i < 4; i++) {\n        memcpy(bufx4 + i*64, addrx4 + i*8, SPX_ADDR_BYTES);\n        memcpy(bufx4 + i*64 + SPX_ADDR_BYTES, ctx->sk_seed, SPX_N);\n    }\n\n    haraka512x4(outbuf, bufx4, ctx);\n\n    memcpy(out0, outbuf, SPX_N);\n    memcpy(out1, outbuf + 32, SPX_N);\n    memcpy(out2, outbuf + 64, SPX_N);\n    memcpy(out3, outbuf + 96, SPX_N);\n}\n"
  },
  {
    "path": "haraka-aesni/test/benchmark.c",
    "content": "#define _POSIX_C_SOURCE 199309L\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n\n#include \"../api.h\"\n#include \"../fors.h\"\n#include \"../wotsx4.h\"\n#include \"../params.h\"\n#include \"../randombytes.h\"\n\n#define SPX_MLEN 32\n#define NTESTS 10\n\nstatic void wots_gen_pkx4(unsigned char* pk, const spx_ctx *ctx,\n         uint32_t addr[8]);\n\nstatic int cmp_llu(const void *a, const void*b)\n{\n  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;\n  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;\n  return 0;\n}\n\nstatic unsigned long long median(unsigned long long *l, size_t llen)\n{\n  qsort(l,llen,sizeof(unsigned long long),cmp_llu);\n\n  if(llen%2) return l[llen/2];\n  else return (l[llen/2-1]+l[llen/2])/2;\n}\n\nstatic void delta(unsigned long long *l, size_t llen)\n{\n    unsigned int i;\n    for(i = 0; i < llen - 1; i++) {\n        l[i] = l[i+1] - l[i];\n    }\n}\n\nstatic unsigned long long cpucycles(void)\n{\n  unsigned long long result;\n  __asm volatile(\".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax\"\n    : \"=a\" (result) ::  \"%rdx\");\n  return result;\n}\n\nstatic void printfcomma (unsigned long long n)\n{\n    if (n < 1000) {\n        printf(\"%llu\", n);\n        return;\n    }\n    printfcomma(n / 1000);\n    printf (\",%03llu\", n % 1000);\n}\n\nstatic void printfalignedcomma (unsigned long long n, int len)\n{\n    unsigned long long ncopy = n;\n    int i = 0;\n\n    while (ncopy > 9) {\n        len -= 1;\n        ncopy /= 10;\n        i += 1;  // to account for commas\n    }\n    i = i/3 - 1;  // to account for commas\n    for (; i < len; i++) {\n        printf(\" \");\n    }\n    printfcomma(n);\n}\n\nstatic void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul)\n{\n    unsigned long long med;\n\n    result /= NTESTS;\n    delta(l, NTESTS + 1);\n    med = median(l, llen);\n    printf(\"avg. 
%11.2lf us (%2.2lf sec); median \", result, result / 1e6);\n    printfalignedcomma(med, 12);\n    printf(\" cycles,  %5llux: \", mul);\n    printfalignedcomma(mul*med, 12);\n    printf(\" cycles\\n\");\n}\n\n#define MEASURE(TEXT, MUL, FNCALL)\\\n    printf(TEXT);\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\\\n    for(i = 0; i < NTESTS; i++) {\\\n        t[i] = cpucycles();\\\n        FNCALL;\\\n    }\\\n    t[NTESTS] = cpucycles();\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\\\n    result = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) / 1e3;\\\n    display_result(result, t, NTESTS, MUL);\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    spx_ctx ctx;\n    unsigned char pk[SPX_PK_BYTES];\n    unsigned char sk[SPX_SK_BYTES];\n    unsigned char *m = malloc(SPX_MLEN);\n    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);\n\n    unsigned char fors_pk[SPX_FORS_PK_BYTES];\n    unsigned char fors_m[SPX_FORS_MSG_BYTES];\n    unsigned char fors_sig[SPX_FORS_BYTES];\n    unsigned char addr[SPX_ADDR_BYTES];\n\n    unsigned char wots_pk[SPX_WOTS_PK_BYTES];\n\n    unsigned long long smlen;\n    unsigned long long mlen;\n    unsigned long long t[NTESTS+1];\n    struct timespec start, stop;\n    double result;\n    int i;\n\n    randombytes(m, SPX_MLEN);\n    randombytes(addr, SPX_ADDR_BYTES);\n\n    printf(\"Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\\n\",\n           SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES,\n           SPX_WOTS_W);\n\n    printf(\"Running %d iterations.\\n\", NTESTS);\n\n    MEASURE(\"Generating keypair.. \", 1, crypto_sign_keypair(pk, sk));\n    MEASURE(\"  - WOTS pk gen..    \", (1 << SPX_TREE_HEIGHT), wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Signing..            \", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk));\n    MEASURE(\"  - FORS signing..  
 \", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr));\n    MEASURE(\"  - WOTS pk gen..    \", SPX_D * (1 << SPX_TREE_HEIGHT), wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Verifying..          \", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk));\n\n    printf(\"Signature size: %d (%.2f KiB)\\n\", SPX_BYTES, SPX_BYTES / 1024.0);\n    printf(\"Public key size: %d (%.2f KiB)\\n\", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0);\n    printf(\"Secret key size: %d (%.2f KiB)\\n\", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0);\n\n    free(m);\n    free(sm);\n    free(mout);\n\n    return 0;\n}\n\nstatic void wots_gen_pkx4(unsigned char *pk, const spx_ctx *ctx,\n                 uint32_t addr[8]) {\n    struct leaf_info_x4 leaf;\n    unsigned steps[ SPX_WOTS_LEN ] = { 0 };\n    INITIALIZE_LEAF_INFO_X4(leaf, addr, steps);\n    wots_gen_leafx4(pk, ctx, 0, &leaf);\n}\n"
  },
  {
    "path": "haraka-aesni/thash_haraka_robustx4.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thashx4.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"harakax4.h\"\n\n/**\n * 4-way parallel version of thash; takes 4x as much input and output\n */\n#define thashx4 SPX_NAMESPACE(thashx4)\nvoid thashx4(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx4[4*8])\n{\n    SPX_VLA(unsigned char, buf0, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, buf1, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, buf2, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, buf3, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, bitmask0, inblocks * SPX_N);\n    SPX_VLA(unsigned char, bitmask1, inblocks * SPX_N);\n    SPX_VLA(unsigned char, bitmask2, inblocks * SPX_N);\n    SPX_VLA(unsigned char, bitmask3, inblocks * SPX_N);\n    unsigned char outbuf[32 * 4];\n    unsigned char buf_tmp[64 * 4];\n    unsigned int i;\n\n    if (inblocks == 1) {\n        memset(buf_tmp, 0, 64 * 4);\n\n        // Generate masks first in buffer\n        memcpy(buf_tmp,      addrx4 + 0*8, 32);\n        memcpy(buf_tmp + 32, addrx4 + 1*8, 32);\n        memcpy(buf_tmp + 64, addrx4 + 2*8, 32);\n        memcpy(buf_tmp + 96, addrx4 + 3*8, 32);\n\n        haraka256x4(outbuf, buf_tmp, ctx);\n\n        /* move addresses to make room for inputs; zero old values */\n        memcpy(buf_tmp + 192, buf_tmp + 96, SPX_ADDR_BYTES);\n        memcpy(buf_tmp + 128, buf_tmp + 64, SPX_ADDR_BYTES);\n        memcpy(buf_tmp + 64,  buf_tmp + 32, SPX_ADDR_BYTES);\n        /* skip memcpy(buf_tmp, buf_tmp, SPX_ADDR_BYTES); already in place */\n\n        /* skip 
memset(buf_tmp, 0, SPX_ADDR_BYTES); remained untouched */\n        memset(buf_tmp + 32, 0, SPX_ADDR_BYTES);\n        /* skip memset(buf_tmp + 64, 0, SPX_ADDR_BYTES); contains addr1 */\n        memset(buf_tmp + 96, 0, SPX_ADDR_BYTES);\n\n        for (i = 0; i < SPX_N; i++) {\n            buf_tmp[SPX_ADDR_BYTES + i]       = in0[i] ^ outbuf[i];\n            buf_tmp[SPX_ADDR_BYTES + i + 64]  = in1[i] ^ outbuf[i + 32];\n            buf_tmp[SPX_ADDR_BYTES + i + 128] = in2[i] ^ outbuf[i + 64];\n            buf_tmp[SPX_ADDR_BYTES + i + 192] = in3[i] ^ outbuf[i + 96];\n        }\n\n        haraka512x4(outbuf, buf_tmp, ctx);\n\n        memcpy(out0, outbuf,      SPX_N);\n        memcpy(out1, outbuf + 32, SPX_N);\n        memcpy(out2, outbuf + 64, SPX_N);\n        memcpy(out3, outbuf + 96, SPX_N);\n    } else {\n        /* All other tweakable hashes*/\n        memcpy(buf0, addrx4 + 0*8, 32);\n        memcpy(buf1, addrx4 + 1*8, 32);\n        memcpy(buf2, addrx4 + 2*8, 32);\n        memcpy(buf3, addrx4 + 3*8, 32);\n\n        haraka_Sx4(bitmask0, bitmask1, bitmask2, bitmask3, inblocks * SPX_N,\n                   buf0, buf1, buf2, buf3, SPX_ADDR_BYTES, ctx);\n\n        for (i = 0; i < inblocks * SPX_N; i++) {\n            buf0[SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i];\n            buf1[SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i];\n            buf2[SPX_ADDR_BYTES + i] = in2[i] ^ bitmask2[i];\n            buf3[SPX_ADDR_BYTES + i] = in3[i] ^ bitmask3[i];\n        }\n\n        haraka_Sx4(out0, out1, out2, out3, SPX_N,\n                   buf0, buf1, buf2, buf3, SPX_ADDR_BYTES + inblocks*SPX_N,\n                   ctx);\n    }\n}\n"
  },
  {
    "path": "haraka-aesni/thash_haraka_simplex4.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thashx4.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"harakax4.h\"\n\n/**\n * 4-way parallel version of thash; takes 4x as much input and output\n */\n#define thashx4 SPX_NAMESPACE(thashx4)\nvoid thashx4(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx4[4*8])\n{\n    SPX_VLA(unsigned char, buf0, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, buf1, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, buf2, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(unsigned char, buf3, SPX_ADDR_BYTES + inblocks*SPX_N);\n    unsigned char outbuf[32 * 4];\n    unsigned char buf_tmp[64 * 4];\n\n    if (inblocks == 1) {\n        memset(buf_tmp, 0, 64 * 4);\n\n        memcpy(buf_tmp,       addrx4 + 0*8, 32);\n        memcpy(buf_tmp + 64,  addrx4 + 1*8, 32);\n        memcpy(buf_tmp + 128, addrx4 + 2*8, 32);\n        memcpy(buf_tmp + 192, addrx4 + 3*8, 32);\n\n        memcpy(buf_tmp + SPX_ADDR_BYTES,       in0, SPX_N);\n        memcpy(buf_tmp + SPX_ADDR_BYTES + 64,  in1, SPX_N);\n        memcpy(buf_tmp + SPX_ADDR_BYTES + 128, in2, SPX_N);\n        memcpy(buf_tmp + SPX_ADDR_BYTES + 192, in3, SPX_N);\n\n        haraka512x4(outbuf, buf_tmp, ctx);\n\n        memcpy(out0, outbuf,      SPX_N);\n        memcpy(out1, outbuf + 32, SPX_N);\n        memcpy(out2, outbuf + 64, SPX_N);\n        memcpy(out3, outbuf + 96, SPX_N);\n    } else {\n        /* All other tweakable hashes*/\n        memcpy(buf0, addrx4 + 0*8, 32);\n        memcpy(buf1, addrx4 + 1*8, 32);\n        memcpy(buf2, addrx4 + 2*8, 32);\n        memcpy(buf3, addrx4 + 3*8, 32);\n\n        
memcpy(buf0 + SPX_ADDR_BYTES, in0, inblocks * SPX_N);\n        memcpy(buf1 + SPX_ADDR_BYTES, in1, inblocks * SPX_N);\n        memcpy(buf2 + SPX_ADDR_BYTES, in2, inblocks * SPX_N);\n        memcpy(buf3 + SPX_ADDR_BYTES, in3, inblocks * SPX_N);\n\n        haraka_Sx4(out0, out1, out2, out3, SPX_N,\n                   buf0, buf1, buf2, buf3, SPX_ADDR_BYTES + inblocks*SPX_N,\n                   ctx);\n    }\n}\n"
  },
  {
    "path": "ref/.gitignore",
    "content": "test/*\n!test/*.c\nPQCsignKAT_*.rsp\nPQCsignKAT_*.req\nPQCgenKAT_sign\n"
  },
  {
    "path": "ref/Makefile",
    "content": "PARAMS = sphincs-haraka-128f\nTHASH = robust\n\nCC=/usr/bin/gcc\nCFLAGS=-Wall -Wextra -Wpedantic -O3 -std=c99 -Wconversion -Wmissing-prototypes -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS)\n\nSOURCES =          address.c randombytes.c merkle.c wots.c wotsx1.c utils.c utilsx1.c fors.c sign.c\nHEADERS = params.h address.h randombytes.h merkle.h wots.h wotsx1.h utils.h utilsx1.h fors.h api.h  hash.h thash.h\n\nifneq (,$(findstring shake,$(PARAMS)))\n\tSOURCES += fips202.c hash_shake.c thash_shake_$(THASH).c\n\tHEADERS += fips202.h\nendif\nifneq (,$(findstring haraka,$(PARAMS)))\n\tSOURCES += haraka.c hash_haraka.c thash_haraka_$(THASH).c\n\tHEADERS += haraka.h\nendif\nifneq (,$(findstring sha2,$(PARAMS)))\n\tSOURCES += sha2.c hash_sha2.c thash_sha2_$(THASH).c\n\tHEADERS += sha2.h\nendif\n\nDET_SOURCES = $(SOURCES:randombytes.%=rng.%)\nDET_HEADERS = $(HEADERS:randombytes.%=rng.%)\n\nTESTS =         test/fors \\\n\t\ttest/spx \\\n\nBENCHMARK = test/benchmark\n\n.PHONY: clean test benchmark\n\ndefault: PQCgenKAT_sign\n\nall: PQCgenKAT_sign tests benchmarks\n\ntests: $(TESTS)\n\ntest: $(TESTS:=.exec)\n\nbenchmarks: $(BENCHMARK)\n\nbenchmark: $(BENCHMARK:=.exec)\n\nPQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto\n\ntest/benchmark: test/benchmark.c test/cycles.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ test/cycles.c $(SOURCES) $< $(LDLIBS)\n\ntest/%: test/%.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)\n\ntest/haraka: test/haraka.c $(filter-out haraka.c,$(SOURCES)) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(filter-out haraka.c,$(SOURCES)) $< $(LDLIBS)\n\ntest/%.exec: test/%\n\t@$<\n\nclean:\n\t-$(RM) $(TESTS)\n\t-$(RM) $(BENCHMARK)\n\t-$(RM) PQCgenKAT_sign\n\t-$(RM) PQCsignKAT_*.rsp\n\t-$(RM) PQCsignKAT_*.req\n"
  },
  {
    "path": "ref/PQCgenKAT_sign.c",
    "content": "\n//\n//  PQCgenKAT_sign.c\n//\n//  Created by Bassham, Lawrence E (Fed) on 8/29/17.\n//  Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.\n//\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <ctype.h>\n#include \"rng.h\"\n#include \"api.h\"\n\n#define\tMAX_MARKER_LEN\t\t50\n\n#define KAT_SUCCESS          0\n#define KAT_FILE_OPEN_ERROR -1\n#define KAT_DATA_ERROR      -3\n#define KAT_CRYPTO_FAILURE  -4\n\nint\t\tFindMarker(FILE *infile, const char *marker);\nint\t\tReadHex(FILE *infile, unsigned char *A, int Length, char *str);\nvoid\tfprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L);\n\nchar    AlgName[] = \"My Alg Name\";\n\nint\nmain(void)\n{\n    char                fn_req[32], fn_rsp[32];\n    FILE                *fp_req, *fp_rsp;\n    unsigned char       seed[48];\n    unsigned char       msg[3300];\n    unsigned char       entropy_input[48];\n    unsigned char       *m, *sm, *m1;\n    unsigned long long  mlen, smlen, mlen1;\n    int                 count;\n    int                 done;\n    unsigned char       pk[CRYPTO_PUBLICKEYBYTES], sk[CRYPTO_SECRETKEYBYTES];\n    int                 ret_val;\n\n    // Create the REQUEST file\n    sprintf(fn_req, \"PQCsignKAT_%d.req\", CRYPTO_SECRETKEYBYTES);\n    if ( (fp_req = fopen(fn_req, \"w\")) == NULL ) {\n        printf(\"Couldn't open <%s> for write\\n\", fn_req);\n        return KAT_FILE_OPEN_ERROR;\n    }\n    sprintf(fn_rsp, \"PQCsignKAT_%d.rsp\", CRYPTO_SECRETKEYBYTES);\n    if ( (fp_rsp = fopen(fn_rsp, \"w\")) == NULL ) {\n        printf(\"Couldn't open <%s> for write\\n\", fn_rsp);\n        return KAT_FILE_OPEN_ERROR;\n    }\n\n    for (int i=0; i<48; i++)\n        entropy_input[i] = (unsigned char)i;\n\n    randombytes_init(entropy_input, NULL);\n    for (int i=0; i<100; i++) {\n        fprintf(fp_req, \"count = %d\\n\", i);\n        randombytes(seed, 48);\n        fprintBstr(fp_req, \"seed = \", seed, 48);\n        mlen = (unsigned 
long long int)(33*(i+1));\n        fprintf(fp_req, \"mlen = %llu\\n\", mlen);\n        randombytes(msg, mlen);\n        fprintBstr(fp_req, \"msg = \", msg, mlen);\n        fprintf(fp_req, \"pk =\\n\");\n        fprintf(fp_req, \"sk =\\n\");\n        fprintf(fp_req, \"smlen =\\n\");\n        fprintf(fp_req, \"sm =\\n\\n\");\n    }\n    fclose(fp_req);\n\n    //Create the RESPONSE file based on what's in the REQUEST file\n    if ( (fp_req = fopen(fn_req, \"r\")) == NULL ) {\n        printf(\"Couldn't open <%s> for read\\n\", fn_req);\n        return KAT_FILE_OPEN_ERROR;\n    }\n\n    fprintf(fp_rsp, \"# %s\\n\\n\", CRYPTO_ALGNAME);\n    done = 0;\n    do {\n        if ( FindMarker(fp_req, \"count = \") )\n            ret_val = fscanf(fp_req, \"%d\", &count);\n        else {\n            done = 1;\n            break;\n        }\n        fprintf(fp_rsp, \"count = %d\\n\", count);\n\n        if ( !ReadHex(fp_req, seed, 48, \"seed = \") ) {\n            printf(\"ERROR: unable to read 'seed' from <%s>\\n\", fn_req);\n            return KAT_DATA_ERROR;\n        }\n        fprintBstr(fp_rsp, \"seed = \", seed, 48);\n\n        randombytes_init(seed, NULL);\n\n        if ( FindMarker(fp_req, \"mlen = \") )\n            ret_val = fscanf(fp_req, \"%llu\", &mlen);\n        else {\n            printf(\"ERROR: unable to read 'mlen' from <%s>\\n\", fn_req);\n            return KAT_DATA_ERROR;\n        }\n        fprintf(fp_rsp, \"mlen = %llu\\n\", mlen);\n\n        m = (unsigned char *)calloc(mlen, sizeof(unsigned char));\n        m1 = (unsigned char *)calloc(mlen+CRYPTO_BYTES, sizeof(unsigned char));\n        sm = (unsigned char *)calloc(mlen+CRYPTO_BYTES, sizeof(unsigned char));\n\n        if ( !ReadHex(fp_req, m, (int)mlen, \"msg = \") ) {\n            printf(\"ERROR: unable to read 'msg' from <%s>\\n\", fn_req);\n            return KAT_DATA_ERROR;\n        }\n        fprintBstr(fp_rsp, \"msg = \", m, mlen);\n\n        // Generate the public/private keypair\n        if ( 
(ret_val = crypto_sign_keypair(pk, sk)) != 0) {\n            printf(\"crypto_sign_keypair returned <%d>\\n\", ret_val);\n            return KAT_CRYPTO_FAILURE;\n        }\n        fprintBstr(fp_rsp, \"pk = \", pk, CRYPTO_PUBLICKEYBYTES);\n        fprintBstr(fp_rsp, \"sk = \", sk, CRYPTO_SECRETKEYBYTES);\n\n        if ( (ret_val = crypto_sign(sm, &smlen, m, mlen, sk)) != 0) {\n            printf(\"crypto_sign returned <%d>\\n\", ret_val);\n            return KAT_CRYPTO_FAILURE;\n        }\n        fprintf(fp_rsp, \"smlen = %llu\\n\", smlen);\n        fprintBstr(fp_rsp, \"sm = \", sm, smlen);\n        fprintf(fp_rsp, \"\\n\");\n\n        if ( (ret_val = crypto_sign_open(m1, &mlen1, sm, smlen, pk)) != 0) {\n            printf(\"crypto_sign_open returned <%d>\\n\", ret_val);\n            return KAT_CRYPTO_FAILURE;\n        }\n\n        if ( mlen != mlen1 ) {\n            printf(\"crypto_sign_open returned bad 'mlen': Got <%llu>, expected <%llu>\\n\", mlen1, mlen);\n            return KAT_CRYPTO_FAILURE;\n        }\n\n        if ( memcmp(m, m1, mlen) ) {\n            printf(\"crypto_sign_open returned bad 'm' value\\n\");\n            return KAT_CRYPTO_FAILURE;\n        }\n\n        free(m);\n        free(m1);\n        free(sm);\n\n    } while ( !done );\n\n    fclose(fp_req);\n    fclose(fp_rsp);\n\n    return KAT_SUCCESS;\n}\n\n//\n// ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.)\n//\nint\nFindMarker(FILE *infile, const char *marker)\n{\n\tchar\tline[MAX_MARKER_LEN];\n\tsize_t\t\ti, len;\n\tint curr_line;\n\n\tlen = strlen(marker);\n\tif ( len > MAX_MARKER_LEN-1 )\n\t\tlen = MAX_MARKER_LEN-1;\n\n\tfor ( i=0; i<len; i++ )\n\t  {\n\t    curr_line = fgetc(infile);\n\t    line[i] = (char)curr_line;\n\t    if (curr_line == EOF )\n\t      return 0;\n\t  }\n\tline[len] = '\\0';\n\n\twhile ( 1 ) {\n\t\tif ( !strncmp(line, marker, len) )\n\t\t\treturn 1;\n\n\t\tfor ( i=0; i<len-1; i++ )\n\t\t\tline[i] = line[i+1];\n\t\tcurr_line = fgetc(infile);\n\t\tline[len-1] = 
(char)curr_line;\n\t\tif (curr_line == EOF )\n\t\t    return 0;\n\t\tline[len] = '\\0';\n\t}\n\n\t// shouldn't get here\n\treturn 0;\n}\n\n//\n// ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.)\n//\nint\nReadHex(FILE *infile, unsigned char *A, int Length, char *str)\n{\n\tint\t\t\ti, ch, started;\n\tunsigned char\tich;\n\n\tif ( Length == 0 ) {\n\t\tA[0] = 0x00;\n\t\treturn 1;\n\t}\n\tmemset(A, 0x00, (size_t)Length);\n\tstarted = 0;\n\tif ( FindMarker(infile, str) )\n\t\twhile ( (ch = fgetc(infile)) != EOF ) {\n\t\t\tif ( !isxdigit(ch) ) {\n\t\t\t\tif ( !started ) {\n\t\t\t\t\tif ( ch == '\\n' )\n\t\t\t\t\t\tbreak;\n\t\t\t\t\telse\n\t\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t\tstarted = 1;\n\t\t\tif ( (ch >= '0') && (ch <= '9') )\n\t\t\t\tich = (unsigned char)(ch - '0');\n\t\t\telse if ( (ch >= 'A') && (ch <= 'F') )\n\t\t\t\tich = (unsigned char)(ch - 'A' + 10);\n\t\t\telse if ( (ch >= 'a') && (ch <= 'f') )\n\t\t\t\tich = (unsigned char)(ch - 'a' + 10);\n            else // shouldn't ever get here\n                ich = 0;\n\n\t\t\tfor ( i=0; i<Length-1; i++ )\n\t\t\t\tA[i] = (unsigned char)((A[i] << 4) | (A[i+1] >> 4));\n\t\t\tA[Length-1] = (unsigned char)((A[Length-1] << 4) | ich);\n\t\t}\n\telse\n\t\treturn 0;\n\n\treturn 1;\n}\n\nvoid\nfprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L)\n{\n\tunsigned long long  i;\n\n\tfprintf(fp, \"%s\", S);\n\n\tfor ( i=0; i<L; i++ )\n\t\tfprintf(fp, \"%02X\", A[i]);\n\n\tif ( L == 0 )\n\t\tfprintf(fp, \"00\");\n\n\tfprintf(fp, \"\\n\");\n}\n\n"
  },
  {
    "path": "ref/address.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n/*\n * Specify which level of Merkle tree (the \"layer\") we're working on\n */\nvoid set_layer_addr(uint32_t addr[8], uint32_t layer)\n{\n    ((unsigned char *)addr)[SPX_OFFSET_LAYER] = (unsigned char)layer;\n}\n\n/*\n * Specify which Merkle tree within the level (the \"tree address\") we're working on\n */\nvoid set_tree_addr(uint32_t addr[8], uint64_t tree)\n{\n#if (SPX_TREE_HEIGHT * (SPX_D - 1)) > 64\n    #error Subtree addressing is currently limited to at most 2^64 trees\n#endif\n    ull_to_bytes(&((unsigned char *)addr)[SPX_OFFSET_TREE], 8, tree );\n}\n\n/*\n * Specify the reason we'll use this address structure for, that is, what\n * hash will we compute with it.  This is used so that unrelated types of\n * hashes don't accidentally get the same address structure.  The type will be\n * one of the SPX_ADDR_TYPE constants\n */\nvoid set_type(uint32_t addr[8], uint32_t type)\n{\n    ((unsigned char *)addr)[SPX_OFFSET_TYPE] = (unsigned char)type;\n}\n\n/*\n * Copy the layer and tree fields of the address structure.  This is used\n * when we're doing multiple types of hashes within the same Merkle tree\n */\nvoid copy_subtree_addr(uint32_t out[8], const uint32_t in[8])\n{\n    memcpy( out, in, SPX_OFFSET_TREE+8 );\n}\n\n/* These functions are used for OTS addresses. */\n\n/*\n * Specify which Merkle leaf we're working on; that is, which OTS keypair\n * we're talking about.\n */\nvoid set_keypair_addr(uint32_t addr[8], uint32_t keypair)\n{\n    u32_to_bytes(&((unsigned char *)addr)[SPX_OFFSET_KP_ADDR], keypair);\n}\n\n/*\n * Copy the layer, tree and keypair fields of the address structure.  
This is\n * used when we're doing multiple things within the same OTS keypair\n */\nvoid copy_keypair_addr(uint32_t out[8], const uint32_t in[8])\n{\n    memcpy( out, in, SPX_OFFSET_TREE+8 );\n    memcpy( (unsigned char *)out + SPX_OFFSET_KP_ADDR, (unsigned char *)in + SPX_OFFSET_KP_ADDR, 4); \n}\n\n/*\n * Specify which Merkle chain within the OTS we're working with\n * (the chain address)\n */\nvoid set_chain_addr(uint32_t addr[8], uint32_t chain)\n{\n    ((unsigned char *)addr)[SPX_OFFSET_CHAIN_ADDR] = (unsigned char)chain;\n}\n\n/*\n * Specify where in the Merkle chain we are\n* (the hash address)\n */\nvoid set_hash_addr(uint32_t addr[8], uint32_t hash)\n{\n    ((unsigned char *)addr)[SPX_OFFSET_HASH_ADDR] = (unsigned char)hash;\n}\n\n/* These functions are used for all hash tree addresses (including FORS). */\n\n/*\n * Specify the height of the node in the Merkle/FORS tree we are in\n * (the tree height)\n */\nvoid set_tree_height(uint32_t addr[8], uint32_t tree_height)\n{\n    ((unsigned char *)addr)[SPX_OFFSET_TREE_HGT] = (unsigned char)tree_height;\n}\n\n/*\n * Specify the distance from the left edge of the node in the Merkle/FORS tree\n * (the tree index)\n */\nvoid set_tree_index(uint32_t addr[8], uint32_t tree_index)\n{\n    u32_to_bytes(&((unsigned char *)addr)[SPX_OFFSET_TREE_INDEX], tree_index );\n}\n"
  },
  {
    "path": "ref/address.h",
    "content": "#ifndef SPX_ADDRESS_H\n#define SPX_ADDRESS_H\n\n#include <stdint.h>\n#include \"params.h\"\n\n/* The hash types that are passed to set_type */\n#define SPX_ADDR_TYPE_WOTS 0\n#define SPX_ADDR_TYPE_WOTSPK 1\n#define SPX_ADDR_TYPE_HASHTREE 2\n#define SPX_ADDR_TYPE_FORSTREE 3\n#define SPX_ADDR_TYPE_FORSPK 4\n#define SPX_ADDR_TYPE_WOTSPRF 5\n#define SPX_ADDR_TYPE_FORSPRF 6\n\n#define set_layer_addr SPX_NAMESPACE(set_layer_addr)\nvoid set_layer_addr(uint32_t addr[8], uint32_t layer);\n\n#define set_tree_addr SPX_NAMESPACE(set_tree_addr)\nvoid set_tree_addr(uint32_t addr[8], uint64_t tree);\n\n#define set_type SPX_NAMESPACE(set_type)\nvoid set_type(uint32_t addr[8], uint32_t type);\n\n/* Copies the layer and tree part of one address into the other */\n#define copy_subtree_addr SPX_NAMESPACE(copy_subtree_addr)\nvoid copy_subtree_addr(uint32_t out[8], const uint32_t in[8]);\n\n/* These functions are used for WOTS and FORS addresses. */\n\n#define set_keypair_addr SPX_NAMESPACE(set_keypair_addr)\nvoid set_keypair_addr(uint32_t addr[8], uint32_t keypair);\n\n#define set_chain_addr SPX_NAMESPACE(set_chain_addr)\nvoid set_chain_addr(uint32_t addr[8], uint32_t chain);\n\n#define set_hash_addr SPX_NAMESPACE(set_hash_addr)\nvoid set_hash_addr(uint32_t addr[8], uint32_t hash);\n\n#define copy_keypair_addr SPX_NAMESPACE(copy_keypair_addr)\nvoid copy_keypair_addr(uint32_t out[8], const uint32_t in[8]);\n\n/* These functions are used for all hash tree addresses (including FORS). */\n\n#define set_tree_height SPX_NAMESPACE(set_tree_height)\nvoid set_tree_height(uint32_t addr[8], uint32_t tree_height);\n\n#define set_tree_index SPX_NAMESPACE(set_tree_index)\nvoid set_tree_index(uint32_t addr[8], uint32_t tree_index);\n\n#endif\n"
  },
  {
    "path": "ref/api.h",
    "content": "#ifndef SPX_API_H\n#define SPX_API_H\n\n#include <stddef.h>\n#include <stdint.h>\n\n#include \"params.h\"\n\n#define CRYPTO_ALGNAME \"SPHINCS+\"\n\n#define CRYPTO_SECRETKEYBYTES SPX_SK_BYTES\n#define CRYPTO_PUBLICKEYBYTES SPX_PK_BYTES\n#define CRYPTO_BYTES SPX_BYTES\n#define CRYPTO_SEEDBYTES 3*SPX_N\n\n/*\n * Returns the length of a secret key, in bytes\n */\nunsigned long long crypto_sign_secretkeybytes(void);\n\n/*\n * Returns the length of a public key, in bytes\n */\nunsigned long long crypto_sign_publickeybytes(void);\n\n/*\n * Returns the length of a signature, in bytes\n */\nunsigned long long crypto_sign_bytes(void);\n\n/*\n * Returns the length of the seed required to generate a key pair, in bytes\n */\nunsigned long long crypto_sign_seedbytes(void);\n\n/*\n * Generates a SPHINCS+ key pair given a seed.\n * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]\n * Format pk: [root || PUB_SEED]\n */\nint crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk,\n                             const unsigned char *seed);\n\n/*\n * Generates a SPHINCS+ key pair.\n * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]\n * Format pk: [root || PUB_SEED]\n */\nint crypto_sign_keypair(unsigned char *pk, unsigned char *sk);\n\n/**\n * Returns an array containing a detached signature.\n */\nint crypto_sign_signature(uint8_t *sig, size_t *siglen,\n                          const uint8_t *m, size_t mlen, const uint8_t *sk);\n\n/**\n * Verifies a detached signature and message under a given public key.\n */\nint crypto_sign_verify(const uint8_t *sig, size_t siglen,\n                       const uint8_t *m, size_t mlen, const uint8_t *pk);\n\n/**\n * Returns an array containing the signature followed by the message.\n */\nint crypto_sign(unsigned char *sm, unsigned long long *smlen,\n                const unsigned char *m, unsigned long long mlen,\n                const unsigned char *sk);\n\n/**\n * Verifies a given signature-message pair under a given 
public key.\n */\nint crypto_sign_open(unsigned char *m, unsigned long long *mlen,\n                     const unsigned char *sm, unsigned long long smlen,\n                     const unsigned char *pk);\n\n#endif\n"
  },
  {
    "path": "ref/context.h",
    "content": "#ifndef SPX_CONTEXT_H\n#define SPX_CONTEXT_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n\ntypedef struct {\n    uint8_t pub_seed[SPX_N];\n    uint8_t sk_seed[SPX_N];\n\n#ifdef SPX_SHA2\n    // sha256 state that absorbed pub_seed\n    uint8_t state_seeded[40];\n\n# if SPX_SHA512\n    // sha512 state that absorbed pub_seed\n    uint8_t state_seeded_512[72];\n# endif\n#endif\n\n#ifdef SPX_HARAKA\n    uint64_t tweaked512_rc64[10][8];\n    uint32_t tweaked256_rc32[10][8];\n#endif\n} spx_ctx;\n\n#endif\n"
  },
  {
    "path": "ref/fips202.c",
    "content": "/* Based on the public domain implementation in\n * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html\n * by Ronny Van Keer\n * and the public domain \"TweetFips202\" implementation\n * from https://twitter.com/tweetfips202\n * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */\n\n#include <stddef.h>\n#include <stdint.h>\n\n#include \"fips202.h\"\n\n#define NROUNDS 24\n#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset))))\n\n/*************************************************\n * Name:        load64\n *\n * Description: Load 8 bytes into uint64_t in little-endian order\n *\n * Arguments:   - const uint8_t *x: pointer to input byte array\n *\n * Returns the loaded 64-bit unsigned integer\n **************************************************/\nstatic uint64_t load64(const uint8_t *x) {\n    uint64_t r = 0;\n    for (size_t i = 0; i < 8; ++i) {\n        r |= (uint64_t)x[i] << 8 * i;\n    }\n\n    return r;\n}\n\n/*************************************************\n * Name:        store64\n *\n * Description: Store a 64-bit integer to a byte array in little-endian order\n *\n * Arguments:   - uint8_t *x: pointer to the output byte array\n *              - uint64_t u: input 64-bit unsigned integer\n **************************************************/\nstatic void store64(uint8_t *x, uint64_t u) {\n    for (size_t i = 0; i < 8; ++i) {\n        x[i] = (uint8_t) (u >> 8 * i);\n    }\n}\n\n/* Keccak round constants */\nstatic const uint64_t KeccakF_RoundConstants[NROUNDS] = {\n    0x0000000000000001ULL, 0x0000000000008082ULL,\n    0x800000000000808aULL, 0x8000000080008000ULL,\n    0x000000000000808bULL, 0x0000000080000001ULL,\n    0x8000000080008081ULL, 0x8000000000008009ULL,\n    0x000000000000008aULL, 0x0000000000000088ULL,\n    0x0000000080008009ULL, 0x000000008000000aULL,\n    0x000000008000808bULL, 0x800000000000008bULL,\n    0x8000000000008089ULL, 0x8000000000008003ULL,\n    0x8000000000008002ULL, 
0x8000000000000080ULL,\n    0x000000000000800aULL, 0x800000008000000aULL,\n    0x8000000080008081ULL, 0x8000000000008080ULL,\n    0x0000000080000001ULL, 0x8000000080008008ULL\n};\n\n/*************************************************\n * Name:        KeccakF1600_StatePermute\n *\n * Description: The Keccak F1600 Permutation\n *\n * Arguments:   - uint64_t *state: pointer to input/output Keccak state\n **************************************************/\nstatic void KeccakF1600_StatePermute(uint64_t *state) {\n    int round;\n\n    uint64_t Aba, Abe, Abi, Abo, Abu;\n    uint64_t Aga, Age, Agi, Ago, Agu;\n    uint64_t Aka, Ake, Aki, Ako, Aku;\n    uint64_t Ama, Ame, Ami, Amo, Amu;\n    uint64_t Asa, Ase, Asi, Aso, Asu;\n    uint64_t BCa, BCe, BCi, BCo, BCu;\n    uint64_t Da, De, Di, Do, Du;\n    uint64_t Eba, Ebe, Ebi, Ebo, Ebu;\n    uint64_t Ega, Ege, Egi, Ego, Egu;\n    uint64_t Eka, Eke, Eki, Eko, Eku;\n    uint64_t Ema, Eme, Emi, Emo, Emu;\n    uint64_t Esa, Ese, Esi, Eso, Esu;\n\n    // copyFromState(A, state)\n    Aba = state[0];\n    Abe = state[1];\n    Abi = state[2];\n    Abo = state[3];\n    Abu = state[4];\n    Aga = state[5];\n    Age = state[6];\n    Agi = state[7];\n    Ago = state[8];\n    Agu = state[9];\n    Aka = state[10];\n    Ake = state[11];\n    Aki = state[12];\n    Ako = state[13];\n    Aku = state[14];\n    Ama = state[15];\n    Ame = state[16];\n    Ami = state[17];\n    Amo = state[18];\n    Amu = state[19];\n    Asa = state[20];\n    Ase = state[21];\n    Asi = state[22];\n    Aso = state[23];\n    Asu = state[24];\n\n    for (round = 0; round < NROUNDS; round += 2) {\n        //    prepareTheta\n        BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa;\n        BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase;\n        BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi;\n        BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso;\n        BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu;\n\n        // thetaRhoPiChiIotaPrepareTheta(round  , A, E)\n        Da = BCu ^ ROL(BCe, 1);\n        De = BCa ^ ROL(BCi, 1);\n    
    Di = BCe ^ ROL(BCo, 1);\n        Do = BCi ^ ROL(BCu, 1);\n        Du = BCo ^ ROL(BCa, 1);\n\n        Aba ^= Da;\n        BCa = Aba;\n        Age ^= De;\n        BCe = ROL(Age, 44);\n        Aki ^= Di;\n        BCi = ROL(Aki, 43);\n        Amo ^= Do;\n        BCo = ROL(Amo, 21);\n        Asu ^= Du;\n        BCu = ROL(Asu, 14);\n        Eba = BCa ^ ((~BCe) & BCi);\n        Eba ^= KeccakF_RoundConstants[round];\n        Ebe = BCe ^ ((~BCi) & BCo);\n        Ebi = BCi ^ ((~BCo) & BCu);\n        Ebo = BCo ^ ((~BCu) & BCa);\n        Ebu = BCu ^ ((~BCa) & BCe);\n\n        Abo ^= Do;\n        BCa = ROL(Abo, 28);\n        Agu ^= Du;\n        BCe = ROL(Agu, 20);\n        Aka ^= Da;\n        BCi = ROL(Aka, 3);\n        Ame ^= De;\n        BCo = ROL(Ame, 45);\n        Asi ^= Di;\n        BCu = ROL(Asi, 61);\n        Ega = BCa ^ ((~BCe) & BCi);\n        Ege = BCe ^ ((~BCi) & BCo);\n        Egi = BCi ^ ((~BCo) & BCu);\n        Ego = BCo ^ ((~BCu) & BCa);\n        Egu = BCu ^ ((~BCa) & BCe);\n\n        Abe ^= De;\n        BCa = ROL(Abe, 1);\n        Agi ^= Di;\n        BCe = ROL(Agi, 6);\n        Ako ^= Do;\n        BCi = ROL(Ako, 25);\n        Amu ^= Du;\n        BCo = ROL(Amu, 8);\n        Asa ^= Da;\n        BCu = ROL(Asa, 18);\n        Eka = BCa ^ ((~BCe) & BCi);\n        Eke = BCe ^ ((~BCi) & BCo);\n        Eki = BCi ^ ((~BCo) & BCu);\n        Eko = BCo ^ ((~BCu) & BCa);\n        Eku = BCu ^ ((~BCa) & BCe);\n\n        Abu ^= Du;\n        BCa = ROL(Abu, 27);\n        Aga ^= Da;\n        BCe = ROL(Aga, 36);\n        Ake ^= De;\n        BCi = ROL(Ake, 10);\n        Ami ^= Di;\n        BCo = ROL(Ami, 15);\n        Aso ^= Do;\n        BCu = ROL(Aso, 56);\n        Ema = BCa ^ ((~BCe) & BCi);\n        Eme = BCe ^ ((~BCi) & BCo);\n        Emi = BCi ^ ((~BCo) & BCu);\n        Emo = BCo ^ ((~BCu) & BCa);\n        Emu = BCu ^ ((~BCa) & BCe);\n\n        Abi ^= Di;\n        BCa = ROL(Abi, 62);\n        Ago ^= Do;\n        BCe = ROL(Ago, 55);\n        Aku ^= Du;\n        BCi = ROL(Aku, 
39);\n        Ama ^= Da;\n        BCo = ROL(Ama, 41);\n        Ase ^= De;\n        BCu = ROL(Ase, 2);\n        Esa = BCa ^ ((~BCe) & BCi);\n        Ese = BCe ^ ((~BCi) & BCo);\n        Esi = BCi ^ ((~BCo) & BCu);\n        Eso = BCo ^ ((~BCu) & BCa);\n        Esu = BCu ^ ((~BCa) & BCe);\n\n        //    prepareTheta\n        BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa;\n        BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese;\n        BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi;\n        BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso;\n        BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu;\n\n        // thetaRhoPiChiIotaPrepareTheta(round+1, E, A)\n        Da = BCu ^ ROL(BCe, 1);\n        De = BCa ^ ROL(BCi, 1);\n        Di = BCe ^ ROL(BCo, 1);\n        Do = BCi ^ ROL(BCu, 1);\n        Du = BCo ^ ROL(BCa, 1);\n\n        Eba ^= Da;\n        BCa = Eba;\n        Ege ^= De;\n        BCe = ROL(Ege, 44);\n        Eki ^= Di;\n        BCi = ROL(Eki, 43);\n        Emo ^= Do;\n        BCo = ROL(Emo, 21);\n        Esu ^= Du;\n        BCu = ROL(Esu, 14);\n        Aba = BCa ^ ((~BCe) & BCi);\n        Aba ^= KeccakF_RoundConstants[round + 1];\n        Abe = BCe ^ ((~BCi) & BCo);\n        Abi = BCi ^ ((~BCo) & BCu);\n        Abo = BCo ^ ((~BCu) & BCa);\n        Abu = BCu ^ ((~BCa) & BCe);\n\n        Ebo ^= Do;\n        BCa = ROL(Ebo, 28);\n        Egu ^= Du;\n        BCe = ROL(Egu, 20);\n        Eka ^= Da;\n        BCi = ROL(Eka, 3);\n        Eme ^= De;\n        BCo = ROL(Eme, 45);\n        Esi ^= Di;\n        BCu = ROL(Esi, 61);\n        Aga = BCa ^ ((~BCe) & BCi);\n        Age = BCe ^ ((~BCi) & BCo);\n        Agi = BCi ^ ((~BCo) & BCu);\n        Ago = BCo ^ ((~BCu) & BCa);\n        Agu = BCu ^ ((~BCa) & BCe);\n\n        Ebe ^= De;\n        BCa = ROL(Ebe, 1);\n        Egi ^= Di;\n        BCe = ROL(Egi, 6);\n        Eko ^= Do;\n        BCi = ROL(Eko, 25);\n        Emu ^= Du;\n        BCo = ROL(Emu, 8);\n        Esa ^= Da;\n        BCu = ROL(Esa, 18);\n        Aka = BCa ^ ((~BCe) & BCi);\n        Ake = BCe ^ ((~BCi) & BCo);\n        Aki = 
BCi ^ ((~BCo) & BCu);\n        Ako = BCo ^ ((~BCu) & BCa);\n        Aku = BCu ^ ((~BCa) & BCe);\n\n        Ebu ^= Du;\n        BCa = ROL(Ebu, 27);\n        Ega ^= Da;\n        BCe = ROL(Ega, 36);\n        Eke ^= De;\n        BCi = ROL(Eke, 10);\n        Emi ^= Di;\n        BCo = ROL(Emi, 15);\n        Eso ^= Do;\n        BCu = ROL(Eso, 56);\n        Ama = BCa ^ ((~BCe) & BCi);\n        Ame = BCe ^ ((~BCi) & BCo);\n        Ami = BCi ^ ((~BCo) & BCu);\n        Amo = BCo ^ ((~BCu) & BCa);\n        Amu = BCu ^ ((~BCa) & BCe);\n\n        Ebi ^= Di;\n        BCa = ROL(Ebi, 62);\n        Ego ^= Do;\n        BCe = ROL(Ego, 55);\n        Eku ^= Du;\n        BCi = ROL(Eku, 39);\n        Ema ^= Da;\n        BCo = ROL(Ema, 41);\n        Ese ^= De;\n        BCu = ROL(Ese, 2);\n        Asa = BCa ^ ((~BCe) & BCi);\n        Ase = BCe ^ ((~BCi) & BCo);\n        Asi = BCi ^ ((~BCo) & BCu);\n        Aso = BCo ^ ((~BCu) & BCa);\n        Asu = BCu ^ ((~BCa) & BCe);\n    }\n\n    // copyToState(state, A)\n    state[0] = Aba;\n    state[1] = Abe;\n    state[2] = Abi;\n    state[3] = Abo;\n    state[4] = Abu;\n    state[5] = Aga;\n    state[6] = Age;\n    state[7] = Agi;\n    state[8] = Ago;\n    state[9] = Agu;\n    state[10] = Aka;\n    state[11] = Ake;\n    state[12] = Aki;\n    state[13] = Ako;\n    state[14] = Aku;\n    state[15] = Ama;\n    state[16] = Ame;\n    state[17] = Ami;\n    state[18] = Amo;\n    state[19] = Amu;\n    state[20] = Asa;\n    state[21] = Ase;\n    state[22] = Asi;\n    state[23] = Aso;\n    state[24] = Asu;\n}\n\n/*************************************************\n * Name:        keccak_absorb\n *\n * Description: Absorb step of Keccak;\n *              non-incremental, starts by zeroeing the state.\n *\n * Arguments:   - uint64_t *s: pointer to (uninitialized) output Keccak state\n *              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)\n *              - const uint8_t *m: pointer to input to be absorbed into s\n *              - size_t mlen: 
length of input in bytes\n *              - uint8_t p: domain-separation byte for different\n *                                 Keccak-derived functions\n **************************************************/\nstatic void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m,\n                          size_t mlen, uint8_t p) {\n    size_t i;\n    uint8_t t[200];\n\n    /* Zero state */\n    for (i = 0; i < 25; ++i) {\n        s[i] = 0;\n    }\n\n    while (mlen >= r) {\n        for (i = 0; i < r / 8; ++i) {\n            s[i] ^= load64(m + 8 * i);\n        }\n\n        KeccakF1600_StatePermute(s);\n        mlen -= r;\n        m += r;\n    }\n\n    for (i = 0; i < r; ++i) {\n        t[i] = 0;\n    }\n    for (i = 0; i < mlen; ++i) {\n        t[i] = m[i];\n    }\n    t[i] = p;\n    t[r - 1] |= 128;\n    for (i = 0; i < r / 8; ++i) {\n        s[i] ^= load64(t + 8 * i);\n    }\n}\n\n/*************************************************\n * Name:        keccak_squeezeblocks\n *\n * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each.\n *              Modifies the state. 
Can be called multiple times to keep\n *              squeezing, i.e., is incremental.\n *\n * Arguments:   - uint8_t *h: pointer to output blocks\n *              - size_t nblocks: number of blocks to be\n *                                                squeezed (written to h)\n *              - uint64_t *s: pointer to input/output Keccak state\n *              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)\n **************************************************/\nstatic void keccak_squeezeblocks(uint8_t *h, size_t nblocks,\n                                 uint64_t *s, uint32_t r) {\n    while (nblocks > 0) {\n        KeccakF1600_StatePermute(s);\n        for (size_t i = 0; i < (r >> 3); i++) {\n            store64(h + 8 * i, s[i]);\n        }\n        h += r;\n        nblocks--;\n    }\n}\n\n/*************************************************\n * Name:        keccak_inc_init\n *\n * Description: Initializes the incremental Keccak state to zero.\n *\n * Arguments:   - uint64_t *s_inc: pointer to input/output incremental state\n *                First 25 values represent Keccak state.\n *                26th value represents either the number of absorbed bytes\n *                that have not been permuted, or not-yet-squeezed bytes.\n **************************************************/\nstatic void keccak_inc_init(uint64_t *s_inc) {\n    size_t i;\n\n    for (i = 0; i < 25; ++i) {\n        s_inc[i] = 0;\n    }\n    s_inc[25] = 0;\n}\n\n/*************************************************\n * Name:        keccak_inc_absorb\n *\n * Description: Incremental keccak absorb\n *              Preceded by keccak_inc_init, succeeded by keccak_inc_finalize\n *\n * Arguments:   - uint64_t *s_inc: pointer to input/output incremental state\n *                First 25 values represent Keccak state.\n *                26th value represents either the number of absorbed bytes\n *                that have not been permuted, or not-yet-squeezed bytes.\n *              - uint32_t 
r: rate in bytes (e.g., 168 for SHAKE128)\n *              - const uint8_t *m: pointer to input to be absorbed into s\n *              - size_t mlen: length of input in bytes\n **************************************************/\nstatic void keccak_inc_absorb(uint64_t *s_inc, uint32_t r, const uint8_t *m,\n                              size_t mlen) {\n    size_t i;\n\n    /* Recall that s_inc[25] is the non-absorbed bytes xored into the state */\n    while (mlen + s_inc[25] >= r) {\n        for (i = 0; i < r - s_inc[25]; i++) {\n            /* Take the i'th byte from message\n               xor with the s_inc[25] + i'th byte of the state; little-endian */\n            s_inc[(s_inc[25] + i) >> 3] ^= (uint64_t)m[i] << (8 * ((s_inc[25] + i) & 0x07));\n        }\n        mlen -= (size_t)(r - s_inc[25]);\n        m += r - s_inc[25];\n        s_inc[25] = 0;\n\n        KeccakF1600_StatePermute(s_inc);\n    }\n\n    for (i = 0; i < mlen; i++) {\n        s_inc[(s_inc[25] + i) >> 3] ^= (uint64_t)m[i] << (8 * ((s_inc[25] + i) & 0x07));\n    }\n    s_inc[25] += mlen;\n}\n\n/*************************************************\n * Name:        keccak_inc_finalize\n *\n * Description: Finalizes Keccak absorb phase, prepares for squeezing\n *\n * Arguments:   - uint64_t *s_inc: pointer to input/output incremental state\n *                First 25 values represent Keccak state.\n *                26th value represents either the number of absorbed bytes\n *                that have not been permuted, or not-yet-squeezed bytes.\n *              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)\n *              - uint8_t p: domain-separation byte for different\n *                                 Keccak-derived functions\n **************************************************/\nstatic void keccak_inc_finalize(uint64_t *s_inc, uint32_t r, uint8_t p) {\n    /* After keccak_inc_absorb, we are guaranteed that s_inc[25] < r,\n       so we can always use one more byte for p in the current 
state. */\n    s_inc[s_inc[25] >> 3] ^= (uint64_t)p << (8 * (s_inc[25] & 0x07));\n    s_inc[(r - 1) >> 3] ^= (uint64_t)128 << (8 * ((r - 1) & 0x07));\n    s_inc[25] = 0;\n}\n\n/*************************************************\n * Name:        keccak_inc_squeeze\n *\n * Description: Incremental Keccak squeeze; can be called on byte-level\n *\n * Arguments:   - uint8_t *h: pointer to output bytes\n *              - size_t outlen: number of bytes to be squeezed\n *              - uint64_t *s_inc: pointer to input/output incremental state\n *                First 25 values represent Keccak state.\n *                26th value represents either the number of absorbed bytes\n *                that have not been permuted, or not-yet-squeezed bytes.\n *              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)\n **************************************************/\nstatic void keccak_inc_squeeze(uint8_t *h, size_t outlen,\n                               uint64_t *s_inc, uint32_t r) {\n    size_t i;\n\n    /* First consume any bytes we still have sitting around */\n    for (i = 0; i < outlen && i < s_inc[25]; i++) {\n        /* There are s_inc[25] bytes left, so r - s_inc[25] is the first\n           available byte. We consume from there, i.e., up to r. 
*/\n        h[i] = (uint8_t)(s_inc[(r - s_inc[25] + i) >> 3] >> (8 * ((r - s_inc[25] + i) & 0x07)));\n    }\n    h += i;\n    outlen -= i;\n    s_inc[25] -= i;\n\n    /* Then squeeze the remaining necessary blocks */\n    while (outlen > 0) {\n        KeccakF1600_StatePermute(s_inc);\n\n        for (i = 0; i < outlen && i < r; i++) {\n            h[i] = (uint8_t)(s_inc[i >> 3] >> (8 * (i & 0x07)));\n        }\n        h += i;\n        outlen -= i;\n        s_inc[25] = r - i;\n    }\n}\n\nvoid shake256_inc_init(uint64_t *s_inc) {\n    keccak_inc_init(s_inc);\n}\n\nvoid shake256_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen) {\n    keccak_inc_absorb(s_inc, SHAKE256_RATE, input, inlen);\n}\n\nvoid shake256_inc_finalize(uint64_t *s_inc) {\n    keccak_inc_finalize(s_inc, SHAKE256_RATE, 0x1F);\n}\n\nvoid shake256_inc_squeeze(uint8_t *output, size_t outlen, uint64_t *s_inc) {\n    keccak_inc_squeeze(output, outlen, s_inc, SHAKE256_RATE);\n}\n\n/*************************************************\n * Name:        shake256_absorb\n *\n * Description: Absorb step of the SHAKE256 XOF.\n *              non-incremental, starts by zeroeing the state.\n *\n * Arguments:   - uint64_t *s: pointer to (uninitialized) output Keccak state\n *              - const uint8_t *input: pointer to input to be absorbed\n *                                            into s\n *              - size_t inlen: length of input in bytes\n **************************************************/\nvoid shake256_absorb(uint64_t *s, const uint8_t *input, size_t inlen) {\n    keccak_absorb(s, SHAKE256_RATE, input, inlen, 0x1F);\n}\n\n/*************************************************\n * Name:        shake256_squeezeblocks\n *\n * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of\n *              SHAKE256_RATE bytes each. Modifies the state. 
Can be called\n *              multiple times to keep squeezing, i.e., is incremental.\n *\n * Arguments:   - uint8_t *output: pointer to output blocks\n *              - size_t nblocks: number of blocks to be squeezed\n *                                (written to output)\n *              - uint64_t *s: pointer to input/output Keccak state\n **************************************************/\nvoid shake256_squeezeblocks(uint8_t *output, size_t nblocks, uint64_t *s) {\n    keccak_squeezeblocks(output, nblocks, s, SHAKE256_RATE);\n}\n\n/*************************************************\n * Name:        shake256\n *\n * Description: SHAKE256 XOF with non-incremental API\n *\n * Arguments:   - uint8_t *output: pointer to output\n *              - size_t outlen: requested output length in bytes\n *              - const uint8_t *input: pointer to input\n *              - size_t inlen: length of input in bytes\n **************************************************/\nvoid shake256(uint8_t *output, size_t outlen,\n              const uint8_t *input, size_t inlen) {\n    size_t nblocks = outlen / SHAKE256_RATE;\n    uint8_t t[SHAKE256_RATE];\n    uint64_t s[25];\n\n    shake256_absorb(s, input, inlen);\n    shake256_squeezeblocks(output, nblocks, s);\n\n    output += nblocks * SHAKE256_RATE;\n    outlen -= nblocks * SHAKE256_RATE;\n\n    if (outlen) {\n        shake256_squeezeblocks(t, 1, s);\n        for (size_t i = 0; i < outlen; ++i) {\n            output[i] = t[i];\n        }\n    }\n}\n"
  },
  {
    "path": "ref/fips202.h",
    "content": "#ifndef SPX_FIPS202_H\n#define SPX_FIPS202_H\n\n#include <stddef.h>\n#include <stdint.h>\n\n#define SHAKE128_RATE 168\n#define SHAKE256_RATE 136\n#define SHA3_256_RATE 136\n#define SHA3_512_RATE 72\n\nvoid shake128_absorb(uint64_t *s, const uint8_t *input, size_t inlen);\n\nvoid shake128_squeezeblocks(uint8_t *output, size_t nblocks, uint64_t *s);\n\nvoid shake128_inc_init(uint64_t *s_inc);\nvoid shake128_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen);\nvoid shake128_inc_finalize(uint64_t *s_inc);\nvoid shake128_inc_squeeze(uint8_t *output, size_t outlen, uint64_t *s_inc);\n\nvoid shake256_absorb(uint64_t *s, const uint8_t *input, size_t inlen);\nvoid shake256_squeezeblocks(uint8_t *output, size_t nblocks, uint64_t *s);\n\nvoid shake256_inc_init(uint64_t *s_inc);\nvoid shake256_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen);\nvoid shake256_inc_finalize(uint64_t *s_inc);\nvoid shake256_inc_squeeze(uint8_t *output, size_t outlen, uint64_t *s_inc);\n\nvoid shake128(uint8_t *output, size_t outlen,\n              const uint8_t *input, size_t inlen);\n\nvoid shake256(uint8_t *output, size_t outlen,\n              const uint8_t *input, size_t inlen);\n\nvoid sha3_256_inc_init(uint64_t *s_inc);\nvoid sha3_256_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen);\nvoid sha3_256_inc_finalize(uint8_t *output, uint64_t *s_inc);\n\nvoid sha3_256(uint8_t *output, const uint8_t *input, size_t inlen);\n\nvoid sha3_512_inc_init(uint64_t *s_inc);\nvoid sha3_512_inc_absorb(uint64_t *s_inc, const uint8_t *input, size_t inlen);\nvoid sha3_512_inc_finalize(uint8_t *output, uint64_t *s_inc);\n\nvoid sha3_512(uint8_t *output, const uint8_t *input, size_t inlen);\n\n#endif\n"
  },
  {
    "path": "ref/fors.c",
    "content": "#include <stdlib.h>\n#include <stdint.h>\n#include <string.h>\n\n#include \"fors.h\"\n#include \"utils.h\"\n#include \"utilsx1.h\"\n#include \"hash.h\"\n#include \"thash.h\"\n#include \"address.h\"\n\nstatic void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx,\n                        uint32_t fors_leaf_addr[8])\n{\n    prf_addr(sk, ctx, fors_leaf_addr);\n}\n\nstatic void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,\n                            const spx_ctx *ctx,\n                            uint32_t fors_leaf_addr[8])\n{\n    thash(leaf, sk, 1, ctx, fors_leaf_addr);\n}\n\nstruct fors_gen_leaf_info {\n    uint32_t leaf_addrx[8];\n};\n\nstatic void fors_gen_leafx1(unsigned char *leaf,\n                            const spx_ctx *ctx,\n                            uint32_t addr_idx, void *info)\n{\n    struct fors_gen_leaf_info *fors_info = info;\n    uint32_t *fors_leaf_addr = fors_info->leaf_addrx;\n\n    /* Only set the parts that the caller doesn't set */\n    set_tree_index(fors_leaf_addr, addr_idx);\n    set_type(fors_leaf_addr, SPX_ADDR_TYPE_FORSPRF);\n    fors_gen_sk(leaf, ctx, fors_leaf_addr);\n\n    set_type(fors_leaf_addr, SPX_ADDR_TYPE_FORSTREE);\n    fors_sk_to_leaf(leaf, leaf,\n                    ctx, fors_leaf_addr);\n}\n\n/**\n * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n * Assumes indices has space for SPX_FORS_TREES integers.\n */\nstatic void message_to_indices(uint32_t *indices, const unsigned char *m)\n{\n    unsigned int i, j;\n    unsigned int offset = 0;\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        indices[i] = 0;\n        for (j = 0; j < SPX_FORS_HEIGHT; j++) {\n            indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 1u) << j;\n            offset++;\n        }\n    }\n}\n\n/**\n * Signs a message m, deriving the secret key from sk_seed and the FTS address.\n * Assumes m contains at least SPX_FORS_HEIGHT * 
SPX_FORS_TREES bits.\n */\nvoid fors_sign(unsigned char *sig, unsigned char *pk,\n               const unsigned char *m,\n               const spx_ctx *ctx,\n               const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    uint32_t fors_tree_addr[8] = {0};\n    struct fors_gen_leaf_info fors_info = {0};\n    uint32_t *fors_leaf_addr = fors_info.leaf_addrx;\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    copy_keypair_addr(fors_tree_addr, fors_addr);\n    copy_keypair_addr(fors_leaf_addr, fors_addr);\n\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF);\n\n        /* Include the secret key part that produces the selected leaf node. */\n        fors_gen_sk(sig, ctx, fors_tree_addr);\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n        sig += SPX_N;\n\n        /* Compute the authentication path for this leaf node. */\n        treehashx1(roots + i*SPX_N, sig, ctx,\n                 indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx1,\n                 fors_tree_addr, &fors_info);\n\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n\n/**\n * Derives the FORS public key from a signature.\n * This can be used for verification by comparing to a known public key, or to\n * subsequently verify a signature on the derived public key. 
The latter is the\n * typical use-case when used as an FTS below an OTS in a hypertree.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *m,\n                      const spx_ctx* ctx,\n                      const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    unsigned char leaf[SPX_N];\n    uint32_t fors_tree_addr[8] = {0};\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    copy_keypair_addr(fors_tree_addr, fors_addr);\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n\n    set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Derive the leaf from the included secret key part. */\n        fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr);\n        sig += SPX_N;\n\n        /* Derive the corresponding root node of this tree. */\n        compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset,\n                     sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr);\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n"
  },
  {
    "path": "ref/fors.h",
    "content": "#ifndef SPX_FORS_H\n#define SPX_FORS_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n#include \"context.h\"\n\n/**\n * Signs a message m, deriving the secret key from sk_seed and the FTS address.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\n#define fors_sign SPX_NAMESPACE(fors_sign)\nvoid fors_sign(unsigned char *sig, unsigned char *pk,\n               const unsigned char *m,\n               const spx_ctx* ctx,\n               const uint32_t fors_addr[8]);\n\n/**\n * Derives the FORS public key from a signature.\n * This can be used for verification by comparing to a known public key, or to\n * subsequently verify a signature on the derived public key. The latter is the\n * typical use-case when used as an FTS below an OTS in a hypertree.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\n#define fors_pk_from_sig SPX_NAMESPACE(fors_pk_from_sig)\nvoid fors_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *m,\n                      const spx_ctx* ctx,\n                      const uint32_t fors_addr[8]);\n\n#endif\n"
  },
  {
    "path": "ref/haraka.c",
    "content": "/*\n * Constant time implementation of the Haraka hash function.\n *\n * The bit-sliced implementation of the AES round functions are\n * based on the AES implementation in BearSSL written \n * by Thomas Pornin <pornin@bolet.org>, licensed as follows:\n *\n * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>\n *\n * Permission is hereby granted, free of charge, to any person obtaining \n * a copy of this software and associated documentation files (the\n * \"Software\"), to deal in the Software without restriction, including\n * without limitation the rights to use, copy, modify, merge, publish,\n * distribute, sublicense, and/or sell copies of the Software, and to\n * permit persons to whom the Software is furnished to do so, subject to\n * the following conditions:\n *\n * The above copyright notice and this permission notice shall be \n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, \n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND \n * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n */\n\n#include <stdio.h>\n#include <string.h>\n#include <stdint.h>\n#include <stdlib.h>\n\n#include \"haraka.h\"\n#include \"utils.h\"\n\n#define HARAKAS_RATE 32\n\nstatic const uint64_t haraka512_rc64[10][8] = {\n    {0x24cf0ab9086f628b, 0xbdd6eeecc83b8382, 0xd96fb0306cdad0a7, 0xaace082ac8f95f89, 0x449d8e8870d7041f, 0x49bb2f80b2b3e2f8, 0x0569ae98d93bb258, 0x23dc9691e7d6a4b1},\n    {0xd8ba10ede0fe5b6e, 0x7ecf7dbe424c7b8e, 0x6ea9949c6df62a31, 0xbf3f3c97ec9c313e, 0x241d03a196a1861e, 0xead3a51116e5a2ea, 0x77d479fcad9574e3, 0x18657a1af894b7a0},\n    {0x10671e1a7f595522, 0xd9a00ff675d28c7b, 0x2f1edf0d2b9ba661, 0xb8ff58b8e3de45f9, 0xee29261da9865c02, 0xd1532aa4b50bdf43, 0x8bf858159b231bb1, 0xdf17439d22d4f599},\n    {0xdd4b2f0870b918c0, 0x757a81f3b39b1bb6, 0x7a5c556898952e3f, 0x7dd70a16d915d87a, 0x3ae61971982b8301, 0xc3ab319e030412be, 0x17c0033ac094a8cb, 0x5a0630fc1a8dc4ef},\n    {0x17708988c1632f73, 0xf92ddae090b44f4f, 0x11ac0285c43aa314, 0x509059941936b8ba, 0xd03e152fa2ce9b69, 0x3fbcbcb63a32998b, 0x6204696d692254f7, 0x915542ed93ec59b4},\n    {0xf4ed94aa8879236e, 0xff6cb41cd38e03c0, 0x069b38602368aeab, 0x669495b820f0ddba, 0xf42013b1b8bf9e3d, 0xcf935efe6439734d, 0xbc1dcf42ca29e3f8, 0x7e6d3ed29f78ad67},\n    {0xf3b0f6837ffcddaa, 0x3a76faef934ddf41, 0xcec7ae583a9c8e35, 0xe4dd18c68f0260af, 0x2c0e5df1ad398eaa, 0x478df5236ae22e8c, 0xfb944c46fe865f39, 0xaa48f82f028132ba},\n    {0x231b9ae2b76aca77, 0x292a76a712db0b40, 0x5850625dc8134491, 0x73137dd469810fb5, 0x8a12a6a202a474fd, 0xd36fd9daa78bdb80, 0xb34c5e733505706f, 0xbaf1cdca818d9d96},\n    {0x2e99781335e8c641, 0xbddfe5cce47d560e, 0xf74e9bf32e5e040c, 0x1d7a709d65996be9, 0x670df36a9cf66cdd, 0xd05ef84a176a2875, 0x0f888e828cb1c44e, 0x1a79e9c9727b052c},\n    
{0x83497348628d84de, 0x2e9387d51f22a754, 0xb000068da2f852d6, 0x378c9e1190fd6fe5, 0x870027c316de7293, 0xe51a9d4462e047bb, 0x90ecf7f8c6251195, 0x655953bfbed90a9c},\n};\n\nstatic inline uint32_t br_dec32le(const unsigned char *src)\n{\n    return (uint32_t)src[0]\n           | ((uint32_t)src[1] << 8)\n           | ((uint32_t)src[2] << 16)\n           | ((uint32_t)src[3] << 24);\n}\n\nstatic void br_range_dec32le(uint32_t *v, size_t num, const unsigned char *src)\n{\n    while (num-- > 0) {\n        *v ++ = br_dec32le(src);\n        src += 4;\n    }\n}\n\nstatic inline void br_enc32le(unsigned char *dst, uint32_t x)\n{\n    dst[0] = (unsigned char)x;\n    dst[1] = (unsigned char)(x >> 8);\n    dst[2] = (unsigned char)(x >> 16);\n    dst[3] = (unsigned char)(x >> 24);\n}\n\n\nstatic void br_range_enc32le(unsigned char *dst, const uint32_t *v, size_t num)\n{\n    while (num-- > 0) {\n        br_enc32le(dst, *v ++);\n        dst += 4;\n    }\n}\n\nstatic void br_aes_ct64_bitslice_Sbox(uint64_t *q) {\n    /*\n     * This S-box implementation is a straightforward translation of\n     * the circuit described by Boyar and Peralta in \"A new\n     * combinational logic minimization technique with applications\n     * to cryptology\" (https://eprint.iacr.org/2009/191.pdf).\n     *\n     * Note that variables x* (input) and s* (output) are numbered\n     * in \"reverse\" order (x0 is the high bit, x7 is the low bit).\n     */\n\n    uint64_t x0, x1, x2, x3, x4, x5, x6, x7;\n    uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9;\n    uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;\n    uint64_t y20, y21;\n    uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;\n    uint64_t z10, z11, z12, z13, z14, z15, z16, z17;\n    uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;\n    uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;\n    uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;\n    uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;\n    uint64_t t40, t41, 
t42, t43, t44, t45, t46, t47, t48, t49;\n    uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;\n    uint64_t t60, t61, t62, t63, t64, t65, t66, t67;\n    uint64_t s0, s1, s2, s3, s4, s5, s6, s7;\n\n    x0 = q[7];\n    x1 = q[6];\n    x2 = q[5];\n    x3 = q[4];\n    x4 = q[3];\n    x5 = q[2];\n    x6 = q[1];\n    x7 = q[0];\n\n    /*\n     * Top linear transformation.\n     */\n    y14 = x3 ^ x5;\n    y13 = x0 ^ x6;\n    y9 = x0 ^ x3;\n    y8 = x0 ^ x5;\n    t0 = x1 ^ x2;\n    y1 = t0 ^ x7;\n    y4 = y1 ^ x3;\n    y12 = y13 ^ y14;\n    y2 = y1 ^ x0;\n    y5 = y1 ^ x6;\n    y3 = y5 ^ y8;\n    t1 = x4 ^ y12;\n    y15 = t1 ^ x5;\n    y20 = t1 ^ x1;\n    y6 = y15 ^ x7;\n    y10 = y15 ^ t0;\n    y11 = y20 ^ y9;\n    y7 = x7 ^ y11;\n    y17 = y10 ^ y11;\n    y19 = y10 ^ y8;\n    y16 = t0 ^ y11;\n    y21 = y13 ^ y16;\n    y18 = x0 ^ y16;\n\n    /*\n     * Non-linear section.\n     */\n    t2 = y12 & y15;\n    t3 = y3 & y6;\n    t4 = t3 ^ t2;\n    t5 = y4 & x7;\n    t6 = t5 ^ t2;\n    t7 = y13 & y16;\n    t8 = y5 & y1;\n    t9 = t8 ^ t7;\n    t10 = y2 & y7;\n    t11 = t10 ^ t7;\n    t12 = y9 & y11;\n    t13 = y14 & y17;\n    t14 = t13 ^ t12;\n    t15 = y8 & y10;\n    t16 = t15 ^ t12;\n    t17 = t4 ^ t14;\n    t18 = t6 ^ t16;\n    t19 = t9 ^ t14;\n    t20 = t11 ^ t16;\n    t21 = t17 ^ y20;\n    t22 = t18 ^ y19;\n    t23 = t19 ^ y21;\n    t24 = t20 ^ y18;\n\n    t25 = t21 ^ t22;\n    t26 = t21 & t23;\n    t27 = t24 ^ t26;\n    t28 = t25 & t27;\n    t29 = t28 ^ t22;\n    t30 = t23 ^ t24;\n    t31 = t22 ^ t26;\n    t32 = t31 & t30;\n    t33 = t32 ^ t24;\n    t34 = t23 ^ t33;\n    t35 = t27 ^ t33;\n    t36 = t24 & t35;\n    t37 = t36 ^ t34;\n    t38 = t27 ^ t36;\n    t39 = t29 & t38;\n    t40 = t25 ^ t39;\n\n    t41 = t40 ^ t37;\n    t42 = t29 ^ t33;\n    t43 = t29 ^ t40;\n    t44 = t33 ^ t37;\n    t45 = t42 ^ t41;\n    z0 = t44 & y15;\n    z1 = t37 & y6;\n    z2 = t33 & x7;\n    z3 = t43 & y16;\n    z4 = t40 & y1;\n    z5 = t29 & y7;\n    z6 = t42 & y11;\n    z7 = t45 
& y17;\n    z8 = t41 & y10;\n    z9 = t44 & y12;\n    z10 = t37 & y3;\n    z11 = t33 & y4;\n    z12 = t43 & y13;\n    z13 = t40 & y5;\n    z14 = t29 & y2;\n    z15 = t42 & y9;\n    z16 = t45 & y14;\n    z17 = t41 & y8;\n\n    /*\n     * Bottom linear transformation.\n     */\n    t46 = z15 ^ z16;\n    t47 = z10 ^ z11;\n    t48 = z5 ^ z13;\n    t49 = z9 ^ z10;\n    t50 = z2 ^ z12;\n    t51 = z2 ^ z5;\n    t52 = z7 ^ z8;\n    t53 = z0 ^ z3;\n    t54 = z6 ^ z7;\n    t55 = z16 ^ z17;\n    t56 = z12 ^ t48;\n    t57 = t50 ^ t53;\n    t58 = z4 ^ t46;\n    t59 = z3 ^ t54;\n    t60 = t46 ^ t57;\n    t61 = z14 ^ t57;\n    t62 = t52 ^ t58;\n    t63 = t49 ^ t58;\n    t64 = z4 ^ t59;\n    t65 = t61 ^ t62;\n    t66 = z1 ^ t63;\n    s0 = t59 ^ t63;\n    s6 = t56 ^ ~t62;\n    s7 = t48 ^ ~t60;\n    t67 = t64 ^ t65;\n    s3 = t53 ^ t66;\n    s4 = t51 ^ t66;\n    s5 = t47 ^ t65;\n    s1 = t64 ^ ~s3;\n    s2 = t55 ^ ~t67;\n\n    q[7] = s0;\n    q[6] = s1;\n    q[5] = s2;\n    q[4] = s3;\n    q[3] = s4;\n    q[2] = s5;\n    q[1] = s6;\n    q[0] = s7;\n}\n\nstatic void br_aes_ct_bitslice_Sbox(uint32_t *q)\n{\n    /*\n     * This S-box implementation is a straightforward translation of\n     * the circuit described by Boyar and Peralta in \"A new\n     * combinational logic minimization technique with applications\n     * to cryptology\" (https://eprint.iacr.org/2009/191.pdf).\n     *\n     * Note that variables x* (input) and s* (output) are numbered\n     * in \"reverse\" order (x0 is the high bit, x7 is the low bit).\n     */\n\n    uint32_t x0, x1, x2, x3, x4, x5, x6, x7;\n    uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9;\n    uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;\n    uint32_t y20, y21;\n    uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;\n    uint32_t z10, z11, z12, z13, z14, z15, z16, z17;\n    uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;\n    uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;\n    uint32_t t20, t21, t22, t23, t24, t25, t26, t27, 
t28, t29;\n    uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;\n    uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;\n    uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;\n    uint32_t t60, t61, t62, t63, t64, t65, t66, t67;\n    uint32_t s0, s1, s2, s3, s4, s5, s6, s7;\n\n    x0 = q[7];\n    x1 = q[6];\n    x2 = q[5];\n    x3 = q[4];\n    x4 = q[3];\n    x5 = q[2];\n    x6 = q[1];\n    x7 = q[0];\n\n    /*\n     * Top linear transformation.\n     */\n    y14 = x3 ^ x5;\n    y13 = x0 ^ x6;\n    y9 = x0 ^ x3;\n    y8 = x0 ^ x5;\n    t0 = x1 ^ x2;\n    y1 = t0 ^ x7;\n    y4 = y1 ^ x3;\n    y12 = y13 ^ y14;\n    y2 = y1 ^ x0;\n    y5 = y1 ^ x6;\n    y3 = y5 ^ y8;\n    t1 = x4 ^ y12;\n    y15 = t1 ^ x5;\n    y20 = t1 ^ x1;\n    y6 = y15 ^ x7;\n    y10 = y15 ^ t0;\n    y11 = y20 ^ y9;\n    y7 = x7 ^ y11;\n    y17 = y10 ^ y11;\n    y19 = y10 ^ y8;\n    y16 = t0 ^ y11;\n    y21 = y13 ^ y16;\n    y18 = x0 ^ y16;\n\n    /*\n     * Non-linear section.\n     */\n    t2 = y12 & y15;\n    t3 = y3 & y6;\n    t4 = t3 ^ t2;\n    t5 = y4 & x7;\n    t6 = t5 ^ t2;\n    t7 = y13 & y16;\n    t8 = y5 & y1;\n    t9 = t8 ^ t7;\n    t10 = y2 & y7;\n    t11 = t10 ^ t7;\n    t12 = y9 & y11;\n    t13 = y14 & y17;\n    t14 = t13 ^ t12;\n    t15 = y8 & y10;\n    t16 = t15 ^ t12;\n    t17 = t4 ^ t14;\n    t18 = t6 ^ t16;\n    t19 = t9 ^ t14;\n    t20 = t11 ^ t16;\n    t21 = t17 ^ y20;\n    t22 = t18 ^ y19;\n    t23 = t19 ^ y21;\n    t24 = t20 ^ y18;\n\n    t25 = t21 ^ t22;\n    t26 = t21 & t23;\n    t27 = t24 ^ t26;\n    t28 = t25 & t27;\n    t29 = t28 ^ t22;\n    t30 = t23 ^ t24;\n    t31 = t22 ^ t26;\n    t32 = t31 & t30;\n    t33 = t32 ^ t24;\n    t34 = t23 ^ t33;\n    t35 = t27 ^ t33;\n    t36 = t24 & t35;\n    t37 = t36 ^ t34;\n    t38 = t27 ^ t36;\n    t39 = t29 & t38;\n    t40 = t25 ^ t39;\n\n    t41 = t40 ^ t37;\n    t42 = t29 ^ t33;\n    t43 = t29 ^ t40;\n    t44 = t33 ^ t37;\n    t45 = t42 ^ t41;\n    z0 = t44 & y15;\n    z1 = t37 & y6;\n    z2 = t33 & 
x7;\n    z3 = t43 & y16;\n    z4 = t40 & y1;\n    z5 = t29 & y7;\n    z6 = t42 & y11;\n    z7 = t45 & y17;\n    z8 = t41 & y10;\n    z9 = t44 & y12;\n    z10 = t37 & y3;\n    z11 = t33 & y4;\n    z12 = t43 & y13;\n    z13 = t40 & y5;\n    z14 = t29 & y2;\n    z15 = t42 & y9;\n    z16 = t45 & y14;\n    z17 = t41 & y8;\n\n    /*\n     * Bottom linear transformation.\n     */\n    t46 = z15 ^ z16;\n    t47 = z10 ^ z11;\n    t48 = z5 ^ z13;\n    t49 = z9 ^ z10;\n    t50 = z2 ^ z12;\n    t51 = z2 ^ z5;\n    t52 = z7 ^ z8;\n    t53 = z0 ^ z3;\n    t54 = z6 ^ z7;\n    t55 = z16 ^ z17;\n    t56 = z12 ^ t48;\n    t57 = t50 ^ t53;\n    t58 = z4 ^ t46;\n    t59 = z3 ^ t54;\n    t60 = t46 ^ t57;\n    t61 = z14 ^ t57;\n    t62 = t52 ^ t58;\n    t63 = t49 ^ t58;\n    t64 = z4 ^ t59;\n    t65 = t61 ^ t62;\n    t66 = z1 ^ t63;\n    s0 = t59 ^ t63;\n    s6 = t56 ^ ~t62;\n    s7 = t48 ^ ~t60;\n    t67 = t64 ^ t65;\n    s3 = t53 ^ t66;\n    s4 = t51 ^ t66;\n    s5 = t47 ^ t65;\n    s1 = t64 ^ ~s3;\n    s2 = t55 ^ ~t67;\n\n    q[7] = s0;\n    q[6] = s1;\n    q[5] = s2;\n    q[4] = s3;\n    q[3] = s4;\n    q[2] = s5;\n    q[1] = s6;\n    q[0] = s7;\n}\n\nstatic void br_aes_ct_ortho(uint32_t *q)\n{\n#define SWAPN_32(cl, ch, s, x, y)   do { \\\n        uint32_t a, b; \\\n        a = (x); \\\n        b = (y); \\\n        (x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \\\n        (y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \\\n    } while (0)\n\n#define SWAP2_32(x, y)   SWAPN_32(0x55555555, 0xAAAAAAAA, 1, x, y)\n#define SWAP4_32(x, y)   SWAPN_32(0x33333333, 0xCCCCCCCC, 2, x, y)\n#define SWAP8_32(x, y)   SWAPN_32(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)\n\n    SWAP2_32(q[0], q[1]);\n    SWAP2_32(q[2], q[3]);\n    SWAP2_32(q[4], q[5]);\n    SWAP2_32(q[6], q[7]);\n\n    SWAP4_32(q[0], q[2]);\n    SWAP4_32(q[1], q[3]);\n    SWAP4_32(q[4], q[6]);\n    SWAP4_32(q[5], q[7]);\n\n    SWAP8_32(q[0], q[4]);\n    SWAP8_32(q[1], q[5]);\n    SWAP8_32(q[2], q[6]);\n    SWAP8_32(q[3], 
q[7]);\n}\n\nstatic inline void add_round_key32(uint32_t *q, const uint32_t *sk)\n{\n    q[0] ^= sk[0];\n    q[1] ^= sk[1];\n    q[2] ^= sk[2];\n    q[3] ^= sk[3];\n    q[4] ^= sk[4];\n    q[5] ^= sk[5];\n    q[6] ^= sk[6];\n    q[7] ^= sk[7];\n}\n\nstatic inline void shift_rows32(uint32_t *q)\n{\n    int i;\n\n    for (i = 0; i < 8; i++) {\n        uint32_t x;\n\n        x = q[i];\n        q[i] = (x & 0x000000FF)\n            | ((x & 0x0000FC00) >> 2) | ((x & 0x00000300) << 6)\n            | ((x & 0x00F00000) >> 4) | ((x & 0x000F0000) << 4)\n            | ((x & 0xC0000000) >> 6) | ((x & 0x3F000000) << 2);\n    }\n}\n\nstatic inline uint32_t rotr16(uint32_t x)\n{\n    return (x << 16) | (x >> 16);\n}\n\nstatic inline void mix_columns32(uint32_t *q)\n{\n    uint32_t q0, q1, q2, q3, q4, q5, q6, q7;\n    uint32_t r0, r1, r2, r3, r4, r5, r6, r7;\n\n    q0 = q[0];\n    q1 = q[1];\n    q2 = q[2];\n    q3 = q[3];\n    q4 = q[4];\n    q5 = q[5];\n    q6 = q[6];\n    q7 = q[7];\n    r0 = (q0 >> 8) | (q0 << 24);\n    r1 = (q1 >> 8) | (q1 << 24);\n    r2 = (q2 >> 8) | (q2 << 24);\n    r3 = (q3 >> 8) | (q3 << 24);\n    r4 = (q4 >> 8) | (q4 << 24);\n    r5 = (q5 >> 8) | (q5 << 24);\n    r6 = (q6 >> 8) | (q6 << 24);\n    r7 = (q7 >> 8) | (q7 << 24);\n\n    q[0] = q7 ^ r7 ^ r0 ^ rotr16(q0 ^ r0);\n    q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr16(q1 ^ r1);\n    q[2] = q1 ^ r1 ^ r2 ^ rotr16(q2 ^ r2);\n    q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr16(q3 ^ r3);\n    q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr16(q4 ^ r4);\n    q[5] = q4 ^ r4 ^ r5 ^ rotr16(q5 ^ r5);\n    q[6] = q5 ^ r5 ^ r6 ^ rotr16(q6 ^ r6);\n    q[7] = q6 ^ r6 ^ r7 ^ rotr16(q7 ^ r7);\n}\n\nstatic void br_aes_ct64_ortho(uint64_t *q)\n{\n#define SWAPN(cl, ch, s, x, y)   do { \\\n        uint64_t a, b; \\\n        a = (x); \\\n        b = (y); \\\n        (x) = (a & (uint64_t)(cl)) | ((b & (uint64_t)(cl)) << (s)); \\\n        (y) = ((a & (uint64_t)(ch)) >> (s)) | (b & (uint64_t)(ch)); \\\n    } while (0)\n\n#define SWAP2(x, y)    
SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA,  1, x, y)\n#define SWAP4(x, y)    SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC,  2, x, y)\n#define SWAP8(x, y)    SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0,  4, x, y)\n\n    SWAP2(q[0], q[1]);\n    SWAP2(q[2], q[3]);\n    SWAP2(q[4], q[5]);\n    SWAP2(q[6], q[7]);\n\n    SWAP4(q[0], q[2]);\n    SWAP4(q[1], q[3]);\n    SWAP4(q[4], q[6]);\n    SWAP4(q[5], q[7]);\n\n    SWAP8(q[0], q[4]);\n    SWAP8(q[1], q[5]);\n    SWAP8(q[2], q[6]);\n    SWAP8(q[3], q[7]);\n}\n\n\nstatic void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)\n{\n    uint64_t x0, x1, x2, x3;\n\n    x0 = w[0];\n    x1 = w[1];\n    x2 = w[2];\n    x3 = w[3];\n    x0 |= (x0 << 16);\n    x1 |= (x1 << 16);\n    x2 |= (x2 << 16);\n    x3 |= (x3 << 16);\n    x0 &= (uint64_t)0x0000FFFF0000FFFF;\n    x1 &= (uint64_t)0x0000FFFF0000FFFF;\n    x2 &= (uint64_t)0x0000FFFF0000FFFF;\n    x3 &= (uint64_t)0x0000FFFF0000FFFF;\n    x0 |= (x0 << 8);\n    x1 |= (x1 << 8);\n    x2 |= (x2 << 8);\n    x3 |= (x3 << 8);\n    x0 &= (uint64_t)0x00FF00FF00FF00FF;\n    x1 &= (uint64_t)0x00FF00FF00FF00FF;\n    x2 &= (uint64_t)0x00FF00FF00FF00FF;\n    x3 &= (uint64_t)0x00FF00FF00FF00FF;\n    *q0 = x0 | (x2 << 8);\n    *q1 = x1 | (x3 << 8);\n}\n\n\nstatic void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)\n{\n    uint64_t x0, x1, x2, x3;\n\n    x0 = q0 & (uint64_t)0x00FF00FF00FF00FF;\n    x1 = q1 & (uint64_t)0x00FF00FF00FF00FF;\n    x2 = (q0 >> 8) & (uint64_t)0x00FF00FF00FF00FF;\n    x3 = (q1 >> 8) & (uint64_t)0x00FF00FF00FF00FF;\n    x0 |= (x0 >> 8);\n    x1 |= (x1 >> 8);\n    x2 |= (x2 >> 8);\n    x3 |= (x3 >> 8);\n    x0 &= (uint64_t)0x0000FFFF0000FFFF;\n    x1 &= (uint64_t)0x0000FFFF0000FFFF;\n    x2 &= (uint64_t)0x0000FFFF0000FFFF;\n    x3 &= (uint64_t)0x0000FFFF0000FFFF;\n    w[0] = (uint32_t)x0 | (uint32_t)(x0 >> 16);\n    w[1] = (uint32_t)x1 | (uint32_t)(x1 >> 16);\n    w[2] = (uint32_t)x2 | (uint32_t)(x2 >> 16);\n    w[3] = 
(uint32_t)x3 | (uint32_t)(x3 >> 16);\n}\n\nstatic inline void add_round_key(uint64_t *q, const uint64_t *sk)\n{\n    q[0] ^= sk[0];\n    q[1] ^= sk[1];\n    q[2] ^= sk[2];\n    q[3] ^= sk[3];\n    q[4] ^= sk[4];\n    q[5] ^= sk[5];\n    q[6] ^= sk[6];\n    q[7] ^= sk[7];\n}\n\nstatic inline void shift_rows(uint64_t *q)\n{\n    int i;\n\n    for (i = 0; i < 8; i++) {\n        uint64_t x;\n\n        x = q[i];\n        q[i] = (x & (uint64_t)0x000000000000FFFF)\n               | ((x & (uint64_t)0x00000000FFF00000) >> 4)\n               | ((x & (uint64_t)0x00000000000F0000) << 12)\n               | ((x & (uint64_t)0x0000FF0000000000) >> 8)\n               | ((x & (uint64_t)0x000000FF00000000) << 8)\n               | ((x & (uint64_t)0xF000000000000000) >> 12)\n               | ((x & (uint64_t)0x0FFF000000000000) << 4);\n    }\n}\n\nstatic inline uint64_t rotr32(uint64_t x)\n{\n    return (x << 32) | (x >> 32);\n}\n\nstatic inline void mix_columns(uint64_t *q)\n{\n    uint64_t q0, q1, q2, q3, q4, q5, q6, q7;\n    uint64_t r0, r1, r2, r3, r4, r5, r6, r7;\n\n    q0 = q[0];\n    q1 = q[1];\n    q2 = q[2];\n    q3 = q[3];\n    q4 = q[4];\n    q5 = q[5];\n    q6 = q[6];\n    q7 = q[7];\n    r0 = (q0 >> 16) | (q0 << 48);\n    r1 = (q1 >> 16) | (q1 << 48);\n    r2 = (q2 >> 16) | (q2 << 48);\n    r3 = (q3 >> 16) | (q3 << 48);\n    r4 = (q4 >> 16) | (q4 << 48);\n    r5 = (q5 >> 16) | (q5 << 48);\n    r6 = (q6 >> 16) | (q6 << 48);\n    r7 = (q7 >> 16) | (q7 << 48);\n\n    q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0);\n    q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1);\n    q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2);\n    q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3);\n    q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr32(q4 ^ r4);\n    q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5);\n    q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6);\n    q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7);\n}\n\nstatic void interleave_constant(uint64_t *out, const unsigned char *in)\n{\n    uint32_t tmp_32_constant[16];\n    int i;\n\n    
br_range_dec32le(tmp_32_constant, 16, in);\n    for (i = 0; i < 4; i++) {\n        br_aes_ct64_interleave_in(&out[i], &out[i + 4], tmp_32_constant + (i << 2));\n    }\n    br_aes_ct64_ortho(out);\n}\n\nstatic void interleave_constant32(uint32_t *out, const unsigned char *in)\n{\n    int i;\n    for (i = 0; i < 4; i++) {\n        out[2*i] = br_dec32le(in + 4*i);\n        out[2*i + 1] = br_dec32le(in + 4*i + 16);\n    }\n    br_aes_ct_ortho(out);\n}\n\nvoid tweak_constants(spx_ctx *ctx)\n{\n    unsigned char buf[40*16];\n    int i;\n\n    /* Use the standard constants to generate tweaked ones. */\n    memcpy((uint8_t *)ctx->tweaked512_rc64, (uint8_t *)haraka512_rc64, 40*16);\n\n    /* Constants for pk.seed */\n    haraka_S(buf, 40*16, ctx->pub_seed, SPX_N, ctx);\n    for (i = 0; i < 10; i++) {\n        interleave_constant32(ctx->tweaked256_rc32[i], buf + 32*i);\n        interleave_constant(ctx->tweaked512_rc64[i], buf + 64*i);\n    }\n}\n\nstatic void haraka_S_absorb(unsigned char *s, unsigned int r,\n                            const unsigned char *m, unsigned long long mlen,\n                            unsigned char p, const spx_ctx *ctx)\n{\n    unsigned long long i;\n    SPX_VLA(uint8_t, t, r);\n\n    while (mlen >= r) {\n        /* XOR block to state */\n        for (i = 0; i < r; ++i) {\n            s[i] ^= m[i];\n        }\n        haraka512_perm(s, s, ctx);\n        mlen -= r;\n        m += r;\n    }\n\n    for (i = 0; i < r; ++i) {\n        t[i] = 0;\n    }\n    for (i = 0; i < mlen; ++i) {\n        t[i] = m[i];\n    }\n    t[i] = p;\n    t[r - 1] |= 128;\n    for (i = 0; i < r; ++i) {\n        s[i] ^= t[i];\n    }\n}\n\nstatic void haraka_S_squeezeblocks(unsigned char *h, unsigned long long nblocks,\n                                   unsigned char *s, unsigned int r,\n                                   const spx_ctx *ctx)\n{\n    while (nblocks > 0) {\n        haraka512_perm(s, s, ctx);\n        memcpy(h, s, HARAKAS_RATE);\n        h += r;\n        
nblocks--;\n    }\n}\n\nvoid haraka_S_inc_init(uint8_t *s_inc)\n{\n    size_t i;\n\n    for (i = 0; i < 64; i++) {\n        s_inc[i] = 0;\n    }\n    s_inc[64] = 0;\n}\n\nvoid haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen,\n        const spx_ctx *ctx)\n{\n    size_t i;\n\n    /* Recall that s_inc[64] is the non-absorbed bytes xored into the state */\n    while (mlen + s_inc[64] >= HARAKAS_RATE) {\n        for (i = 0; i < (size_t)(HARAKAS_RATE - s_inc[64]); i++) {\n            /* Take the i'th byte from message\n               xor with the s_inc[64] + i'th byte of the state */\n            s_inc[s_inc[64] + i] ^= m[i];\n        }\n        mlen -= (size_t)(HARAKAS_RATE - s_inc[64]);\n        m += HARAKAS_RATE - (uint8_t)s_inc[64];\n        s_inc[64] = 0;\n\n        haraka512_perm(s_inc, s_inc, ctx);\n    }\n\n    for (i = 0; i < mlen; i++) {\n        s_inc[s_inc[64] + i] ^= m[i];\n    }\n    s_inc[64] += (uint8_t)mlen;\n}\n\nvoid haraka_S_inc_finalize(uint8_t *s_inc)\n{\n    /* After haraka_S_inc_absorb, we are guaranteed that s_inc[64] < HARAKAS_RATE,\n       so we can always use one more byte for p in the current state. */\n    s_inc[s_inc[64]] ^= 0x1F;\n    s_inc[HARAKAS_RATE - 1] ^= 128;\n    s_inc[64] = 0;\n}\n\nvoid haraka_S_inc_squeeze(uint8_t *out, size_t outlen, uint8_t *s_inc,\n        const spx_ctx *ctx)\n{\n    size_t i;\n\n    /* First consume any bytes we still have sitting around */\n    for (i = 0; i < outlen && i < s_inc[64]; i++) {\n        /* There are s_inc[64] bytes left, so r - s_inc[64] is the first\n           available byte. We consume from there, i.e., up to r. 
*/\n        out[i] = (uint8_t)s_inc[(HARAKAS_RATE - s_inc[64] + i)];\n    }\n    out += i;\n    outlen -= i;\n    s_inc[64] -= (uint8_t)i;\n\n    /* Then squeeze the remaining necessary blocks */\n    while (outlen > 0) {\n        haraka512_perm(s_inc, s_inc, ctx);\n\n        for (i = 0; i < outlen && i < HARAKAS_RATE; i++) {\n            out[i] = s_inc[i];\n        }\n        out += i;\n        outlen -= i;\n        s_inc[64] = (uint8_t)(HARAKAS_RATE - i);\n    }\n}\n\nvoid haraka_S(unsigned char *out, unsigned long long outlen,\n              const unsigned char *in, unsigned long long inlen,\n              const spx_ctx *ctx)\n{\n    unsigned long long i;\n    unsigned char s[64];\n    unsigned char d[32];\n\n    for (i = 0; i < 64; i++) {\n        s[i] = 0;\n    }\n    haraka_S_absorb(s, 32, in, inlen, 0x1F, ctx);\n\n    haraka_S_squeezeblocks(out, outlen / 32, s, 32, ctx);\n    out += (outlen / 32) * 32;\n\n    if (outlen % 32) {\n        haraka_S_squeezeblocks(d, 1, s, 32, ctx);\n        for (i = 0; i < outlen % 32; i++) {\n            out[i] = d[i];\n        }\n    }\n}\n\nvoid haraka512_perm(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx)\n{\n    uint32_t w[16];\n    uint64_t q[8], tmp_q;\n    unsigned int i, j;\n\n    br_range_dec32le(w, 16, in);\n    for (i = 0; i < 4; i++) {\n        br_aes_ct64_interleave_in(&q[i], &q[i + 4], w + (i << 2));\n    }\n    br_aes_ct64_ortho(q);\n\n    /* AES rounds */\n    for (i = 0; i < 5; i++) {\n        for (j = 0; j < 2; j++) {\n            br_aes_ct64_bitslice_Sbox(q);\n            shift_rows(q);\n            mix_columns(q);\n            add_round_key(q, ctx->tweaked512_rc64[2*i + j]);\n        }\n        /* Mix states */\n        for (j = 0; j < 8; j++) {\n            tmp_q = q[j];\n            q[j] = (tmp_q & 0x0001000100010001) << 5 |\n                   (tmp_q & 0x0002000200020002) << 12 |\n                   (tmp_q & 0x0004000400040004) >> 1 |\n                   (tmp_q & 
0x0008000800080008) << 6 |\n                   (tmp_q & 0x0020002000200020) << 9 |\n                   (tmp_q & 0x0040004000400040) >> 4 |\n                   (tmp_q & 0x0080008000800080) << 3 |\n                   (tmp_q & 0x2100210021002100) >> 5 |\n                   (tmp_q & 0x0210021002100210) << 2 |\n                   (tmp_q & 0x0800080008000800) << 4 |\n                   (tmp_q & 0x1000100010001000) >> 12 |\n                   (tmp_q & 0x4000400040004000) >> 10 |\n                   (tmp_q & 0x8400840084008400) >> 3;\n        }\n    }\n\n    br_aes_ct64_ortho(q);\n    for (i = 0; i < 4; i ++) {\n        br_aes_ct64_interleave_out(w + (i << 2), q[i], q[i + 4]);\n    }\n    br_range_enc32le(out, w, 16);\n}\n\nvoid haraka512(unsigned char *out, const unsigned char *in, const spx_ctx *ctx)\n{\n    int i;\n\n    unsigned char buf[64];\n\n    haraka512_perm(buf, in, ctx);\n    /* Feed-forward */\n    for (i = 0; i < 64; i++) {\n        buf[i] = buf[i] ^ in[i];\n    }\n\n    /* Truncated */\n    memcpy(out,      buf + 8, 8);\n    memcpy(out + 8,  buf + 24, 8);\n    memcpy(out + 16, buf + 32, 8);\n    memcpy(out + 24, buf + 48, 8);\n}\n\n\nvoid haraka256(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx)\n{\n    uint32_t q[8], tmp_q;\n    int i, j;\n\n    for (i = 0; i < 4; i++) {\n        q[2*i] = br_dec32le(in + 4*i);\n        q[2*i + 1] = br_dec32le(in + 4*i + 16);\n    }\n    br_aes_ct_ortho(q);\n\n    /* AES rounds */\n    for (i = 0; i < 5; i++) {\n        for (j = 0; j < 2; j++) {\n            br_aes_ct_bitslice_Sbox(q);\n            shift_rows32(q);\n            mix_columns32(q);\n            add_round_key32(q, ctx->tweaked256_rc32[2*i + j]);\n        }\n\n        /* Mix states */\n        for (j = 0; j < 8; j++) {\n            tmp_q = q[j];\n            q[j] = (tmp_q & 0x81818181) |\n                   (tmp_q & 0x02020202) << 1 |\n                   (tmp_q & 0x04040404) << 2 |\n                   (tmp_q & 0x08080808) << 3 |\n        
           (tmp_q & 0x10101010) >> 3 |\n                   (tmp_q & 0x20202020) >> 2 |\n                   (tmp_q & 0x40404040) >> 1;\n        }\n    }\n\n    br_aes_ct_ortho(q);\n    for (i = 0; i < 4; i++) {\n        br_enc32le(out + 4*i, q[2*i]);\n        br_enc32le(out + 4*i + 16, q[2*i + 1]);\n    }\n\n    for (i = 0; i < 32; i++) {\n        out[i] ^= in[i];\n    }\n}\n"
  },
  {
    "path": "ref/haraka.h",
    "content": "#ifndef SPX_HARAKA_H\n#define SPX_HARAKA_H\n\n#include \"context.h\"\n\n/* Tweak constants with seed */\n#define tweak_constants SPX_NAMESPACE(tweak_constants)\nvoid tweak_constants(spx_ctx *ctx);\n\n/* Haraka Sponge */\n#define haraka_S_inc_init SPX_NAMESPACE(haraka_S_inc_init)\nvoid haraka_S_inc_init(uint8_t *s_inc);\n#define haraka_S_inc_absorb SPX_NAMESPACE(haraka_S_inc_absorb)\nvoid haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen,\n        const spx_ctx *ctx);\n#define haraka_S_inc_finalize SPX_NAMESPACE(haraka_S_inc_finalize)\nvoid haraka_S_inc_finalize(uint8_t *s_inc);\n#define haraka_S_inc_squeeze SPX_NAMESPACE(haraka_S_inc_squeeze)\nvoid haraka_S_inc_squeeze(uint8_t *out, size_t outlen, uint8_t *s_inc,\n        const spx_ctx *ctx);\n#define haraka_S SPX_NAMESPACE(haraka_S)\nvoid haraka_S(unsigned char *out, unsigned long long outlen,\n              const unsigned char *in, unsigned long long inlen,\n              const spx_ctx *ctx);\n\n/* Applies the 512-bit Haraka permutation to in. */\n#define haraka512_perm SPX_NAMESPACE(haraka512_perm)\nvoid haraka512_perm(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx);\n\n/* Implementation of Haraka-512 */\n#define haraka512 SPX_NAMESPACE(haraka512)\nvoid haraka512(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx);\n\n/* Implementation of Haraka-256 */\n#define haraka256 SPX_NAMESPACE(haraka256)\nvoid haraka256(unsigned char *out, const unsigned char *in,\n        const spx_ctx *ctx);\n\n#endif\n"
  },
  {
    "path": "ref/haraka_offsets.h",
    "content": "#if !defined( HARAKA_OFFSETS_H_ )\n#define HARAKA_OFFSETS_H_\n\n/*\n * Offsets of various fields in the address structure when we use Haraka as\n * the Sphincs+ hash function\n */\n\n#define SPX_OFFSET_LAYER     3   /* The byte used to specify the Merkle tree layer */\n#define SPX_OFFSET_TREE      8   /* The start of the 8 byte field used to specify the tree */\n#define SPX_OFFSET_TYPE      19  /* The byte used to specify the hash type (reason) */\n#define SPX_OFFSET_KP_ADDR   20  /* The start of the 4 byte field used to specify the key pair address */\n#define SPX_OFFSET_CHAIN_ADDR 27  /* The byte used to specify the chain address (which Winternitz chain) */\n#define SPX_OFFSET_HASH_ADDR 31  /* The byte used to specify the hash address (where in the Winternitz chain) */\n#define SPX_OFFSET_TREE_HGT  27  /* The byte used to specify the height of this node in the FORS or Merkle tree */\n#define SPX_OFFSET_TREE_INDEX 28 /* The start of the 4 byte field used to specify the node in the FORS or Merkle tree */\n\n#define SPX_HARAKA 1\n\n#endif /* HARAKA_OFFSETS_H_ */\n"
  },
  {
    "path": "ref/hash.h",
    "content": "#ifndef SPX_HASH_H\n#define SPX_HASH_H\n\n#include <stdint.h>\n#include \"context.h\"\n#include \"params.h\"\n\n#define initialize_hash_function SPX_NAMESPACE(initialize_hash_function)\nvoid initialize_hash_function(spx_ctx *ctx);\n\n#define prf_addr SPX_NAMESPACE(prf_addr)\nvoid prf_addr(unsigned char *out, const spx_ctx *ctx,\n              const uint32_t addr[8]);\n\n#define gen_message_random SPX_NAMESPACE(gen_message_random)\nvoid gen_message_random(unsigned char *R, const unsigned char *sk_prf,\n                        const unsigned char *optrand,\n                        const unsigned char *m, unsigned long long mlen,\n                        const spx_ctx *ctx);\n\n#define hash_message SPX_NAMESPACE(hash_message)\nvoid hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx,\n                  const unsigned char *R, const unsigned char *pk,\n                  const unsigned char *m, unsigned long long mlen,\n                  const spx_ctx *ctx);\n\n#endif\n"
  },
  {
    "path": "ref/hash_haraka.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"utils.h\"\n#include \"params.h\"\n\n#include \"haraka.h\"\n#include \"hash.h\"\n\nvoid initialize_hash_function(spx_ctx* ctx)\n{\n    tweak_constants(ctx);\n}\n\n/*\n * Computes PRF(key, addr), given a secret key of SPX_N bytes and an address\n */\nvoid prf_addr(unsigned char *out, const spx_ctx *ctx,\n              const uint32_t addr[8])\n{\n    /* Since SPX_N may be smaller than 32, we need temporary buffers. */\n    unsigned char outbuf[32];\n    unsigned char buf[64] = {0};\n\n    memcpy(buf, addr, SPX_ADDR_BYTES);\n    memcpy(buf + SPX_ADDR_BYTES, ctx->sk_seed, SPX_N);\n\n    haraka512(outbuf, (const void *)buf, ctx);\n    memcpy(out, outbuf, SPX_N);\n}\n\n/**\n * Computes the message-dependent randomness R, using a secret seed and an\n * optional randomization value as well as the message.\n */\nvoid gen_message_random(unsigned char *R, const unsigned char* sk_prf,\n                        const unsigned char *optrand,\n                        const unsigned char *m, unsigned long long mlen,\n                        const spx_ctx *ctx)\n{\n    uint8_t s_inc[65];\n\n    haraka_S_inc_init(s_inc);\n    haraka_S_inc_absorb(s_inc, sk_prf, SPX_N, ctx);\n    haraka_S_inc_absorb(s_inc, optrand, SPX_N, ctx);\n    haraka_S_inc_absorb(s_inc, m, mlen, ctx);\n    haraka_S_inc_finalize(s_inc);\n    haraka_S_inc_squeeze(R, SPX_N, s_inc, ctx);\n}\n\n/**\n * Computes the message hash using R, the public key, and the message.\n * Outputs the message digest and the index of the leaf. 
The index is split in\n * the tree index and the leaf index, for convenient copying to an address.\n */\nvoid hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx,\n                  const unsigned char *R, const unsigned char *pk,\n                  const unsigned char *m, unsigned long long mlen,\n                  const spx_ctx *ctx)\n{\n#define SPX_TREE_BITS (SPX_TREE_HEIGHT * (SPX_D - 1))\n#define SPX_TREE_BYTES ((SPX_TREE_BITS + 7) / 8)\n#define SPX_LEAF_BITS SPX_TREE_HEIGHT\n#define SPX_LEAF_BYTES ((SPX_LEAF_BITS + 7) / 8)\n#define SPX_DGST_BYTES (SPX_FORS_MSG_BYTES + SPX_TREE_BYTES + SPX_LEAF_BYTES)\n\n    unsigned char buf[SPX_DGST_BYTES];\n    unsigned char *bufp = buf;\n    uint8_t s_inc[65];\n\n    haraka_S_inc_init(s_inc);\n    haraka_S_inc_absorb(s_inc, R, SPX_N, ctx);\n    haraka_S_inc_absorb(s_inc, pk + SPX_N, SPX_N, ctx); // Only absorb root part of pk\n    haraka_S_inc_absorb(s_inc, m, mlen, ctx);\n    haraka_S_inc_finalize(s_inc);\n    haraka_S_inc_squeeze(buf, SPX_DGST_BYTES, s_inc, ctx);\n\n    memcpy(digest, bufp, SPX_FORS_MSG_BYTES);\n    bufp += SPX_FORS_MSG_BYTES;\n\n#if SPX_TREE_BITS > 64\n    #error For given height and depth, 64 bits cannot represent all subtrees\n#endif\n\n    if (SPX_D == 1) {\n\t*tree = 0;\n    } else {\n        *tree = bytes_to_ull(bufp, SPX_TREE_BYTES);\n        *tree &= (~(uint64_t)0) >> (64 - SPX_TREE_BITS);\n    }\n    bufp += SPX_TREE_BYTES;\n\n    *leaf_idx = (uint32_t)bytes_to_ull(bufp, SPX_LEAF_BYTES);\n    *leaf_idx &= (~(uint32_t)0) >> (32 - SPX_LEAF_BITS);\n}\n"
  },
  {
    "path": "ref/hash_sha2.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"utils.h\"\n#include \"params.h\"\n#include \"hash.h\"\n#include \"sha2.h\"\n\n#if SPX_N >= 24\n#define SPX_SHAX_OUTPUT_BYTES SPX_SHA512_OUTPUT_BYTES\n#define SPX_SHAX_BLOCK_BYTES SPX_SHA512_BLOCK_BYTES\n#define shaX_inc_init sha512_inc_init\n#define shaX_inc_blocks sha512_inc_blocks\n#define shaX_inc_finalize sha512_inc_finalize\n#define shaX sha512\n#define mgf1_X mgf1_512\n#else\n#define SPX_SHAX_OUTPUT_BYTES SPX_SHA256_OUTPUT_BYTES\n#define SPX_SHAX_BLOCK_BYTES SPX_SHA256_BLOCK_BYTES\n#define shaX_inc_init sha256_inc_init\n#define shaX_inc_blocks sha256_inc_blocks\n#define shaX_inc_finalize sha256_inc_finalize\n#define shaX sha256\n#define mgf1_X mgf1_256\n#endif\n\n\n/* For SHA, there is no immediate reason to initialize at the start,\n   so this function is an empty operation. */\nvoid initialize_hash_function(spx_ctx *ctx)\n{\n    seed_state(ctx);\n}\n\n/*\n * Computes PRF(pk_seed, sk_seed, addr).\n */\nvoid prf_addr(unsigned char *out, const spx_ctx *ctx,\n              const uint32_t addr[8])\n{\n    uint8_t sha2_state[40];\n    unsigned char buf[SPX_SHA256_ADDR_BYTES + SPX_N];\n    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];\n\n    /* Retrieve precomputed state containing pub_seed */\n    memcpy(sha2_state, ctx->state_seeded, 40 * sizeof(uint8_t));\n\n    /* Remainder: ADDR^c ‖ SK.seed */\n    memcpy(buf, addr, SPX_SHA256_ADDR_BYTES);\n    memcpy(buf + SPX_SHA256_ADDR_BYTES, ctx->sk_seed, SPX_N);\n\n    sha256_inc_finalize(outbuf, sha2_state, buf, SPX_SHA256_ADDR_BYTES + SPX_N);\n\n    memcpy(out, outbuf, SPX_N);\n}\n\n/**\n * Computes the message-dependent randomness R, using a secret seed as a key\n * for HMAC, and an optional randomization value prefixed to the message.\n * This requires m to have at least SPX_SHAX_BLOCK_BYTES + SPX_N space\n * available in front of the pointer, i.e. before the message to use for the\n * prefix. 
This is necessary to prevent having to move the message around (and\n * allocate memory for it).\n */\nvoid gen_message_random(unsigned char *R, const unsigned char *sk_prf,\n                        const unsigned char *optrand,\n                        const unsigned char *m, unsigned long long mlen,\n                        const spx_ctx *ctx)\n{\n    (void)ctx;\n\n    unsigned char buf[SPX_SHAX_BLOCK_BYTES + SPX_SHAX_OUTPUT_BYTES];\n    uint8_t state[8 + SPX_SHAX_OUTPUT_BYTES];\n    int i;\n\n#if SPX_N > SPX_SHAX_BLOCK_BYTES\n    #error \"Currently only supports SPX_N of at most SPX_SHAX_BLOCK_BYTES\"\n#endif\n\n    /* This implements HMAC-SHA */\n    for (i = 0; i < SPX_N; i++) {\n        buf[i] = 0x36 ^ sk_prf[i];\n    }\n    memset(buf + SPX_N, 0x36, SPX_SHAX_BLOCK_BYTES - SPX_N);\n\n    shaX_inc_init(state);\n    shaX_inc_blocks(state, buf, 1);\n\n    memcpy(buf, optrand, SPX_N);\n\n    /* If optrand + message cannot fill up an entire block */\n    if (SPX_N + mlen < SPX_SHAX_BLOCK_BYTES) {\n        memcpy(buf + SPX_N, m, mlen);\n        shaX_inc_finalize(buf + SPX_SHAX_BLOCK_BYTES, state,\n                            buf, mlen + SPX_N);\n    }\n    /* Otherwise first fill a block, so that finalize only uses the message */\n    else {\n        memcpy(buf + SPX_N, m, SPX_SHAX_BLOCK_BYTES - SPX_N);\n        shaX_inc_blocks(state, buf, 1);\n\n        m += SPX_SHAX_BLOCK_BYTES - SPX_N;\n        mlen -= SPX_SHAX_BLOCK_BYTES - SPX_N;\n        shaX_inc_finalize(buf + SPX_SHAX_BLOCK_BYTES, state, m, mlen);\n    }\n\n    for (i = 0; i < SPX_N; i++) {\n        buf[i] = 0x5c ^ sk_prf[i];\n    }\n    memset(buf + SPX_N, 0x5c, SPX_SHAX_BLOCK_BYTES - SPX_N);\n\n    shaX(buf, buf, SPX_SHAX_BLOCK_BYTES + SPX_SHAX_OUTPUT_BYTES);\n    memcpy(R, buf, SPX_N);\n}\n\n/**\n * Computes the message hash using R, the public key, and the message.\n * Outputs the message digest and the index of the leaf. 
The index is split in\n * the tree index and the leaf index, for convenient copying to an address.\n */\nvoid hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx,\n                  const unsigned char *R, const unsigned char *pk,\n                  const unsigned char *m, unsigned long long mlen,\n                  const spx_ctx *ctx)\n{\n    (void)ctx;\n#define SPX_TREE_BITS (SPX_TREE_HEIGHT * (SPX_D - 1))\n#define SPX_TREE_BYTES ((SPX_TREE_BITS + 7) / 8)\n#define SPX_LEAF_BITS SPX_TREE_HEIGHT\n#define SPX_LEAF_BYTES ((SPX_LEAF_BITS + 7) / 8)\n#define SPX_DGST_BYTES (SPX_FORS_MSG_BYTES + SPX_TREE_BYTES + SPX_LEAF_BYTES)\n\n    unsigned char seed[2*SPX_N + SPX_SHAX_OUTPUT_BYTES];\n\n    /* Round to nearest multiple of SPX_SHAX_BLOCK_BYTES */\n#if (SPX_SHAX_BLOCK_BYTES & (SPX_SHAX_BLOCK_BYTES-1)) != 0\n    #error \"Assumes that SPX_SHAX_BLOCK_BYTES is a power of 2\"\n#endif\n#define SPX_INBLOCKS (((SPX_N + SPX_PK_BYTES + SPX_SHAX_BLOCK_BYTES - 1) & \\\n                        -SPX_SHAX_BLOCK_BYTES) / SPX_SHAX_BLOCK_BYTES)\n    unsigned char inbuf[SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES];\n\n    unsigned char buf[SPX_DGST_BYTES];\n    unsigned char *bufp = buf;\n    uint8_t state[8 + SPX_SHAX_OUTPUT_BYTES];\n\n    shaX_inc_init(state);\n\n    // seed: SHA-X(R ‖ PK.seed ‖ PK.root ‖ M)\n    memcpy(inbuf, R, SPX_N);\n    memcpy(inbuf + SPX_N, pk, SPX_PK_BYTES);\n\n    /* If R + pk + message cannot fill up an entire block */\n    if (SPX_N + SPX_PK_BYTES + mlen < SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES) {\n        memcpy(inbuf + SPX_N + SPX_PK_BYTES, m, mlen);\n        shaX_inc_finalize(seed + 2*SPX_N, state, inbuf, SPX_N + SPX_PK_BYTES + mlen);\n    }\n    /* Otherwise first fill a block, so that finalize only uses the message */\n    else {\n        memcpy(inbuf + SPX_N + SPX_PK_BYTES, m,\n               SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES - SPX_N - SPX_PK_BYTES);\n        shaX_inc_blocks(state, inbuf, SPX_INBLOCKS);\n\n        m += SPX_INBLOCKS * 
SPX_SHAX_BLOCK_BYTES - SPX_N - SPX_PK_BYTES;\n        mlen -= SPX_INBLOCKS * SPX_SHAX_BLOCK_BYTES - SPX_N - SPX_PK_BYTES;\n        shaX_inc_finalize(seed + 2*SPX_N, state, m, mlen);\n    }\n\n    // H_msg: MGF1-SHA-X(R ‖ PK.seed ‖ seed)\n    memcpy(seed, R, SPX_N);\n    memcpy(seed + SPX_N, pk, SPX_N);\n\n    /* By doing this in two steps, we prevent hashing the message twice;\n       otherwise each iteration in MGF1 would hash the message again. */\n    mgf1_X(bufp, SPX_DGST_BYTES, seed, 2*SPX_N + SPX_SHAX_OUTPUT_BYTES);\n\n    memcpy(digest, bufp, SPX_FORS_MSG_BYTES);\n    bufp += SPX_FORS_MSG_BYTES;\n\n#if SPX_TREE_BITS > 64\n    #error For given height and depth, 64 bits cannot represent all subtrees\n#endif\n\n    if (SPX_D == 1) {\n\t*tree = 0;\n    } else {\n        *tree = bytes_to_ull(bufp, SPX_TREE_BYTES);\n        *tree &= (~(uint64_t)0) >> (64 - SPX_TREE_BITS);\n    }\n    bufp += SPX_TREE_BYTES;\n\n    *leaf_idx = (uint32_t)bytes_to_ull(bufp, SPX_LEAF_BYTES);\n    *leaf_idx &= (~(uint32_t)0) >> (32 - SPX_LEAF_BITS);\n}\n\n\n"
  },
  {
    "path": "ref/hash_shake.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"utils.h\"\n#include \"params.h\"\n#include \"hash.h\"\n#include \"fips202.h\"\n\n/* For SHAKE256, there is no immediate reason to initialize at the start,\n   so this function is an empty operation. */\nvoid initialize_hash_function(spx_ctx* ctx)\n{\n    (void)ctx; /* Suppress an 'unused parameter' warning. */\n}\n\n/*\n * Computes PRF(pk_seed, sk_seed, addr)\n */\nvoid prf_addr(unsigned char *out, const spx_ctx *ctx,\n              const uint32_t addr[8])\n{\n    unsigned char buf[2*SPX_N + SPX_ADDR_BYTES];\n\n    memcpy(buf, ctx->pub_seed, SPX_N);\n    memcpy(buf + SPX_N, addr, SPX_ADDR_BYTES);\n    memcpy(buf + SPX_N + SPX_ADDR_BYTES, ctx->sk_seed, SPX_N);\n\n    shake256(out, SPX_N, buf, 2*SPX_N + SPX_ADDR_BYTES);\n}\n\n/**\n * Computes the message-dependent randomness R, using a secret seed and an\n * optional randomization value as well as the message.\n */\nvoid gen_message_random(unsigned char *R, const unsigned char *sk_prf,\n                        const unsigned char *optrand,\n                        const unsigned char *m, unsigned long long mlen,\n                        const spx_ctx *ctx)\n{\n    (void)ctx;\n    uint64_t s_inc[26];\n\n    shake256_inc_init(s_inc);\n    shake256_inc_absorb(s_inc, sk_prf, SPX_N);\n    shake256_inc_absorb(s_inc, optrand, SPX_N);\n    shake256_inc_absorb(s_inc, m, mlen);\n    shake256_inc_finalize(s_inc);\n    shake256_inc_squeeze(R, SPX_N, s_inc);\n}\n\n/**\n * Computes the message hash using R, the public key, and the message.\n * Outputs the message digest and the index of the leaf. 
The index is split in\n * the tree index and the leaf index, for convenient copying to an address.\n */\nvoid hash_message(unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx,\n                  const unsigned char *R, const unsigned char *pk,\n                  const unsigned char *m, unsigned long long mlen,\n                  const spx_ctx *ctx)\n{\n    (void)ctx;\n#define SPX_TREE_BITS (SPX_TREE_HEIGHT * (SPX_D - 1))\n#define SPX_TREE_BYTES ((SPX_TREE_BITS + 7) / 8)\n#define SPX_LEAF_BITS SPX_TREE_HEIGHT\n#define SPX_LEAF_BYTES ((SPX_LEAF_BITS + 7) / 8)\n#define SPX_DGST_BYTES (SPX_FORS_MSG_BYTES + SPX_TREE_BYTES + SPX_LEAF_BYTES)\n\n    unsigned char buf[SPX_DGST_BYTES];\n    unsigned char *bufp = buf;\n    uint64_t s_inc[26];\n\n    shake256_inc_init(s_inc);\n    shake256_inc_absorb(s_inc, R, SPX_N);\n    shake256_inc_absorb(s_inc, pk, SPX_PK_BYTES);\n    shake256_inc_absorb(s_inc, m, mlen);\n    shake256_inc_finalize(s_inc);\n    shake256_inc_squeeze(buf, SPX_DGST_BYTES, s_inc);\n\n    memcpy(digest, bufp, SPX_FORS_MSG_BYTES);\n    bufp += SPX_FORS_MSG_BYTES;\n\n#if SPX_TREE_BITS > 64\n    #error For given height and depth, 64 bits cannot represent all subtrees\n#endif\n\n    if (SPX_D == 1) {\n        *tree = 0;\n    } else {\n        *tree = bytes_to_ull(bufp, SPX_TREE_BYTES);\n        *tree &= (~(uint64_t)0) >> (64 - SPX_TREE_BITS);\n    }\n    bufp += SPX_TREE_BYTES;\n\n    *leaf_idx = (uint32_t)bytes_to_ull(bufp, SPX_LEAF_BYTES);\n    *leaf_idx &= (~(uint32_t)0) >> (32 - SPX_LEAF_BITS);\n}\n"
  },
  {
    "path": "ref/merkle.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx1.h\"\n#include \"wots.h\"\n#include \"wotsx1.h\"\n#include \"merkle.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n/*\n * This generates a Merkle signature (WOTS signature followed by the Merkle\n * authentication path).  This is in this file because most of the complexity\n * is involved with the WOTS signature; the Merkle authentication path logic\n * is mostly hidden in treehashx4\n */\nvoid merkle_sign(uint8_t *sig, unsigned char *root,\n                 const spx_ctx *ctx,\n                 uint32_t wots_addr[8], uint32_t tree_addr[8],\n                 uint32_t idx_leaf)\n{\n    unsigned char *auth_path = sig + SPX_WOTS_BYTES;\n    struct leaf_info_x1 info = { 0 };\n    unsigned steps[ SPX_WOTS_LEN ];\n\n    info.wots_sig = sig;\n    chain_lengths(steps, root);\n    info.wots_steps = steps;\n\n    set_type(&tree_addr[0], SPX_ADDR_TYPE_HASHTREE);\n    set_type(&info.pk_addr[0], SPX_ADDR_TYPE_WOTSPK);\n    copy_subtree_addr(&info.leaf_addr[0], wots_addr);\n    copy_subtree_addr(&info.pk_addr[0], wots_addr);\n\n    info.wots_sign_leaf = idx_leaf;\n\n    treehashx1(root, auth_path, ctx,\n                idx_leaf, 0,\n                SPX_TREE_HEIGHT,\n                wots_gen_leafx1,\n                tree_addr, &info);\n}\n\n/* Compute root node of the top-most subtree. */\nvoid merkle_gen_root(unsigned char *root, const spx_ctx *ctx)\n{\n    /* We do not need the auth path in key generation, but it simplifies the\n       code to have just one treehash routine that computes both root and path\n       in one function. 
*/\n    unsigned char auth_path[SPX_TREE_HEIGHT * SPX_N + SPX_WOTS_BYTES];\n    uint32_t top_tree_addr[8] = {0};\n    uint32_t wots_addr[8] = {0};\n\n    set_layer_addr(top_tree_addr, SPX_D - 1);\n    set_layer_addr(wots_addr, SPX_D - 1);\n\n    merkle_sign(auth_path, root, ctx,\n                wots_addr, top_tree_addr,\n                (uint32_t)~0 /* ~0 means \"don't bother generating an auth path\" */ );\n}\n"
  },
  {
    "path": "ref/merkle.h",
    "content": "#if !defined( MERKLE_H_ )\n#define MERKLE_H_\n\n#include <stdint.h>\n\n/* Generate a Merkle signature (WOTS signature followed by the Merkle */\n/* authentication path) */\n#define merkle_sign SPX_NAMESPACE(merkle_sign)\nvoid merkle_sign(uint8_t *sig, unsigned char *root,\n        const spx_ctx* ctx,\n        uint32_t wots_addr[8], uint32_t tree_addr[8],\n        uint32_t idx_leaf);\n\n/* Compute the root node of the top-most subtree. */\n#define merkle_gen_root SPX_NAMESPACE(merkle_gen_root)\nvoid merkle_gen_root(unsigned char *root, const spx_ctx* ctx);\n\n#endif /* MERKLE_H_ */\n"
  },
  {
    "path": "ref/params/params-sphincs-haraka-128f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 16\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 66\n/* Number of subtree layer. */\n#define SPX_D 22\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 6\n#define SPX_FORS_TREES 33\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../haraka_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-haraka-128s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 16\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 63\n/* Number of subtree layer. */\n#define SPX_D 7\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 12\n#define SPX_FORS_TREES 14\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../haraka_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-haraka-192f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 24\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 66\n/* Number of subtree layer. */\n#define SPX_D 22\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 8\n#define SPX_FORS_TREES 33\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../haraka_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-haraka-192s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 24\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 63\n/* Number of subtree layer. */\n#define SPX_D 7\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 14\n#define SPX_FORS_TREES 17\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../haraka_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-haraka-256f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 32\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 68\n/* Number of subtree layer. */\n#define SPX_D 17\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 9\n#define SPX_FORS_TREES 35\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../haraka_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-haraka-256s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 32\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 64\n/* Number of subtree layer. */\n#define SPX_D 8\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 14\n#define SPX_FORS_TREES 22\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../haraka_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-sha2-128f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 16\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 66\n/* Number of subtree layer. */\n#define SPX_D 22\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 6\n#define SPX_FORS_TREES 33\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* This is a SHA2-based parameter set, hence whether we use SHA-256\n * exclusively or we use both SHA-256 and SHA-512 is controlled by\n * the following #define */\n#define SPX_SHA512 0  /* Use SHA-256 for all hashes */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. 
*/\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. */\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../sha2_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-sha2-128s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 16\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 63\n/* Number of subtree layer. */\n#define SPX_D 7\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 12\n#define SPX_FORS_TREES 14\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* This is a SHA2-based parameter set, hence whether we use SHA-256\n * exclusively or we use both SHA-256 and SHA-512 is controlled by\n * the following #define */\n#define SPX_SHA512 0  /* Use SHA-256 for all hashes */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. 
*/\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. */\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../sha2_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-sha2-192f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 24\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 66\n/* Number of subtree layer. */\n#define SPX_D 22\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 8\n#define SPX_FORS_TREES 33\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* This is a SHA2-based parameter set, hence whether we use SHA-256\n * exclusively or we use both SHA-256 and SHA-512 is controlled by\n * the following #define */\n#define SPX_SHA512 1  /* Use SHA-512 for H and T_l, l >= 2 */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. 
*/\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. */\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../sha2_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-sha2-192s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 24\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 63\n/* Number of subtree layer. */\n#define SPX_D 7\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 14\n#define SPX_FORS_TREES 17\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* This is a SHA2-based parameter set, hence whether we use SHA-256\n * exclusively or we use both SHA-256 and SHA-512 is controlled by\n * the following #define */\n#define SPX_SHA512 1  /* Use SHA-512 for H and T_l, l >= 2 */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. 
*/\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. */\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../sha2_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-sha2-256f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 32\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 68\n/* Number of subtree layer. */\n#define SPX_D 17\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 9\n#define SPX_FORS_TREES 35\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* This is a SHA2-based parameter set, hence whether we use SHA-256\n * exclusively or we use both SHA-256 and SHA-512 is controlled by\n * the following #define */\n#define SPX_SHA512 1  /* Use SHA-512 for H and T_l, l >= 2 */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. 
*/\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. */\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../sha2_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-sha2-256s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 32\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 64\n/* Number of subtree layer. */\n#define SPX_D 8\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 14\n#define SPX_FORS_TREES 22\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* This is a SHA2-based parameter set, hence whether we use SHA-256\n * exclusively or we use both SHA-256 and SHA-512 is controlled by\n * the following #define */\n#define SPX_SHA512 1  /* Use SHA-512 for H and T_l, l >= 2 */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. 
*/\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. */\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../sha2_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-shake-128f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 16\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 66\n/* Number of subtree layer. */\n#define SPX_D 22\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 6\n#define SPX_FORS_TREES 33\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../shake_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-shake-128s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 16\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 63\n/* Number of subtree layer. */\n#define SPX_D 7\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 12\n#define SPX_FORS_TREES 14\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../shake_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-shake-192f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 24\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 66\n/* Number of subtree layer. */\n#define SPX_D 22\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 8\n#define SPX_FORS_TREES 33\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../shake_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-shake-192s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 24\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 63\n/* Number of subtree layer. */\n#define SPX_D 7\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 14\n#define SPX_FORS_TREES 17\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../shake_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-shake-256f.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 32\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 68\n/* Number of subtree layer. */\n#define SPX_D 17\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 9\n#define SPX_FORS_TREES 35\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../shake_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params/params-sphincs-shake-256s.h",
    "content": "#ifndef SPX_PARAMS_H\n#define SPX_PARAMS_H\n\n#define SPX_NAMESPACE(s) SPX_##s\n\n/* Hash output length in bytes. */\n#define SPX_N 32\n/* Height of the hypertree. */\n#define SPX_FULL_HEIGHT 64\n/* Number of subtree layer. */\n#define SPX_D 8\n/* FORS tree dimensions. */\n#define SPX_FORS_HEIGHT 14\n#define SPX_FORS_TREES 22\n/* Winternitz parameter, */\n#define SPX_WOTS_W 16\n\n/* The hash function is defined by linking a different hash.c file, as opposed\n   to setting a #define constant. */\n\n/* For clarity */\n#define SPX_ADDR_BYTES 32\n\n/* WOTS parameters. */\n#if SPX_WOTS_W == 256\n    #define SPX_WOTS_LOGW 8\n#elif SPX_WOTS_W == 16\n    #define SPX_WOTS_LOGW 4\n#else\n    #error SPX_WOTS_W assumed 16 or 256\n#endif\n\n#define SPX_WOTS_LEN1 (8 * SPX_N / SPX_WOTS_LOGW)\n\n/* SPX_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */\n#if SPX_WOTS_W == 256\n    #if SPX_N <= 1\n        #define SPX_WOTS_LEN2 1\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 2\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#elif SPX_WOTS_W == 16\n    #if SPX_N <= 8\n        #define SPX_WOTS_LEN2 2\n    #elif SPX_N <= 136\n        #define SPX_WOTS_LEN2 3\n    #elif SPX_N <= 256\n        #define SPX_WOTS_LEN2 4\n    #else\n        #error Did not precompute SPX_WOTS_LEN2 for n outside {2, .., 256}\n    #endif\n#endif\n\n#define SPX_WOTS_LEN (SPX_WOTS_LEN1 + SPX_WOTS_LEN2)\n#define SPX_WOTS_BYTES (SPX_WOTS_LEN * SPX_N)\n#define SPX_WOTS_PK_BYTES SPX_WOTS_BYTES\n\n/* Subtree size. */\n#define SPX_TREE_HEIGHT (SPX_FULL_HEIGHT / SPX_D)\n\n#if SPX_TREE_HEIGHT * SPX_D != SPX_FULL_HEIGHT\n    #error SPX_D should always divide SPX_FULL_HEIGHT\n#endif\n\n/* FORS parameters. */\n#define SPX_FORS_MSG_BYTES ((SPX_FORS_HEIGHT * SPX_FORS_TREES + 7) / 8)\n#define SPX_FORS_BYTES ((SPX_FORS_HEIGHT + 1) * SPX_FORS_TREES * SPX_N)\n#define SPX_FORS_PK_BYTES SPX_N\n\n/* Resulting SPX sizes. 
*/\n#define SPX_BYTES (SPX_N + SPX_FORS_BYTES + SPX_D * SPX_WOTS_BYTES +\\\n                   SPX_FULL_HEIGHT * SPX_N)\n#define SPX_PK_BYTES (2 * SPX_N)\n#define SPX_SK_BYTES (2 * SPX_N + SPX_PK_BYTES)\n\n#include \"../shake_offsets.h\"\n\n#endif\n"
  },
  {
    "path": "ref/params.h",
    "content": "#define str(s) #s\n#define xstr(s) str(s)\n\n#include xstr(params/params-PARAMS.h)\n\n"
  },
  {
    "path": "ref/randombytes.c",
    "content": "/*\nThis code was taken from the SPHINCS reference implementation and is public domain.\n*/\n\n#include <fcntl.h>\n#include <unistd.h>\n\n#include \"randombytes.h\"\n\n/* Cached descriptor for /dev/urandom; -1 until the first successful open(). */\nstatic int fd = -1;\n\n/*\n * Fill x[0..xlen) with bytes read from /dev/urandom.\n *\n * Blocks (sleeping one second between attempts) until the device can be\n * opened and until all xlen bytes have been read; it never reports failure.\n */\nvoid randombytes(unsigned char *x, unsigned long long xlen)\n{\n    unsigned long long i;\n    ssize_t n;\n\n    if (fd == -1) {\n        for (;;) {\n            fd = open(\"/dev/urandom\", O_RDONLY);\n            if (fd != -1) {\n                break;\n            }\n            sleep(1);\n        }\n    }\n\n    while (xlen > 0) {\n        /* Request at most 1 MiB per read() call. */\n        if (xlen < 1048576) {\n            i = xlen;\n        }\n        else {\n            i = 1048576;\n        }\n\n        /* Keep the result signed: read() returns -1 on error, which must\n           not be mistaken for a huge byte count. */\n        n = read(fd, x, i);\n        if (n < 1) {\n            sleep(1);\n            continue;\n        }\n\n        x += n;\n        xlen -= (unsigned long long)n;\n    }\n}\n"
  },
  {
    "path": "ref/randombytes.h",
    "content": "#ifndef SPX_RANDOMBYTES_H\n#define SPX_RANDOMBYTES_H\n\nextern void randombytes(unsigned char * x,unsigned long long xlen);\n\n#endif\n"
  },
  {
    "path": "ref/rng.c",
    "content": "//\n//  rng.c\n//\n//  Created by Bassham, Lawrence E (Fed) on 8/29/17.\n//  Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.\n//\n\n#include <string.h>\n#include \"rng.h\"\n#include <openssl/conf.h>\n#include <openssl/evp.h>\n#include <openssl/err.h>\n\nAES256_CTR_DRBG_struct  DRBG_ctx;\n\nvoid    AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer);\n\n/*\n seedexpander_init()\n ctx            - stores the current state of an instance of the seed expander\n seed           - a 32 byte random value\n diversifier    - an 8 byte diversifier\n maxlen         - maximum number of bytes (less than 2**32) generated under this seed and diversifier\n */\nint\nseedexpander_init(AES_XOF_struct *ctx,\n                  unsigned char *seed,\n                  unsigned char *diversifier,\n                  unsigned long maxlen)\n{\n    if ( maxlen >= 0x100000000 )\n        return RNG_BAD_MAXLEN;\n    \n    ctx->length_remaining = maxlen;\n    \n    memcpy(ctx->key, seed, 32);\n    \n    memcpy(ctx->ctr, diversifier, 8);\n    ctx->ctr[11] = (unsigned char)(maxlen % 256);\n    maxlen >>= 8;\n    ctx->ctr[10] = (unsigned char)(maxlen % 256);\n    maxlen >>= 8;\n    ctx->ctr[9] = (unsigned char)(maxlen % 256);\n    maxlen >>= 8;\n    ctx->ctr[8] = (unsigned char)(maxlen % 256);\n    memset(ctx->ctr+12, 0x00, 4);\n    \n    ctx->buffer_pos = 16;\n    memset(ctx->buffer, 0x00, 16);\n    \n    return RNG_SUCCESS;\n}\n\n/*\n seedexpander()\n    ctx  - stores the current state of an instance of the seed expander\n    x    - returns the XOF data\n    xlen - number of bytes to return\n */\nint\nseedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen)\n{\n    unsigned long   offset;\n    \n    if ( x == NULL )\n        return RNG_BAD_OUTBUF;\n    if ( xlen >= ctx->length_remaining )\n        return RNG_BAD_REQ_LEN;\n    \n    ctx->length_remaining -= xlen;\n    \n    offset = 0;\n    while ( xlen > 0 ) {\n        if ( xlen 
<= (16-ctx->buffer_pos) ) { // buffer has what we need\n            memcpy(x+offset, ctx->buffer+ctx->buffer_pos, xlen);\n            ctx->buffer_pos += xlen;\n            \n            return RNG_SUCCESS;\n        }\n        \n        // take what's in the buffer\n        memcpy(x+offset, ctx->buffer+ctx->buffer_pos, 16-ctx->buffer_pos);\n        xlen -= 16-ctx->buffer_pos;\n        offset += 16-ctx->buffer_pos;\n        \n        AES256_ECB(ctx->key, ctx->ctr, ctx->buffer);\n        ctx->buffer_pos = 0;\n        \n        //increment the counter\n        for (int i=15; i>=12; i--) {\n            if ( ctx->ctr[i] == 0xff )\n                ctx->ctr[i] = 0x00;\n            else {\n                ctx->ctr[i]++;\n                break;\n            }\n        }\n        \n    }\n    \n    return RNG_SUCCESS;\n}\n\n\nstatic void handleErrors(void)\n{\n    ERR_print_errors_fp(stderr);\n    abort();\n}\n\n// Use whatever AES implementation you have. This uses AES from openSSL library\n//    key - 256-bit AES key\n//    ctr - a 128-bit plaintext value\n//    buffer - a 128-bit ciphertext value\nvoid\nAES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer)\n{\n    EVP_CIPHER_CTX *ctx;\n    \n    int len;\n    \n    /* Create and initialise the context */\n    if(!(ctx = EVP_CIPHER_CTX_new())) handleErrors();\n    \n    if(1 != EVP_EncryptInit_ex(ctx, EVP_aes_256_ecb(), NULL, key, NULL))\n        handleErrors();\n    \n    if(1 != EVP_EncryptUpdate(ctx, buffer, &len, ctr, 16))\n        handleErrors();\n    \n    /* Clean up */\n    EVP_CIPHER_CTX_free(ctx);\n}\n\nvoid\nrandombytes_init(unsigned char *entropy_input,\n                 unsigned char *personalization_string)\n{\n    unsigned char   seed_material[48];\n    \n    memcpy(seed_material, entropy_input, 48);\n    if (personalization_string)\n        for (int i=0; i<48; i++)\n            seed_material[i] ^= personalization_string[i];\n    memset(DRBG_ctx.Key, 0x00, 32);\n    memset(DRBG_ctx.V, 
0x00, 16);\n    AES256_CTR_DRBG_Update(seed_material, DRBG_ctx.Key, DRBG_ctx.V);\n    DRBG_ctx.reseed_counter = 1;\n}\n\nint\nrandombytes(unsigned char *x, unsigned long long xlen)\n{\n    unsigned char   block[16];\n    int             i = 0;\n    \n    while ( xlen > 0 ) {\n        //increment V\n        for (int j=15; j>=0; j--) {\n            if ( DRBG_ctx.V[j] == 0xff )\n                DRBG_ctx.V[j] = 0x00;\n            else {\n                DRBG_ctx.V[j]++;\n                break;\n            }\n        }\n        AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, block);\n        if ( xlen > 15 ) {\n            memcpy(x+i, block, 16);\n            i += 16;\n            xlen -= 16;\n        }\n        else {\n            memcpy(x+i, block, xlen);\n            xlen = 0;\n        }\n    }\n    AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V);\n    DRBG_ctx.reseed_counter++;\n    \n    return RNG_SUCCESS;\n}\n\nvoid\nAES256_CTR_DRBG_Update(unsigned char *provided_data,\n                       unsigned char *Key,\n                       unsigned char *V)\n{\n    unsigned char   temp[48];\n    \n    for (int i=0; i<3; i++) {\n        //increment V\n        for (int j=15; j>=0; j--) {\n            if ( V[j] == 0xff )\n                V[j] = 0x00;\n            else {\n                V[j]++;\n                break;\n            }\n        }\n        \n        AES256_ECB(Key, V, temp+16*i);\n    }\n    if ( provided_data != NULL )\n        for (int i=0; i<48; i++)\n            temp[i] ^= provided_data[i];\n    memcpy(Key, temp, 32);\n    memcpy(V, temp+32, 16);\n}\n\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "ref/rng.h",
    "content": "//\n//  rng.h\n//\n//  Created by Bassham, Lawrence E (Fed) on 8/29/17.\n//  Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.\n//\n\n#ifndef rng_h\n#define rng_h\n\n#include <stdio.h>\n\n#define RNG_SUCCESS      0\n#define RNG_BAD_MAXLEN  -1\n#define RNG_BAD_OUTBUF  -2\n#define RNG_BAD_REQ_LEN -3\n\ntypedef struct {\n    unsigned char   buffer[16];\n    unsigned long   buffer_pos;\n    unsigned long   length_remaining;\n    unsigned char   key[32];\n    unsigned char   ctr[16];\n} AES_XOF_struct;\n\ntypedef struct {\n    unsigned char   Key[32];\n    unsigned char   V[16];\n    int             reseed_counter;\n} AES256_CTR_DRBG_struct;\n\n\nvoid\nAES256_CTR_DRBG_Update(unsigned char *provided_data,\n                       unsigned char *Key,\n                       unsigned char *V);\n\nint\nseedexpander_init(AES_XOF_struct *ctx,\n                  unsigned char *seed,\n                  unsigned char *diversifier,\n                  unsigned long maxlen);\n\nint\nseedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen);\n\nvoid\nrandombytes_init(unsigned char *entropy_input,\n                 unsigned char *personalization_string);\n\nint\nrandombytes(unsigned char *x, unsigned long long xlen);\n\n#endif /* rng_h */\n"
  },
  {
    "path": "ref/sha2.c",
    "content": "/* Based on the public domain implementation in\n * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html\n * by D. J. Bernstein */\n\n#include <stddef.h>\n#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"sha2.h\"\n\nstatic uint32_t load_bigendian_32(const uint8_t *x) {\n    return (uint32_t)(x[3]) | (((uint32_t)(x[2])) << 8) |\n           (((uint32_t)(x[1])) << 16) | (((uint32_t)(x[0])) << 24);\n}\n\nstatic uint64_t load_bigendian_64(const uint8_t *x) {\n    return (uint64_t)(x[7]) | (((uint64_t)(x[6])) << 8) |\n           (((uint64_t)(x[5])) << 16) | (((uint64_t)(x[4])) << 24) |\n           (((uint64_t)(x[3])) << 32) | (((uint64_t)(x[2])) << 40) |\n           (((uint64_t)(x[1])) << 48) | (((uint64_t)(x[0])) << 56);\n}\n\nstatic void store_bigendian_32(uint8_t *x, uint64_t u) {\n    x[3] = (uint8_t) u;\n    u >>= 8;\n    x[2] = (uint8_t) u;\n    u >>= 8;\n    x[1] = (uint8_t) u;\n    u >>= 8;\n    x[0] = (uint8_t) u;\n}\n\nstatic void store_bigendian_64(uint8_t *x, uint64_t u) {\n    x[7] = (uint8_t) u;\n    u >>= 8;\n    x[6] = (uint8_t) u;\n    u >>= 8;\n    x[5] = (uint8_t) u;\n    u >>= 8;\n    x[4] = (uint8_t) u;\n    u >>= 8;\n    x[3] = (uint8_t) u;\n    u >>= 8;\n    x[2] = (uint8_t) u;\n    u >>= 8;\n    x[1] = (uint8_t) u;\n    u >>= 8;\n    x[0] = (uint8_t) u;\n}\n\n#define SHR(x, c) ((x) >> (c))\n#define ROTR_32(x, c) (((x) >> (c)) | ((x) << (32 - (c))))\n#define ROTR_64(x,c) (((x) >> (c)) | ((x) << (64 - (c))))\n\n#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))\n#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))\n\n#define Sigma0_32(x) (ROTR_32(x, 2) ^ ROTR_32(x,13) ^ ROTR_32(x,22))\n#define Sigma1_32(x) (ROTR_32(x, 6) ^ ROTR_32(x,11) ^ ROTR_32(x,25))\n#define sigma0_32(x) (ROTR_32(x, 7) ^ ROTR_32(x,18) ^ SHR(x, 3))\n#define sigma1_32(x) (ROTR_32(x,17) ^ ROTR_32(x,19) ^ SHR(x,10))\n\n#define Sigma0_64(x) (ROTR_64(x,28) ^ ROTR_64(x,34) ^ ROTR_64(x,39))\n#define Sigma1_64(x) 
(ROTR_64(x,14) ^ ROTR_64(x,18) ^ ROTR_64(x,41))\n#define sigma0_64(x) (ROTR_64(x, 1) ^ ROTR_64(x, 8) ^ SHR(x,7))\n#define sigma1_64(x) (ROTR_64(x,19) ^ ROTR_64(x,61) ^ SHR(x,6))\n\n#define M_32(w0, w14, w9, w1) w0 = sigma1_32(w14) + (w9) + sigma0_32(w1) + (w0);\n#define M_64(w0, w14, w9, w1) w0 = sigma1_64(w14) + (w9) + sigma0_64(w1) + (w0);\n\n#define EXPAND_32           \\\n    M_32(w0, w14, w9, w1)   \\\n    M_32(w1, w15, w10, w2)  \\\n    M_32(w2, w0, w11, w3)   \\\n    M_32(w3, w1, w12, w4)   \\\n    M_32(w4, w2, w13, w5)   \\\n    M_32(w5, w3, w14, w6)   \\\n    M_32(w6, w4, w15, w7)   \\\n    M_32(w7, w5, w0, w8)    \\\n    M_32(w8, w6, w1, w9)    \\\n    M_32(w9, w7, w2, w10)   \\\n    M_32(w10, w8, w3, w11)  \\\n    M_32(w11, w9, w4, w12)  \\\n    M_32(w12, w10, w5, w13) \\\n    M_32(w13, w11, w6, w14) \\\n    M_32(w14, w12, w7, w15) \\\n    M_32(w15, w13, w8, w0)\n\n#define EXPAND_64 \\\n  M_64(w0 ,w14,w9 ,w1 ) \\\n  M_64(w1 ,w15,w10,w2 ) \\\n  M_64(w2 ,w0 ,w11,w3 ) \\\n  M_64(w3 ,w1 ,w12,w4 ) \\\n  M_64(w4 ,w2 ,w13,w5 ) \\\n  M_64(w5 ,w3 ,w14,w6 ) \\\n  M_64(w6 ,w4 ,w15,w7 ) \\\n  M_64(w7 ,w5 ,w0 ,w8 ) \\\n  M_64(w8 ,w6 ,w1 ,w9 ) \\\n  M_64(w9 ,w7 ,w2 ,w10) \\\n  M_64(w10,w8 ,w3 ,w11) \\\n  M_64(w11,w9 ,w4 ,w12) \\\n  M_64(w12,w10,w5 ,w13) \\\n  M_64(w13,w11,w6 ,w14) \\\n  M_64(w14,w12,w7 ,w15) \\\n  M_64(w15,w13,w8 ,w0 )\n\n#define F_32(w, k)                                   \\\n    T1 = h + Sigma1_32(e) + Ch(e, f, g) + (k) + (w); \\\n    T2 = Sigma0_32(a) + Maj(a, b, c);                \\\n    h = g;                                           \\\n    g = f;                                           \\\n    f = e;                                           \\\n    e = d + T1;                                      \\\n    d = c;                                           \\\n    c = b;                                           \\\n    b = a;                                           \\\n    a = T1 + T2;\n\n#define F_64(w,k) \\\n    T1 = h + Sigma1_64(e) + 
Ch(e,f,g) + k + w; \\\n    T2 = Sigma0_64(a) + Maj(a,b,c); \\\n    h = g; \\\n    g = f; \\\n    f = e; \\\n    e = d + T1; \\\n    d = c; \\\n    c = b; \\\n    b = a; \\\n    a = T1 + T2;\n\nstatic size_t crypto_hashblocks_sha256(uint8_t *statebytes,\n                                       const uint8_t *in, size_t inlen) {\n    uint32_t state[8];\n    uint32_t a;\n    uint32_t b;\n    uint32_t c;\n    uint32_t d;\n    uint32_t e;\n    uint32_t f;\n    uint32_t g;\n    uint32_t h;\n    uint32_t T1;\n    uint32_t T2;\n\n    a = load_bigendian_32(statebytes + 0);\n    state[0] = a;\n    b = load_bigendian_32(statebytes + 4);\n    state[1] = b;\n    c = load_bigendian_32(statebytes + 8);\n    state[2] = c;\n    d = load_bigendian_32(statebytes + 12);\n    state[3] = d;\n    e = load_bigendian_32(statebytes + 16);\n    state[4] = e;\n    f = load_bigendian_32(statebytes + 20);\n    state[5] = f;\n    g = load_bigendian_32(statebytes + 24);\n    state[6] = g;\n    h = load_bigendian_32(statebytes + 28);\n    state[7] = h;\n\n    while (inlen >= 64) {\n        uint32_t w0  = load_bigendian_32(in + 0);\n        uint32_t w1  = load_bigendian_32(in + 4);\n        uint32_t w2  = load_bigendian_32(in + 8);\n        uint32_t w3  = load_bigendian_32(in + 12);\n        uint32_t w4  = load_bigendian_32(in + 16);\n        uint32_t w5  = load_bigendian_32(in + 20);\n        uint32_t w6  = load_bigendian_32(in + 24);\n        uint32_t w7  = load_bigendian_32(in + 28);\n        uint32_t w8  = load_bigendian_32(in + 32);\n        uint32_t w9  = load_bigendian_32(in + 36);\n        uint32_t w10 = load_bigendian_32(in + 40);\n        uint32_t w11 = load_bigendian_32(in + 44);\n        uint32_t w12 = load_bigendian_32(in + 48);\n        uint32_t w13 = load_bigendian_32(in + 52);\n        uint32_t w14 = load_bigendian_32(in + 56);\n        uint32_t w15 = load_bigendian_32(in + 60);\n\n        F_32(w0, 0x428a2f98)\n        F_32(w1, 0x71374491)\n        F_32(w2, 0xb5c0fbcf)\n        
F_32(w3, 0xe9b5dba5)\n        F_32(w4, 0x3956c25b)\n        F_32(w5, 0x59f111f1)\n        F_32(w6, 0x923f82a4)\n        F_32(w7, 0xab1c5ed5)\n        F_32(w8, 0xd807aa98)\n        F_32(w9, 0x12835b01)\n        F_32(w10, 0x243185be)\n        F_32(w11, 0x550c7dc3)\n        F_32(w12, 0x72be5d74)\n        F_32(w13, 0x80deb1fe)\n        F_32(w14, 0x9bdc06a7)\n        F_32(w15, 0xc19bf174)\n\n        EXPAND_32\n\n        F_32(w0, 0xe49b69c1)\n        F_32(w1, 0xefbe4786)\n        F_32(w2, 0x0fc19dc6)\n        F_32(w3, 0x240ca1cc)\n        F_32(w4, 0x2de92c6f)\n        F_32(w5, 0x4a7484aa)\n        F_32(w6, 0x5cb0a9dc)\n        F_32(w7, 0x76f988da)\n        F_32(w8, 0x983e5152)\n        F_32(w9, 0xa831c66d)\n        F_32(w10, 0xb00327c8)\n        F_32(w11, 0xbf597fc7)\n        F_32(w12, 0xc6e00bf3)\n        F_32(w13, 0xd5a79147)\n        F_32(w14, 0x06ca6351)\n        F_32(w15, 0x14292967)\n\n        EXPAND_32\n\n        F_32(w0, 0x27b70a85)\n        F_32(w1, 0x2e1b2138)\n        F_32(w2, 0x4d2c6dfc)\n        F_32(w3, 0x53380d13)\n        F_32(w4, 0x650a7354)\n        F_32(w5, 0x766a0abb)\n        F_32(w6, 0x81c2c92e)\n        F_32(w7, 0x92722c85)\n        F_32(w8, 0xa2bfe8a1)\n        F_32(w9, 0xa81a664b)\n        F_32(w10, 0xc24b8b70)\n        F_32(w11, 0xc76c51a3)\n        F_32(w12, 0xd192e819)\n        F_32(w13, 0xd6990624)\n        F_32(w14, 0xf40e3585)\n        F_32(w15, 0x106aa070)\n\n        EXPAND_32\n\n        F_32(w0, 0x19a4c116)\n        F_32(w1, 0x1e376c08)\n        F_32(w2, 0x2748774c)\n        F_32(w3, 0x34b0bcb5)\n        F_32(w4, 0x391c0cb3)\n        F_32(w5, 0x4ed8aa4a)\n        F_32(w6, 0x5b9cca4f)\n        F_32(w7, 0x682e6ff3)\n        F_32(w8, 0x748f82ee)\n        F_32(w9, 0x78a5636f)\n        F_32(w10, 0x84c87814)\n        F_32(w11, 0x8cc70208)\n        F_32(w12, 0x90befffa)\n        F_32(w13, 0xa4506ceb)\n        F_32(w14, 0xbef9a3f7)\n        F_32(w15, 0xc67178f2)\n\n        a += state[0];\n        b += state[1];\n        c += state[2];\n        d 
+= state[3];\n        e += state[4];\n        f += state[5];\n        g += state[6];\n        h += state[7];\n\n        state[0] = a;\n        state[1] = b;\n        state[2] = c;\n        state[3] = d;\n        state[4] = e;\n        state[5] = f;\n        state[6] = g;\n        state[7] = h;\n\n        in += 64;\n        inlen -= 64;\n    }\n\n    store_bigendian_32(statebytes + 0, state[0]);\n    store_bigendian_32(statebytes + 4, state[1]);\n    store_bigendian_32(statebytes + 8, state[2]);\n    store_bigendian_32(statebytes + 12, state[3]);\n    store_bigendian_32(statebytes + 16, state[4]);\n    store_bigendian_32(statebytes + 20, state[5]);\n    store_bigendian_32(statebytes + 24, state[6]);\n    store_bigendian_32(statebytes + 28, state[7]);\n\n    return inlen;\n}\n\nstatic int crypto_hashblocks_sha512(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)\n{\n  uint64_t state[8];\n  uint64_t a;\n  uint64_t b;\n  uint64_t c;\n  uint64_t d;\n  uint64_t e;\n  uint64_t f;\n  uint64_t g;\n  uint64_t h;\n  uint64_t T1;\n  uint64_t T2;\n\n  a = load_bigendian_64(statebytes +  0); state[0] = a;\n  b = load_bigendian_64(statebytes +  8); state[1] = b;\n  c = load_bigendian_64(statebytes + 16); state[2] = c;\n  d = load_bigendian_64(statebytes + 24); state[3] = d;\n  e = load_bigendian_64(statebytes + 32); state[4] = e;\n  f = load_bigendian_64(statebytes + 40); state[5] = f;\n  g = load_bigendian_64(statebytes + 48); state[6] = g;\n  h = load_bigendian_64(statebytes + 56); state[7] = h;\n\n  while (inlen >= 128) {\n    uint64_t w0  = load_bigendian_64(in +   0);\n    uint64_t w1  = load_bigendian_64(in +   8);\n    uint64_t w2  = load_bigendian_64(in +  16);\n    uint64_t w3  = load_bigendian_64(in +  24);\n    uint64_t w4  = load_bigendian_64(in +  32);\n    uint64_t w5  = load_bigendian_64(in +  40);\n    uint64_t w6  = load_bigendian_64(in +  48);\n    uint64_t w7  = load_bigendian_64(in +  56);\n    uint64_t w8  = load_bigendian_64(in +  
64);\n    uint64_t w9  = load_bigendian_64(in +  72);\n    uint64_t w10 = load_bigendian_64(in +  80);\n    uint64_t w11 = load_bigendian_64(in +  88);\n    uint64_t w12 = load_bigendian_64(in +  96);\n    uint64_t w13 = load_bigendian_64(in + 104);\n    uint64_t w14 = load_bigendian_64(in + 112);\n    uint64_t w15 = load_bigendian_64(in + 120);\n\n    F_64(w0 ,0x428a2f98d728ae22ULL)\n    F_64(w1 ,0x7137449123ef65cdULL)\n    F_64(w2 ,0xb5c0fbcfec4d3b2fULL)\n    F_64(w3 ,0xe9b5dba58189dbbcULL)\n    F_64(w4 ,0x3956c25bf348b538ULL)\n    F_64(w5 ,0x59f111f1b605d019ULL)\n    F_64(w6 ,0x923f82a4af194f9bULL)\n    F_64(w7 ,0xab1c5ed5da6d8118ULL)\n    F_64(w8 ,0xd807aa98a3030242ULL)\n    F_64(w9 ,0x12835b0145706fbeULL)\n    F_64(w10,0x243185be4ee4b28cULL)\n    F_64(w11,0x550c7dc3d5ffb4e2ULL)\n    F_64(w12,0x72be5d74f27b896fULL)\n    F_64(w13,0x80deb1fe3b1696b1ULL)\n    F_64(w14,0x9bdc06a725c71235ULL)\n    F_64(w15,0xc19bf174cf692694ULL)\n\n    EXPAND_64\n\n    F_64(w0 ,0xe49b69c19ef14ad2ULL)\n    F_64(w1 ,0xefbe4786384f25e3ULL)\n    F_64(w2 ,0x0fc19dc68b8cd5b5ULL)\n    F_64(w3 ,0x240ca1cc77ac9c65ULL)\n    F_64(w4 ,0x2de92c6f592b0275ULL)\n    F_64(w5 ,0x4a7484aa6ea6e483ULL)\n    F_64(w6 ,0x5cb0a9dcbd41fbd4ULL)\n    F_64(w7 ,0x76f988da831153b5ULL)\n    F_64(w8 ,0x983e5152ee66dfabULL)\n    F_64(w9 ,0xa831c66d2db43210ULL)\n    F_64(w10,0xb00327c898fb213fULL)\n    F_64(w11,0xbf597fc7beef0ee4ULL)\n    F_64(w12,0xc6e00bf33da88fc2ULL)\n    F_64(w13,0xd5a79147930aa725ULL)\n    F_64(w14,0x06ca6351e003826fULL)\n    F_64(w15,0x142929670a0e6e70ULL)\n\n    EXPAND_64\n\n    F_64(w0 ,0x27b70a8546d22ffcULL)\n    F_64(w1 ,0x2e1b21385c26c926ULL)\n    F_64(w2 ,0x4d2c6dfc5ac42aedULL)\n    F_64(w3 ,0x53380d139d95b3dfULL)\n    F_64(w4 ,0x650a73548baf63deULL)\n    F_64(w5 ,0x766a0abb3c77b2a8ULL)\n    F_64(w6 ,0x81c2c92e47edaee6ULL)\n    F_64(w7 ,0x92722c851482353bULL)\n    F_64(w8 ,0xa2bfe8a14cf10364ULL)\n    F_64(w9 ,0xa81a664bbc423001ULL)\n    F_64(w10,0xc24b8b70d0f89791ULL)\n    
F_64(w11,0xc76c51a30654be30ULL)\n    F_64(w12,0xd192e819d6ef5218ULL)\n    F_64(w13,0xd69906245565a910ULL)\n    F_64(w14,0xf40e35855771202aULL)\n    F_64(w15,0x106aa07032bbd1b8ULL)\n\n    EXPAND_64\n\n    F_64(w0 ,0x19a4c116b8d2d0c8ULL)\n    F_64(w1 ,0x1e376c085141ab53ULL)\n    F_64(w2 ,0x2748774cdf8eeb99ULL)\n    F_64(w3 ,0x34b0bcb5e19b48a8ULL)\n    F_64(w4 ,0x391c0cb3c5c95a63ULL)\n    F_64(w5 ,0x4ed8aa4ae3418acbULL)\n    F_64(w6 ,0x5b9cca4f7763e373ULL)\n    F_64(w7 ,0x682e6ff3d6b2b8a3ULL)\n    F_64(w8 ,0x748f82ee5defb2fcULL)\n    F_64(w9 ,0x78a5636f43172f60ULL)\n    F_64(w10,0x84c87814a1f0ab72ULL)\n    F_64(w11,0x8cc702081a6439ecULL)\n    F_64(w12,0x90befffa23631e28ULL)\n    F_64(w13,0xa4506cebde82bde9ULL)\n    F_64(w14,0xbef9a3f7b2c67915ULL)\n    F_64(w15,0xc67178f2e372532bULL)\n\n    EXPAND_64\n\n    F_64(w0 ,0xca273eceea26619cULL)\n    F_64(w1 ,0xd186b8c721c0c207ULL)\n    F_64(w2 ,0xeada7dd6cde0eb1eULL)\n    F_64(w3 ,0xf57d4f7fee6ed178ULL)\n    F_64(w4 ,0x06f067aa72176fbaULL)\n    F_64(w5 ,0x0a637dc5a2c898a6ULL)\n    F_64(w6 ,0x113f9804bef90daeULL)\n    F_64(w7 ,0x1b710b35131c471bULL)\n    F_64(w8 ,0x28db77f523047d84ULL)\n    F_64(w9 ,0x32caab7b40c72493ULL)\n    F_64(w10,0x3c9ebe0a15c9bebcULL)\n    F_64(w11,0x431d67c49c100d4cULL)\n    F_64(w12,0x4cc5d4becb3e42b6ULL)\n    F_64(w13,0x597f299cfc657e2aULL)\n    F_64(w14,0x5fcb6fab3ad6faecULL)\n    F_64(w15,0x6c44198c4a475817ULL)\n\n    a += state[0];\n    b += state[1];\n    c += state[2];\n    d += state[3];\n    e += state[4];\n    f += state[5];\n    g += state[6];\n    h += state[7];\n  \n    state[0] = a;\n    state[1] = b;\n    state[2] = c;\n    state[3] = d;\n    state[4] = e;\n    state[5] = f;\n    state[6] = g;\n    state[7] = h;\n\n    in += 128;\n    inlen -= 128;\n  }\n\n  store_bigendian_64(statebytes +  0,state[0]);\n  store_bigendian_64(statebytes +  8,state[1]);\n  store_bigendian_64(statebytes + 16,state[2]);\n  store_bigendian_64(statebytes + 24,state[3]);\n  store_bigendian_64(statebytes + 
32,state[4]);\n  store_bigendian_64(statebytes + 40,state[5]);\n  store_bigendian_64(statebytes + 48,state[6]);\n  store_bigendian_64(statebytes + 56,state[7]);\n\n  return inlen;\n}\n\n\nstatic const uint8_t iv_256[32] = {\n    0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85,\n    0x3c, 0x6e, 0xf3, 0x72, 0xa5, 0x4f, 0xf5, 0x3a,\n    0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, 0x68, 0x8c,\n    0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19\n};\n\nstatic const uint8_t iv_512[64] = {\n    0x6a, 0x09, 0xe6, 0x67, 0xf3, 0xbc, 0xc9, 0x08, 0xbb, 0x67, 0xae,\n    0x85, 0x84, 0xca, 0xa7, 0x3b, 0x3c, 0x6e, 0xf3, 0x72, 0xfe, 0x94,\n    0xf8, 0x2b, 0xa5, 0x4f, 0xf5, 0x3a, 0x5f, 0x1d, 0x36, 0xf1, 0x51,\n    0x0e, 0x52, 0x7f, 0xad, 0xe6, 0x82, 0xd1, 0x9b, 0x05, 0x68, 0x8c,\n    0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd,\n    0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, 0x7e, 0x21, 0x79\n};\n\nvoid sha256_inc_init(uint8_t *state) {\n    for (size_t i = 0; i < 32; ++i) {\n        state[i] = iv_256[i];\n    }\n    for (size_t i = 32; i < 40; ++i) {\n        state[i] = 0;\n    }\n}\n\nvoid sha512_inc_init(uint8_t *state) {\n    for (size_t i = 0; i < 64; ++i) {\n        state[i] = iv_512[i];\n    }\n    for (size_t i = 64; i < 72; ++i) {\n        state[i] = 0;\n    }\n}\n\nvoid sha256_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks) {\n    uint64_t bytes = load_bigendian_64(state + 32);\n\n    crypto_hashblocks_sha256(state, in, 64 * inblocks);\n    bytes += 64 * inblocks;\n\n    store_bigendian_64(state + 32, bytes);\n}\n\nvoid sha512_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks) {\n    uint64_t bytes = load_bigendian_64(state + 64);\n\n    crypto_hashblocks_sha512(state, in, 128 * inblocks);\n    bytes += 128 * inblocks;\n\n    store_bigendian_64(state + 64, bytes);\n}\n\nvoid sha256_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen) {\n    uint8_t padded[128];\n    uint64_t bytes = load_bigendian_64(state + 32) + 
inlen;\n\n    crypto_hashblocks_sha256(state, in, inlen);\n    in += inlen;\n    inlen &= 63;\n    in -= inlen;\n\n    for (size_t i = 0; i < inlen; ++i) {\n        padded[i] = in[i];\n    }\n    padded[inlen] = 0x80;\n\n    if (inlen < 56) {\n        for (size_t i = inlen + 1; i < 56; ++i) {\n            padded[i] = 0;\n        }\n        padded[56] = (uint8_t) (bytes >> 53);\n        padded[57] = (uint8_t) (bytes >> 45);\n        padded[58] = (uint8_t) (bytes >> 37);\n        padded[59] = (uint8_t) (bytes >> 29);\n        padded[60] = (uint8_t) (bytes >> 21);\n        padded[61] = (uint8_t) (bytes >> 13);\n        padded[62] = (uint8_t) (bytes >> 5);\n        padded[63] = (uint8_t) (bytes << 3);\n        crypto_hashblocks_sha256(state, padded, 64);\n    } else {\n        for (size_t i = inlen + 1; i < 120; ++i) {\n            padded[i] = 0;\n        }\n        padded[120] = (uint8_t) (bytes >> 53);\n        padded[121] = (uint8_t) (bytes >> 45);\n        padded[122] = (uint8_t) (bytes >> 37);\n        padded[123] = (uint8_t) (bytes >> 29);\n        padded[124] = (uint8_t) (bytes >> 21);\n        padded[125] = (uint8_t) (bytes >> 13);\n        padded[126] = (uint8_t) (bytes >> 5);\n        padded[127] = (uint8_t) (bytes << 3);\n        crypto_hashblocks_sha256(state, padded, 128);\n    }\n\n    for (size_t i = 0; i < 32; ++i) {\n        out[i] = state[i];\n    }\n\n}\n\nvoid sha512_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen) {\n    uint8_t padded[256];\n    uint64_t bytes = load_bigendian_64(state + 64) + inlen;\n\n    crypto_hashblocks_sha512(state, in, inlen);\n    in += inlen;\n    inlen &= 127;\n    in -= inlen;\n\n    for (size_t i = 0; i < inlen; ++i) {\n        padded[i] = in[i];\n    }\n    padded[inlen] = 0x80;\n\n    if (inlen < 112) {\n        for (size_t i = inlen + 1; i < 119; ++i) {\n            padded[i] = 0;\n        }\n        padded[119] = (uint8_t) (bytes >> 61);\n        padded[120] = (uint8_t) (bytes >> 53);\n   
     padded[121] = (uint8_t) (bytes >> 45);\n        padded[122] = (uint8_t) (bytes >> 37);\n        padded[123] = (uint8_t) (bytes >> 29);\n        padded[124] = (uint8_t) (bytes >> 21);\n        padded[125] = (uint8_t) (bytes >> 13);\n        padded[126] = (uint8_t) (bytes >> 5);\n        padded[127] = (uint8_t) (bytes << 3);\n        crypto_hashblocks_sha512(state, padded, 128);\n    } else {\n        for (size_t i = inlen + 1; i < 247; ++i) {\n            padded[i] = 0;\n        }\n        padded[247] = (uint8_t) (bytes >> 61);\n        padded[248] = (uint8_t) (bytes >> 53);\n        padded[249] = (uint8_t) (bytes >> 45);\n        padded[250] = (uint8_t) (bytes >> 37);\n        padded[251] = (uint8_t) (bytes >> 29);\n        padded[252] = (uint8_t) (bytes >> 21);\n        padded[253] = (uint8_t) (bytes >> 13);\n        padded[254] = (uint8_t) (bytes >> 5);\n        padded[255] = (uint8_t) (bytes << 3);\n        crypto_hashblocks_sha512(state, padded, 256);\n    }\n\n    for (size_t i = 0; i < 64; ++i) {\n        out[i] = state[i];\n    }\n}\n\nvoid sha256(uint8_t *out, const uint8_t *in, size_t inlen) {\n    uint8_t state[40];\n\n    sha256_inc_init(state);\n    sha256_inc_finalize(out, state, in, inlen);\n}\n\nvoid sha512(uint8_t *out, const uint8_t *in, size_t inlen) {\n    uint8_t state[72];\n\n    sha512_inc_init(state);\n    sha512_inc_finalize(out, state, in, inlen);\n}\n\n/**\n * mgf1 function based on the SHA-256 hash function\n * Note that inlen should be sufficiently small that it still allows for\n * an array to be allocated on the stack. Typically 'in' is merely a seed.\n * Outputs outlen number of bytes\n */\nvoid mgf1_256(unsigned char *out, unsigned long outlen,\n          const unsigned char *in, unsigned long inlen)\n{\n    SPX_VLA(uint8_t, inbuf, inlen+4);\n    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];\n    unsigned long i;\n\n    memcpy(inbuf, in, inlen);\n\n    /* While we can fit in at least another full block of SHA256 output.. 
*/\n    for (i = 0; (i+1)*SPX_SHA256_OUTPUT_BYTES <= outlen; i++) {\n        u32_to_bytes(inbuf + inlen, i);\n        sha256(out, inbuf, inlen + 4);\n        out += SPX_SHA256_OUTPUT_BYTES;\n    }\n    /* Until we cannot anymore, and we fill the remainder. */\n    if (outlen > i*SPX_SHA256_OUTPUT_BYTES) {\n        u32_to_bytes(inbuf + inlen, i);\n        sha256(outbuf, inbuf, inlen + 4);\n        memcpy(out, outbuf, outlen - i*SPX_SHA256_OUTPUT_BYTES);\n    }\n}\n\n/*\n * mgf1 function based on the SHA-512 hash function\n */\nvoid mgf1_512(unsigned char *out, unsigned long outlen,\n          const unsigned char *in, unsigned long inlen)\n{\n    SPX_VLA(uint8_t, inbuf, inlen+4);\n    unsigned char outbuf[SPX_SHA512_OUTPUT_BYTES];\n    unsigned long i;\n\n    memcpy(inbuf, in, inlen);\n\n    /* While we can fit in at least another full block of SHA512 output.. */\n    for (i = 0; (i+1)*SPX_SHA512_OUTPUT_BYTES <= outlen; i++) {\n        u32_to_bytes(inbuf + inlen, i);\n        sha512(out, inbuf, inlen + 4);\n        out += SPX_SHA512_OUTPUT_BYTES;\n    }\n    /* Until we cannot anymore, and we fill the remainder. 
*/\n    if (outlen > i*SPX_SHA512_OUTPUT_BYTES) {\n        u32_to_bytes(inbuf + inlen, i);\n        sha512(outbuf, inbuf, inlen + 4);\n        memcpy(out, outbuf, outlen - i*SPX_SHA512_OUTPUT_BYTES);\n    }\n}\n\n\n/**\n * Absorb the constant pub_seed using one round of the compression function\n * This initializes state_seeded and state_seeded_512, which can then be\n * reused in thash\n **/\nvoid seed_state(spx_ctx *ctx) {\n    uint8_t block[SPX_SHA512_BLOCK_BYTES];\n    size_t i;\n\n    for (i = 0; i < SPX_N; ++i) {\n        block[i] = ctx->pub_seed[i];\n    }\n    for (i = SPX_N; i < SPX_SHA512_BLOCK_BYTES; ++i) {\n        block[i] = 0;\n    }\n    /* block has been properly initialized for both SHA-256 and SHA-512 */\n\n    sha256_inc_init(ctx->state_seeded);\n    sha256_inc_blocks(ctx->state_seeded, block, 1);\n#if SPX_SHA512\n    sha512_inc_init(ctx->state_seeded_512);\n    sha512_inc_blocks(ctx->state_seeded_512, block, 1);\n#endif\n}\n"
  },
  {
    "path": "ref/sha2.h",
    "content": "#ifndef SPX_SHA2_H\n#define SPX_SHA2_H\n\n#include \"params.h\"\n\n#define SPX_SHA256_BLOCK_BYTES 64\n#define SPX_SHA256_OUTPUT_BYTES 32  /* This does not necessarily equal SPX_N */\n\n#define SPX_SHA512_BLOCK_BYTES 128\n#define SPX_SHA512_OUTPUT_BYTES 64\n\n#if SPX_SHA256_OUTPUT_BYTES < SPX_N\n    #error Linking against SHA-256 with N larger than 32 bytes is not supported\n#endif\n\n#define SPX_SHA256_ADDR_BYTES 22\n\n#include <stddef.h>\n#include <stdint.h>\n\nvoid sha256_inc_init(uint8_t *state);\nvoid sha256_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks);\nvoid sha256_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen);\nvoid sha256(uint8_t *out, const uint8_t *in, size_t inlen);\n\nvoid sha512_inc_init(uint8_t *state);\nvoid sha512_inc_blocks(uint8_t *state, const uint8_t *in, size_t inblocks);\nvoid sha512_inc_finalize(uint8_t *out, uint8_t *state, const uint8_t *in, size_t inlen);\nvoid sha512(uint8_t *out, const uint8_t *in, size_t inlen);\n\n#define mgf1_256 SPX_NAMESPACE(mgf1_256)\nvoid mgf1_256(unsigned char *out, unsigned long outlen,\n          const unsigned char *in, unsigned long inlen);\n\n#define mgf1_512 SPX_NAMESPACE(mgf1_512)\nvoid mgf1_512(unsigned char *out, unsigned long outlen,\n          const unsigned char *in, unsigned long inlen);\n\n#define seed_state SPX_NAMESPACE(seed_state)\nvoid seed_state(spx_ctx *ctx);\n\n\n#endif\n"
  },
  {
    "path": "ref/sha2_offsets.h",
    "content": "#ifndef SHA2_OFFSETS_H_\n#define SHA2_OFFSETS_H_\n\n/*\n * Offsets of various fields in the address structure when we use SHA2 as\n * the Sphincs+ hash function\n */\n\n#define SPX_OFFSET_LAYER     0   /* The byte used to specify the Merkle tree layer */\n#define SPX_OFFSET_TREE      1   /* The start of the 8 byte field used to specify the tree */\n#define SPX_OFFSET_TYPE      9   /* The byte used to specify the hash type (reason) */\n#define SPX_OFFSET_KP_ADDR   10  /* The start of the 4 byte field used to specify the key pair address */ \n#define SPX_OFFSET_CHAIN_ADDR 17  /* The byte used to specify the chain address (which Winternitz chain) */\n#define SPX_OFFSET_HASH_ADDR 21  /* The byte used to specify the hash address (where in the Winternitz chain) */\n#define SPX_OFFSET_TREE_HGT  17  /* The byte used to specify the height of this node in the FORS or Merkle tree */\n#define SPX_OFFSET_TREE_INDEX 18 /* The start of the 4 byte field used to specify the node in the FORS or Merkle tree */\n\n#define SPX_SHA2 1\n\n#endif /* SHA2_OFFSETS_H_ */\n"
  },
  {
    "path": "ref/shake_offsets.h",
    "content": "#if !defined( SHAKE_OFFSETS_H_ )\n#define SHAKE_OFFSETS_H_\n\n/*\n * Offsets of various fields in the address structure when we use SHAKE as\n * the Sphincs+ hash function\n */\n\n#define SPX_OFFSET_LAYER     3   /* The byte used to specify the Merkle tree layer */\n#define SPX_OFFSET_TREE      8   /* The start of the 8 byte field used to specify the tree */\n#define SPX_OFFSET_TYPE      19  /* The byte used to specify the hash type (reason) */\n#define SPX_OFFSET_KP_ADDR   20  /* The start of the 4 byte field used to specify the key pair address */ \n#define SPX_OFFSET_CHAIN_ADDR 27  /* The byte used to specify the chain address (which Winternitz chain) */\n#define SPX_OFFSET_HASH_ADDR 31  /* The byte used to specify the hash address (where in the Winternitz chain) */\n#define SPX_OFFSET_TREE_HGT  27  /* The byte used to specify the height of this node in the FORS or Merkle tree */\n#define SPX_OFFSET_TREE_INDEX 28 /* The start of the 4 byte field used to specify the node in the FORS or Merkle tree */\n\n#define SPX_SHAKE 1\n\n#endif /* SHAKE_OFFSETS_H_ */\n"
  },
  {
    "path": "ref/sign.c",
    "content": "#include <stddef.h>\n#include <string.h>\n#include <stdint.h>\n\n#include \"api.h\"\n#include \"params.h\"\n#include \"wots.h\"\n#include \"fors.h\"\n#include \"hash.h\"\n#include \"thash.h\"\n#include \"address.h\"\n#include \"randombytes.h\"\n#include \"utils.h\"\n#include \"merkle.h\"\n\n/*\n * Returns the length of a secret key, in bytes\n */\nunsigned long long crypto_sign_secretkeybytes(void)\n{\n    return CRYPTO_SECRETKEYBYTES;\n}\n\n/*\n * Returns the length of a public key, in bytes\n */\nunsigned long long crypto_sign_publickeybytes(void)\n{\n    return CRYPTO_PUBLICKEYBYTES;\n}\n\n/*\n * Returns the length of a signature, in bytes\n */\nunsigned long long crypto_sign_bytes(void)\n{\n    return CRYPTO_BYTES;\n}\n\n/*\n * Returns the length of the seed required to generate a key pair, in bytes\n */\nunsigned long long crypto_sign_seedbytes(void)\n{\n    return CRYPTO_SEEDBYTES;\n}\n\n/*\n * Generates an SPX key pair given a seed of length\n * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]\n * Format pk: [PUB_SEED || root]\n */\nint crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk,\n                             const unsigned char *seed)\n{\n    spx_ctx ctx;\n\n    /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */\n    memcpy(sk, seed, CRYPTO_SEEDBYTES);\n\n    memcpy(pk, sk + 2*SPX_N, SPX_N);\n\n    memcpy(ctx.pub_seed, pk, SPX_N);\n    memcpy(ctx.sk_seed, sk, SPX_N);\n\n    /* This hook allows the hash function instantiation to do whatever\n       preparation or computation it needs, based on the public seed. */\n    initialize_hash_function(&ctx);\n\n    /* Compute root node of the top-most subtree. 
*/\n    merkle_gen_root(sk + 3*SPX_N, &ctx);\n\n    memcpy(pk + SPX_N, sk + 3*SPX_N, SPX_N);\n\n    return 0;\n}\n\n/*\n * Generates an SPX key pair.\n * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]\n * Format pk: [PUB_SEED || root]\n */\nint crypto_sign_keypair(unsigned char *pk, unsigned char *sk)\n{\n  unsigned char seed[CRYPTO_SEEDBYTES];\n  randombytes(seed, CRYPTO_SEEDBYTES);\n  crypto_sign_seed_keypair(pk, sk, seed);\n\n  return 0;\n}\n\n/**\n * Returns an array containing a detached signature.\n */\nint crypto_sign_signature(uint8_t *sig, size_t *siglen,\n                          const uint8_t *m, size_t mlen, const uint8_t *sk)\n{\n    spx_ctx ctx;\n\n    const unsigned char *sk_prf = sk + SPX_N;\n    const unsigned char *pk = sk + 2*SPX_N;\n\n    unsigned char optrand[SPX_N];\n    unsigned char mhash[SPX_FORS_MSG_BYTES];\n    unsigned char root[SPX_N];\n    uint32_t i;\n    uint64_t tree;\n    uint32_t idx_leaf;\n    uint32_t wots_addr[8] = {0};\n    uint32_t tree_addr[8] = {0};\n\n    memcpy(ctx.sk_seed, sk, SPX_N);\n    memcpy(ctx.pub_seed, pk, SPX_N);\n\n    /* This hook allows the hash function instantiation to do whatever\n       preparation or computation it needs, based on the public seed. */\n    initialize_hash_function(&ctx);\n\n    set_type(wots_addr, SPX_ADDR_TYPE_WOTS);\n    set_type(tree_addr, SPX_ADDR_TYPE_HASHTREE);\n\n    /* Optionally, signing can be made non-deterministic using optrand.\n       This can help counter side-channel attacks that would benefit from\n       getting a large number of traces when the signer uses the same nodes. */\n    randombytes(optrand, SPX_N);\n    /* Compute the digest randomization value. */\n    gen_message_random(sig, sk_prf, optrand, m, mlen, &ctx);\n\n    /* Derive the message digest and leaf index from R, PK and M. 
*/\n    hash_message(mhash, &tree, &idx_leaf, sig, pk, m, mlen, &ctx);\n    sig += SPX_N;\n\n    set_tree_addr(wots_addr, tree);\n    set_keypair_addr(wots_addr, idx_leaf);\n\n    /* Sign the message hash using FORS. */\n    fors_sign(sig, root, mhash, &ctx, wots_addr);\n    sig += SPX_FORS_BYTES;\n\n    for (i = 0; i < SPX_D; i++) {\n        set_layer_addr(tree_addr, i);\n        set_tree_addr(tree_addr, tree);\n\n        copy_subtree_addr(wots_addr, tree_addr);\n        set_keypair_addr(wots_addr, idx_leaf);\n\n        merkle_sign(sig, root, &ctx, wots_addr, tree_addr, idx_leaf);\n        sig += SPX_WOTS_BYTES + SPX_TREE_HEIGHT * SPX_N;\n\n        /* Update the indices for the next layer. */\n        idx_leaf = (tree & ((1 << SPX_TREE_HEIGHT)-1));\n        tree = tree >> SPX_TREE_HEIGHT;\n    }\n\n    *siglen = SPX_BYTES;\n\n    return 0;\n}\n\n/**\n * Verifies a detached signature and message under a given public key.\n */\nint crypto_sign_verify(const uint8_t *sig, size_t siglen,\n                       const uint8_t *m, size_t mlen, const uint8_t *pk)\n{\n    spx_ctx ctx;\n    const unsigned char *pub_root = pk + SPX_N;\n    unsigned char mhash[SPX_FORS_MSG_BYTES];\n    unsigned char wots_pk[SPX_WOTS_BYTES];\n    unsigned char root[SPX_N];\n    unsigned char leaf[SPX_N];\n    unsigned int i;\n    uint64_t tree;\n    uint32_t idx_leaf;\n    uint32_t wots_addr[8] = {0};\n    uint32_t tree_addr[8] = {0};\n    uint32_t wots_pk_addr[8] = {0};\n\n    if (siglen != SPX_BYTES) {\n        return -1;\n    }\n\n    memcpy(ctx.pub_seed, pk, SPX_N);\n\n    /* This hook allows the hash function instantiation to do whatever\n       preparation or computation it needs, based on the public seed. */\n    initialize_hash_function(&ctx);\n\n    set_type(wots_addr, SPX_ADDR_TYPE_WOTS);\n    set_type(tree_addr, SPX_ADDR_TYPE_HASHTREE);\n    set_type(wots_pk_addr, SPX_ADDR_TYPE_WOTSPK);\n\n    /* Derive the message digest and leaf index from R || PK || M. 
*/\n    /* The additional SPX_N is a result of the hash domain separator. */\n    hash_message(mhash, &tree, &idx_leaf, sig, pk, m, mlen, &ctx);\n    sig += SPX_N;\n\n    /* Layer correctly defaults to 0, so no need to set_layer_addr */\n    set_tree_addr(wots_addr, tree);\n    set_keypair_addr(wots_addr, idx_leaf);\n\n    fors_pk_from_sig(root, sig, mhash, &ctx, wots_addr);\n    sig += SPX_FORS_BYTES;\n\n    /* For each subtree.. */\n    for (i = 0; i < SPX_D; i++) {\n        set_layer_addr(tree_addr, i);\n        set_tree_addr(tree_addr, tree);\n\n        copy_subtree_addr(wots_addr, tree_addr);\n        set_keypair_addr(wots_addr, idx_leaf);\n\n        copy_keypair_addr(wots_pk_addr, wots_addr);\n\n        /* The WOTS public key is only correct if the signature was correct. */\n        /* Initially, root is the FORS pk, but on subsequent iterations it is\n           the root of the subtree below the currently processed subtree. */\n        wots_pk_from_sig(wots_pk, sig, root, &ctx, wots_addr);\n        sig += SPX_WOTS_BYTES;\n\n        /* Compute the leaf node using the WOTS public key. */\n        thash(leaf, wots_pk, SPX_WOTS_LEN, &ctx, wots_pk_addr);\n\n        /* Compute the root node of this subtree. */\n        compute_root(root, leaf, idx_leaf, 0, sig, SPX_TREE_HEIGHT,\n                     &ctx, tree_addr);\n        sig += SPX_TREE_HEIGHT * SPX_N;\n\n        /* Update the indices for the next layer. */\n        idx_leaf = (tree & ((1 << SPX_TREE_HEIGHT)-1));\n        tree = tree >> SPX_TREE_HEIGHT;\n    }\n\n    /* Check if the root node equals the root node in the public key. 
*/\n    if (memcmp(root, pub_root, SPX_N)) {\n        return -1;\n    }\n\n    return 0;\n}\n\n\n/**\n * Returns an array containing the signature followed by the message.\n */\nint crypto_sign(unsigned char *sm, unsigned long long *smlen,\n                const unsigned char *m, unsigned long long mlen,\n                const unsigned char *sk)\n{\n    size_t siglen;\n\n    crypto_sign_signature(sm, &siglen, m, (size_t)mlen, sk);\n\n    memmove(sm + SPX_BYTES, m, mlen);\n    *smlen = siglen + mlen;\n\n    return 0;\n}\n\n/**\n * Verifies a given signature-message pair under a given public key.\n */\nint crypto_sign_open(unsigned char *m, unsigned long long *mlen,\n                     const unsigned char *sm, unsigned long long smlen,\n                     const unsigned char *pk)\n{\n    /* The API caller does not necessarily know what size a signature should be\n       but SPHINCS+ signatures are always exactly SPX_BYTES. */\n    if (smlen < SPX_BYTES) {\n        memset(m, 0, smlen);\n        *mlen = 0;\n        return -1;\n    }\n\n    *mlen = smlen - SPX_BYTES;\n\n    if (crypto_sign_verify(sm, SPX_BYTES, sm + SPX_BYTES, (size_t)*mlen, pk)) {\n        memset(m, 0, smlen);\n        *mlen = 0;\n        return -1;\n    }\n\n    /* If verification was successful, move the message to the right place. */\n    memmove(m, sm + SPX_BYTES, *mlen);\n\n    return 0;\n}\n"
  },
  {
    "path": "ref/test/benchmark.c",
    "content": "#define _POSIX_C_SOURCE 199309L\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n\n#include \"../thash.h\"\n#include \"../api.h\"\n#include \"../fors.h\"\n#include \"../wotsx1.h\"\n#include \"../params.h\"\n#include \"../randombytes.h\"\n#include \"cycles.h\"\n\n#define SPX_MLEN 32\n#define NTESTS 10\n\nstatic void wots_gen_pkx1(unsigned char *pk, const spx_ctx* ctx,\n                uint32_t addr[8]);\n\nstatic int cmp_llu(const void *a, const void*b)\n{\n  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;\n  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;\n  return 0;\n}\n\nstatic unsigned long long median(unsigned long long *l, size_t llen)\n{\n  qsort(l,llen,sizeof(unsigned long long),cmp_llu);\n\n  if(llen%2) return l[llen/2];\n  else return (l[llen/2-1]+l[llen/2])/2;\n}\n\nstatic void delta(unsigned long long *l, size_t llen)\n{\n    unsigned int i;\n    for(i = 0; i < llen - 1; i++) {\n        l[i] = l[i+1] - l[i];\n    }\n}\n\n\nstatic void printfcomma (unsigned long long n)\n{\n    if (n < 1000) {\n        printf(\"%llu\", n);\n        return;\n    }\n    printfcomma(n / 1000);\n    printf (\",%03llu\", n % 1000);\n}\n\nstatic void printfalignedcomma (unsigned long long n, int len)\n{\n    unsigned long long ncopy = n;\n    int i = 0;\n\n    while (ncopy > 9) {\n        len -= 1;\n        ncopy /= 10;\n        i += 1;  // to account for commas\n    }\n    i = i/3 - 1;  // to account for commas\n    for (; i < len; i++) {\n        printf(\" \");\n    }\n    printfcomma(n);\n}\n\nstatic void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul)\n{\n    unsigned long long med;\n\n    result /= NTESTS;\n    delta(l, NTESTS + 1);\n    med = median(l, llen);\n    printf(\"avg. 
%11.2lf us (%2.2lf sec); median \", result, result / 1e6);\n    printfalignedcomma(med, 12);\n    printf(\" cycles,  %5llux: \", mul);\n    printfalignedcomma(mul*med, 12);\n    printf(\" cycles\\n\");\n}\n\n#define MEASURE_GENERIC(TEXT, MUL, FNCALL, CORR)\\\n    printf(TEXT);\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\\\n    for(i = 0; i < NTESTS; i++) {\\\n        t[i] = cpucycles() / CORR;\\\n        FNCALL;\\\n    }\\\n    t[NTESTS] = cpucycles();\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\\\n    result = ((stop.tv_sec - start.tv_sec) * 1e6 + \\\n        (stop.tv_nsec - start.tv_nsec) / 1e3) / (double)CORR;\\\n    display_result(result, t, NTESTS, MUL);\n#define MEASURT(TEXT, MUL, FNCALL)\\\n    MEASURE_GENERIC(\\\n        TEXT, MUL,\\\n        do {\\\n          for (int j = 0; j < 1000; j++) {\\\n            FNCALL;\\\n          }\\\n        } while (0);,\\\n    1000);\n#define MEASURE(TEXT, MUL, FNCALL) MEASURE_GENERIC(TEXT, MUL, FNCALL, 1)\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. 
*/\n    setbuf(stdout, NULL);\n    init_cpucycles();\n\n    spx_ctx ctx;\n\n    unsigned char pk[SPX_PK_BYTES];\n    unsigned char sk[SPX_SK_BYTES];\n    unsigned char *m = malloc(SPX_MLEN);\n    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);\n\n    unsigned char fors_pk[SPX_FORS_PK_BYTES];\n    unsigned char fors_m[SPX_FORS_MSG_BYTES];\n    unsigned char fors_sig[SPX_FORS_BYTES];\n    unsigned char addr[SPX_ADDR_BYTES];\n    unsigned char block[SPX_N];\n\n    unsigned char wots_pk[SPX_WOTS_PK_BYTES];\n\n    unsigned long long smlen;\n    unsigned long long mlen;\n    unsigned long long t[NTESTS+1];\n    struct timespec start, stop;\n    double result;\n    int i;\n\n    randombytes(m, SPX_MLEN);\n    randombytes(addr, SPX_ADDR_BYTES);\n\n    printf(\"Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\\n\",\n           SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES,\n           SPX_WOTS_W);\n\n    printf(\"Running %d iterations.\\n\", NTESTS);\n\n    MEASURT(\"thash                \", 1, thash(block, block, 1, &ctx, (uint32_t*)addr));\n    MEASURE(\"Generating keypair.. \", 1, crypto_sign_keypair(pk, sk));\n    MEASURE(\"  - WOTS pk gen..    \", (1 << SPX_TREE_HEIGHT), wots_gen_pkx1(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Signing..            \", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk));\n    MEASURE(\"  - FORS signing..   \", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr));\n    MEASURE(\"  - WOTS pk gen..    \", SPX_D * (1 << SPX_TREE_HEIGHT), wots_gen_pkx1(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Verifying..          
\", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk));\n\n    printf(\"Signature size: %d (%.2f KiB)\\n\", SPX_BYTES, SPX_BYTES / 1024.0);\n    printf(\"Public key size: %d (%.2f KiB)\\n\", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0);\n    printf(\"Secret key size: %d (%.2f KiB)\\n\", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0);\n\n    free(m);\n    free(sm);\n    free(mout);\n\n    return 0;\n}\n\nstatic void wots_gen_pkx1(unsigned char *pk, const spx_ctx *ctx,\n                  uint32_t addr[8]) {\n    struct leaf_info_x1 leaf;\n    unsigned steps[ SPX_WOTS_LEN ] = { 0 };\n    INITIALIZE_LEAF_INFO_X1(leaf, addr, steps);\n    wots_gen_leafx1(pk, ctx, 0, &leaf);\n}\n\n"
  },
  {
    "path": "ref/test/cycles.c",
    "content": "#include \"cycles.h\"\n\n#if defined(__aarch64__) && defined(__APPLE__)\n// Adapted from\n// https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2021/03/24/\n\n#include <dlfcn.h>\n#include <pthread.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#define KPERF_LIST                                                             \\\n  F(int, kpc_force_all_ctrs_set, int)                                          \\\n  F(int, kpc_set_counting, uint32_t)                                           \\\n  F(int, kpc_set_thread_counting, uint32_t)                                    \\\n  F(int, kpc_set_config, uint32_t, void *)                                     \\\n  F(uint32_t, kpc_get_counter_count, uint32_t)                                 \\\n  F(uint32_t, kpc_get_config_count, uint32_t)                                  \\\n  F(int, kpc_get_thread_counters, int, unsigned int, void *)\n\n#define F(ret, name, ...)                                                      \\\n  typedef ret name##proc(__VA_ARGS__);                                         \\\n  static name##proc *name;\nKPERF_LIST\n#undef F\n\nuint64_t g_counters[10];\nuint64_t g_config[10];\n\nstatic void configure_rdtsc(void) {\n  if (kpc_set_config(3, g_config)) {\n    printf(\"kpc_set_config failed\\n\");\n    return;\n  }\n\n  if (kpc_force_all_ctrs_set(1)) {\n    printf(\"kpc_force_all_ctrs_set failed\\n\");\n    return;\n  }\n\n  if (kpc_set_counting(3)) {\n    printf(\"kpc_set_counting failed\\n\");\n    return;\n  }\n\n  if (kpc_set_thread_counting(3)) {\n    printf(\"kpc_set_thread_counting failed\\n\");\n    return;\n  }\n}\n\nvoid init_cpucycles(void) {\n  void *kperf = dlopen(\n      \"/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf\",\n      RTLD_LAZY);\n  if (!kperf) {\n    printf(\"kperf = %p\\n\", kperf);\n    return;\n  }\n#define F(ret, name, ...)                                                      
\\\n  name = (name##proc *)(dlsym(kperf, #name));                                  \\\n  if (!name) {                                                                 \\\n    printf(\"%s = %p\\n\", #name, (void *)name);                                  \\\n    return;                                                                    \\\n  }\n  KPERF_LIST\n#undef F\n\n  if (kpc_get_counter_count(3) != 10) {\n    printf(\"wrong fixed counters count\\n\");\n    return;\n  }\n\n  if (kpc_get_config_count(3) != 8) {\n    printf(\"wrong fixed config count\\n\");\n    return;\n  }\n  g_config[0] = 0x02 | 0x20000;\n  g_config[3] = 0x8d | 0x20000;\n  g_config[4] = 0xcb | 0x20000;\n  g_config[5] = 0x8c | 0x20000;\n\n  configure_rdtsc();\n}\n\nunsigned long long cpucycles(void) {\n  static int warned = 0;\n  if (kpc_get_thread_counters(0, 10, g_counters)) {\n    if (!warned) {\n      printf(\"kpc_get_thread_counters failed, run as sudo?\\n\");\n      warned = 1;\n    }\n    return 1;\n  }\n  // g_counters[3 + 2] gives you the number of instructions 'decoded'\n  // whereas g_counters[1] might give you the number of instructions 'retired'.\n  return g_counters[0 + 2];\n}\n#else\nvoid init_cpucycles(void) {\n}\n\nunsigned long long cpucycles(void)\n{\n  unsigned long long result;\n  __asm volatile(\".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax\"\n    : \"=a\" (result) ::  \"%rdx\");\n  return result;\n}\n#endif\n"
  },
  {
    "path": "ref/test/cycles.h",
    "content": "#ifndef SPX_CYCLES_H\n#define SPX_CYCLES_H\n\nvoid init_cpucycles(void);\nunsigned long long cpucycles(void);\n\n#endif\n"
  },
  {
    "path": "ref/test/fors.c",
    "content": "#include <stdio.h>\n#include <string.h>\n\n#include \"../context.h\"\n#include \"../hash.h\"\n#include \"../fors.h\"\n#include \"../randombytes.h\"\n#include \"../params.h\"\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    spx_ctx ctx;\n\n    unsigned char pk1[SPX_FORS_PK_BYTES];\n    unsigned char pk2[SPX_FORS_PK_BYTES];\n    unsigned char sig[SPX_FORS_BYTES];\n    unsigned char m[SPX_FORS_MSG_BYTES];\n    uint32_t addr[8] = {0};\n\n    randombytes(ctx.sk_seed, SPX_N);\n    randombytes(ctx.pub_seed, SPX_N);\n    randombytes(m, SPX_FORS_MSG_BYTES);\n    randombytes((unsigned char *)addr, 8 * sizeof(uint32_t));\n\n    printf(\"Testing FORS signature and PK derivation.. \");\n\n    initialize_hash_function(&ctx);\n\n    fors_sign(sig, pk1, m, &ctx, addr);\n    fors_pk_from_sig(pk2, sig, m, &ctx, addr);\n\n    if (memcmp(pk1, pk2, SPX_FORS_PK_BYTES)) {\n        printf(\"failed!\\n\");\n        return -1;\n    }\n    printf(\"successful.\\n\");\n    return 0;\n}\n"
  },
  {
    "path": "ref/test/haraka.c",
    "content": "#include <stdio.h>\n#include <stdint.h>\n#include <string.h>\n#include <inttypes.h>\n\n#include \"../haraka.c\"\n#include \"../randombytes.h\"\n\nstatic int test_haraka_S_incremental(void) {\n    unsigned char input[521];\n    unsigned char check[521];\n    unsigned char output[521];\n    uint8_t s_inc_absorb[65];\n    uint8_t s_inc_squeeze[65];\n    uint8_t s_inc_squeeze_all[65];\n    uint8_t s_inc_both[65];\n    uint8_t s_combined[64];\n    int i;\n    int absorbed;\n    int squeezed;\n    int returncode = 0;\n\n    randombytes(input, 521);\n\n    haraka_S(check, 521, input, 521);\n\n    haraka_S_inc_init(s_inc_absorb);\n\n    absorbed = 0;\n    for (i = 0; i < 521 && absorbed + i <= 521; i++) {\n        haraka_S_inc_absorb(s_inc_absorb, input + absorbed, i);\n        absorbed += i;\n    }\n    haraka_S_inc_absorb(s_inc_absorb, input + absorbed, 521 - absorbed);\n\n    haraka_S_inc_finalize(s_inc_absorb);\n\n    memset(s_combined, 0, 64);\n    haraka_S_absorb(s_combined, HARAKAS_RATE, input, 521, 0x1F);\n\n    if (memcmp(s_inc_absorb, s_combined, 64 * sizeof(uint8_t))) {\n        printf(\"ERROR haraka_S state after incremental absorb did not match all-at-once absorb.\\n\");\n        printf(\"  Expected: \");\n        for (i = 0; i < 64; i++) {\n            printf(\"%02X\", s_combined[i]);\n        }\n        printf(\"\\n\");\n        printf(\"  State:    \");\n        for (i = 0; i < 64; i++) {\n            printf(\"%02X\", s_inc_absorb[i]);\n        }\n        printf(\"\\n\");\n        returncode = 1;\n    }\n\n    memcpy(s_inc_both, s_inc_absorb, 65 * sizeof(uint8_t));\n\n    haraka_S_squeezeblocks(output, 3, s_inc_absorb, HARAKAS_RATE);\n\n    if (memcmp(check, output, 3*HARAKAS_RATE)) {\n        printf(\"ERROR haraka_S incremental absorb did not match haraka_S.\\n\");\n        printf(\"  Expected: \");\n        for (i = 0; i < 3*HARAKAS_RATE; i++) {\n            printf(\"%02X\", check[i]);\n        }\n        printf(\"\\n\");\n        
printf(\"  Received: \");\n        for (i = 0; i < 3*HARAKAS_RATE; i++) {\n            printf(\"%02X\", output[i]);\n        }\n        printf(\"\\n\");\n        returncode = 1;\n    }\n\n    memset(s_inc_squeeze, 0, 65);\n    haraka_S_absorb(s_inc_squeeze, HARAKAS_RATE, input, 521, 0x1F);\n    s_inc_squeeze[64] = 0;\n\n    memcpy(s_inc_squeeze_all, s_inc_squeeze, 65 * sizeof(uint8_t));\n\n    haraka_S_inc_squeeze(output, 521, s_inc_squeeze_all);\n\n    if (memcmp(check, output, 521)) {\n        printf(\"ERROR haraka_S incremental squeeze-all did not match haraka_S.\\n\");\n        printf(\"  Expected: \");\n        for (i = 0; i < 521; i++) {\n            printf(\"%02X\", check[i]);\n        }\n        printf(\"\\n\");\n        printf(\"  Received: \");\n        for (i = 0; i < 521; i++) {\n            printf(\"%02X\", output[i]);\n        }\n        printf(\"\\n\");\n        returncode = 1;\n    }\n\n    squeezed = 0;\n    memset(output, 0, 521);\n    for (i = 0; i < 521 && squeezed + i <= 521; i++) {\n        haraka_S_inc_squeeze(output + squeezed, i, s_inc_squeeze);\n        squeezed += i;\n    }\n    haraka_S_inc_squeeze(output + squeezed, 521 - squeezed, s_inc_squeeze);\n\n    if (memcmp(check, output, 521)) {\n        printf(\"ERROR haraka_S incremental squeeze did not match haraka_S.\\n\");\n        printf(\"  Expected: \");\n        for (i = 0; i < 521; i++) {\n            printf(\"%02X\", check[i]);\n        }\n        printf(\"\\n\");\n        printf(\"  Received: \");\n        for (i = 0; i < 521; i++) {\n            printf(\"%02X\", output[i]);\n        }\n        printf(\"\\n\");\n        returncode = 1;\n    }\n\n    squeezed = 0;\n    memset(output, 0, 521);\n    for (i = 0; i < 521 && squeezed + i <= 521; i++) {\n        haraka_S_inc_squeeze(output + squeezed, i, s_inc_both);\n        squeezed += i;\n    }\n    haraka_S_inc_squeeze(output + squeezed, 521 - squeezed, s_inc_both);\n\n    if (memcmp(check, output, 521)) {\n        printf(\"ERROR 
haraka_S incremental absorb + squeeze did not match haraka_S.\\n\");\n        printf(\"  Expected: \");\n        for (i = 0; i < 521; i++) {\n            printf(\"%02X\", check[i]);\n        }\n        printf(\"\\n\");\n        printf(\"  Received: \");\n        for (i = 0; i < 521; i++) {\n            printf(\"%02X\", output[i]);\n        }\n        printf(\"\\n\");\n        returncode = 1;\n    }\n\n    return returncode;\n}\n\nint main(void) {\n    int result = 0;\n    result += test_haraka_S_incremental();\n\n    if (result != 0) {\n        puts(\"Errors occurred\");\n    }\n    return result;\n}\n"
  },
  {
    "path": "ref/test/spx.c",
    "content": "#include <stdio.h>\n#include <string.h>\n#include <stdlib.h>\n\n#include \"../api.h\"\n#include \"../params.h\"\n#include \"../randombytes.h\"\n\n#define SPX_MLEN 32\n#define SPX_SIGNATURES 1\n\nint main(void)\n{\n    int ret = 0;\n    int i;\n\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    unsigned char pk[SPX_PK_BYTES];\n    unsigned char sk[SPX_SK_BYTES];\n    unsigned char *m = malloc(SPX_MLEN);\n    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned long long smlen;\n    unsigned long long mlen;\n\n    randombytes(m, SPX_MLEN);\n\n    printf(\"Generating keypair.. \");\n\n    if (crypto_sign_keypair(pk, sk)) {\n        printf(\"failed!\\n\");\n        return -1;\n    }\n    printf(\"successful.\\n\");\n\n    printf(\"Testing %d signatures.. \\n\", SPX_SIGNATURES);\n\n    for (i = 0; i < SPX_SIGNATURES; i++) {\n        printf(\"  - iteration #%d:\\n\", i);\n\n        crypto_sign(sm, &smlen, m, SPX_MLEN, sk);\n\n        if (smlen != SPX_BYTES + SPX_MLEN) {\n            printf(\"  X smlen incorrect [%llu != %u]!\\n\",\n                   smlen, SPX_BYTES);\n            ret = -1;\n        }\n        else {\n            printf(\"    smlen as expected [%llu].\\n\", smlen);\n        }\n\n        /* Test if signature is valid. */\n        if (crypto_sign_open(mout, &mlen, sm, smlen, pk)) {\n            printf(\"  X verification failed!\\n\");\n            ret = -1;\n        }\n        else {\n            printf(\"    verification succeeded.\\n\");\n        }\n\n        /* Test if the correct message was recovered. 
*/\n        if (mlen != SPX_MLEN) {\n            printf(\"  X mlen incorrect [%llu != %u]!\\n\", mlen, SPX_MLEN);\n            ret = -1;\n        }\n        else {\n            printf(\"    mlen as expected [%llu].\\n\", mlen);\n        }\n        if (memcmp(m, mout, SPX_MLEN)) {\n            printf(\"  X output message incorrect!\\n\");\n            ret = -1;\n        }\n        else {\n            printf(\"    output message as expected.\\n\");\n        }\n\n        /* Test if signature is valid when validating in-place. */\n        if (crypto_sign_open(sm, &mlen, sm, smlen, pk)) {\n            printf(\"  X in-place verification failed!\\n\");\n            ret = -1;\n        }\n        else {\n            printf(\"    in-place verification succeeded.\\n\");\n        }\n\n        /* Test if flipping bits invalidates the signature (it should). */\n\n        /* Flip the first bit of the message. Should invalidate. */\n        sm[smlen - 1] ^= 1;\n        if (!crypto_sign_open(mout, &mlen, sm, smlen, pk)) {\n            printf(\"  X flipping a bit of m DID NOT invalidate signature!\\n\");\n            ret = -1;\n        }\n        else {\n            printf(\"    flipping a bit of m invalidates signature.\\n\");\n        }\n        sm[smlen - 1] ^= 1;\n\n#ifdef SPX_TEST_INVALIDSIG\n        int j;\n        /* Flip one bit per hash; the signature is entirely hashes. */\n        for (j = 0; j < (int)(smlen - SPX_MLEN); j += SPX_N) {\n            sm[j] ^= 1;\n            if (!crypto_sign_open(mout, &mlen, sm, smlen, pk)) {\n                printf(\"  X flipping bit %d DID NOT invalidate sig + m!\\n\", j);\n                sm[j] ^= 1;\n                ret = -1;\n                break;\n            }\n            sm[j] ^= 1;\n        }\n        if (j >= (int)(smlen - SPX_MLEN)) {\n            printf(\"    changing any signature hash invalidates signature.\\n\");\n        }\n#endif\n    }\n\n    free(m);\n    free(sm);\n    free(mout);\n\n    return ret;\n}\n"
  },
  {
    "path": "ref/thash.h",
    "content": "#ifndef SPX_THASH_H\n#define SPX_THASH_H\n\n#include \"context.h\"\n#include \"params.h\"\n\n#include <stdint.h>\n\n#define thash SPX_NAMESPACE(thash)\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8]);\n\n#endif\n"
  },
  {
    "path": "ref/thash_haraka_robust.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"haraka.h\"\n\n/**\n * Takes an array of inblocks concatenated arrays of SPX_N bytes.\n */\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n    SPX_VLA(uint8_t, buf, SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(uint8_t, bitmask, inblocks*SPX_N);\n    unsigned char outbuf[32];\n    unsigned char buf_tmp[64];\n    unsigned int i;\n\n    if (inblocks == 1) {\n        /* F function */\n        /* Since SPX_N may be smaller than 32, we need a temporary buffer. */\n        memset(buf_tmp, 0, 64);\n        memcpy(buf_tmp, addr, 32);\n\n        haraka256(outbuf, buf_tmp, ctx);\n        for (i = 0; i < inblocks * SPX_N; i++) {\n            buf_tmp[SPX_ADDR_BYTES + i] = in[i] ^ outbuf[i];\n        }\n        haraka512(outbuf, buf_tmp, ctx);\n        memcpy(out, outbuf, SPX_N);\n    } else {\n        /* All other tweakable hashes */\n        memcpy(buf, addr, 32);\n        haraka_S(bitmask, inblocks * SPX_N, buf, SPX_ADDR_BYTES, ctx);\n\n        for (i = 0; i < inblocks * SPX_N; i++) {\n            buf[SPX_ADDR_BYTES + i] = in[i] ^ bitmask[i];\n        }\n\n        haraka_S(out, SPX_N, buf, SPX_ADDR_BYTES + inblocks*SPX_N, ctx);\n    }\n}\n"
  },
  {
    "path": "ref/thash_haraka_simple.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"haraka.h\"\n\n/**\n * Takes an array of inblocks concatenated arrays of SPX_N bytes.\n */\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n    SPX_VLA(uint8_t, buf, SPX_ADDR_BYTES + inblocks*SPX_N);\n    unsigned char outbuf[32];\n    unsigned char buf_tmp[64];\n\n    if (inblocks == 1) {\n        /* F function */\n        /* Since SPX_N may be smaller than 32, we need a temporary buffer. */\n        memset(buf_tmp, 0, 64);\n        memcpy(buf_tmp, addr, 32);\n        memcpy(buf_tmp + SPX_ADDR_BYTES, in, SPX_N);\n\n        haraka512(outbuf, buf_tmp, ctx);\n        memcpy(out, outbuf, SPX_N);\n    } else {\n        /* All other tweakable hashes */\n        memcpy(buf, addr, 32);\n        memcpy(buf + SPX_ADDR_BYTES, in, inblocks * SPX_N);\n\n        haraka_S(out, SPX_N, buf, SPX_ADDR_BYTES + inblocks*SPX_N, ctx);\n    }\n}\n"
  },
  {
    "path": "ref/thash_sha2_robust.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n#include \"sha2.h\"\n\n#if SPX_SHA512\nstatic void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8]);\n#endif\n\n/**\n * Takes an array of inblocks concatenated arrays of SPX_N bytes.\n */\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n#if SPX_SHA512\n    if (inblocks > 1) {\n\tthash_512(out, in, inblocks, ctx, addr);\n        return;\n    }\n#endif\n    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];\n    SPX_VLA(uint8_t, bitmask, inblocks * SPX_N);\n    SPX_VLA(uint8_t, buf, SPX_N + SPX_SHA256_OUTPUT_BYTES + inblocks*SPX_N);\n    uint8_t sha2_state[40];\n    unsigned int i;\n\n    memcpy(buf, ctx->pub_seed, SPX_N);\n    memcpy(buf + SPX_N, addr, SPX_SHA256_ADDR_BYTES);\n    mgf1_256(bitmask, inblocks * SPX_N, buf, SPX_N + SPX_SHA256_ADDR_BYTES);\n\n    /* Retrieve precomputed state containing pub_seed */\n    memcpy(sha2_state, ctx->state_seeded, 40 * sizeof(uint8_t));\n\n    for (i = 0; i < inblocks * SPX_N; i++) {\n        buf[SPX_N + SPX_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i];\n    }\n\n    sha256_inc_finalize(outbuf, sha2_state, buf + SPX_N,\n                        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n    memcpy(out, outbuf, SPX_N);\n}\n\n#if SPX_SHA512\nstatic void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n    unsigned char outbuf[SPX_SHA512_OUTPUT_BYTES];\n    SPX_VLA(uint8_t, bitmask, inblocks * SPX_N);\n    SPX_VLA(uint8_t, buf, SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n    uint8_t sha2_state[72];\n    unsigned int i;\n\n    memcpy(buf, ctx->pub_seed, SPX_N);\n    memcpy(buf + SPX_N, addr, SPX_SHA256_ADDR_BYTES);\n    mgf1_512(bitmask, 
inblocks * SPX_N, buf, SPX_N + SPX_SHA256_ADDR_BYTES);\n\n    /* Retrieve precomputed state containing pub_seed */\n    memcpy(sha2_state, ctx->state_seeded_512, 72 * sizeof(uint8_t));\n\n    for (i = 0; i < inblocks * SPX_N; i++) {\n        buf[SPX_N + SPX_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i];\n    }\n\n    sha512_inc_finalize(outbuf, sha2_state, buf + SPX_N,\n                        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n    memcpy(out, outbuf, SPX_N);\n}\n#endif\n"
  },
  {
    "path": "ref/thash_sha2_simple.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n#include \"sha2.h\"\n\n#if SPX_SHA512\nstatic void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8]);\n#endif\n\n/**\n * Takes an array of inblocks concatenated arrays of SPX_N bytes.\n */\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n#if SPX_SHA512\n    if (inblocks > 1) {\n\tthash_512(out, in, inblocks, ctx, addr);\n        return;\n    }\n#endif\n\n    unsigned char outbuf[SPX_SHA256_OUTPUT_BYTES];\n    uint8_t sha2_state[40];\n    SPX_VLA(uint8_t, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n\n    /* Retrieve precomputed state containing pub_seed */\n    memcpy(sha2_state, ctx->state_seeded, 40 * sizeof(uint8_t));\n\n    memcpy(buf, addr, SPX_SHA256_ADDR_BYTES);\n    memcpy(buf + SPX_SHA256_ADDR_BYTES, in, inblocks * SPX_N);\n\n    sha256_inc_finalize(outbuf, sha2_state, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n    memcpy(out, outbuf, SPX_N);\n}\n\n#if SPX_SHA512\nstatic void thash_512(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n    unsigned char outbuf[SPX_SHA512_OUTPUT_BYTES];\n    uint8_t sha2_state[72];\n    SPX_VLA(uint8_t, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n\n    /* Retrieve precomputed state containing pub_seed */\n    memcpy(sha2_state, ctx->state_seeded_512, 72 * sizeof(uint8_t));\n\n    memcpy(buf, addr, SPX_SHA256_ADDR_BYTES);\n    memcpy(buf + SPX_SHA256_ADDR_BYTES, in, inblocks * SPX_N);\n\n    sha512_inc_finalize(outbuf, sha2_state, buf, SPX_SHA256_ADDR_BYTES + inblocks*SPX_N);\n    memcpy(out, outbuf, SPX_N);\n}\n#endif\n"
  },
  {
    "path": "ref/thash_shake_robust.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"fips202.h\"\n\n/**\n * Takes an array of inblocks concatenated arrays of SPX_N bytes.\n */\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n    SPX_VLA(uint8_t, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n    SPX_VLA(uint8_t, bitmask, inblocks * SPX_N);\n    unsigned int i;\n\n    memcpy(buf, ctx->pub_seed, SPX_N);\n    memcpy(buf + SPX_N, addr, SPX_ADDR_BYTES);\n\n    shake256(bitmask, inblocks * SPX_N, buf, SPX_N + SPX_ADDR_BYTES);\n\n    for (i = 0; i < inblocks * SPX_N; i++) {\n        buf[SPX_N + SPX_ADDR_BYTES + i] = in[i] ^ bitmask[i];\n    }\n\n    shake256(out, SPX_N, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n}\n"
  },
  {
    "path": "ref/thash_shake_simple.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"fips202.h\"\n\n/**\n * Takes an array of inblocks concatenated arrays of SPX_N bytes.\n */\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8])\n{\n    SPX_VLA(uint8_t, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n\n    memcpy(buf, ctx->pub_seed, SPX_N);\n    memcpy(buf + SPX_N, addr, SPX_ADDR_BYTES);\n    memcpy(buf + SPX_N + SPX_ADDR_BYTES, in, inblocks * SPX_N);\n\n    shake256(out, SPX_N, buf, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n}\n"
  },
  {
    "path": "ref/utils.c",
    "content": "#include <string.h>\n\n#include \"utils.h\"\n#include \"params.h\"\n#include \"hash.h\"\n#include \"thash.h\"\n#include \"address.h\"\n\n/**\n * Converts the value of 'in' to 'outlen' bytes in big-endian byte order.\n */\nvoid ull_to_bytes(unsigned char *out, unsigned int outlen,\n                  unsigned long long in)\n{\n    int i;\n\n    /* Iterate over out in decreasing order, for big-endianness. */\n    for (i = (signed int)outlen - 1; i >= 0; i--) {\n        out[i] = in & 0xff;\n        in = in >> 8;\n    }\n}\n\nvoid u32_to_bytes(unsigned char *out, uint32_t in)\n{\n    out[0] = (unsigned char)(in >> 24);\n    out[1] = (unsigned char)(in >> 16);\n    out[2] = (unsigned char)(in >> 8);\n    out[3] = (unsigned char)in;\n}\n\n/**\n * Converts the inlen bytes in 'in' from big-endian byte order to an integer.\n */\nunsigned long long bytes_to_ull(const unsigned char *in, unsigned int inlen)\n{\n    unsigned long long retval = 0;\n    unsigned int i;\n\n    for (i = 0; i < inlen; i++) {\n        retval |= ((unsigned long long)in[i]) << (8*(inlen - 1 - i));\n    }\n    return retval;\n}\n\n/**\n * Computes a root node given a leaf and an auth path.\n * Expects address to be complete other than the tree_height and tree_index.\n */\nvoid compute_root(unsigned char *root, const unsigned char *leaf,\n                  uint32_t leaf_idx, uint32_t idx_offset,\n                  const unsigned char *auth_path, uint32_t tree_height,\n                  const spx_ctx *ctx, uint32_t addr[8])\n{\n    uint32_t i;\n    unsigned char buffer[2 * SPX_N];\n\n    /* If leaf_idx is odd (last bit = 1), current path element is a right child\n       and auth_path has to go left. Otherwise it is the other way around. 
*/\n    if (leaf_idx & 1) {\n        memcpy(buffer + SPX_N, leaf, SPX_N);\n        memcpy(buffer, auth_path, SPX_N);\n    }\n    else {\n        memcpy(buffer, leaf, SPX_N);\n        memcpy(buffer + SPX_N, auth_path, SPX_N);\n    }\n    auth_path += SPX_N;\n\n    for (i = 0; i < tree_height - 1; i++) {\n        leaf_idx >>= 1;\n        idx_offset >>= 1;\n        /* Set the address of the node we're creating. */\n        set_tree_height(addr, i + 1);\n        set_tree_index(addr, leaf_idx + idx_offset);\n\n        /* Pick the right or left neighbor, depending on parity of the node. */\n        if (leaf_idx & 1) {\n            thash(buffer + SPX_N, buffer, 2, ctx, addr);\n            memcpy(buffer, auth_path, SPX_N);\n        }\n        else {\n            thash(buffer, buffer, 2, ctx, addr);\n            memcpy(buffer + SPX_N, auth_path, SPX_N);\n        }\n        auth_path += SPX_N;\n    }\n\n    /* The last iteration is exceptional; we do not copy an auth_path node. */\n    leaf_idx >>= 1;\n    idx_offset >>= 1;\n    set_tree_height(addr, tree_height);\n    set_tree_index(addr, leaf_idx + idx_offset);\n    thash(root, buffer, 2, ctx, addr);\n}\n\n/**\n * For a given leaf index, computes the authentication path and the resulting\n * root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. 
SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE).\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n */\nvoid treehash(unsigned char *root, unsigned char *auth_path, const spx_ctx* ctx,\n              uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,\n              void (*gen_leaf)(\n                 unsigned char* /* leaf */,\n                 const spx_ctx* /* ctx */,\n                 uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */),\n              uint32_t tree_addr[8])\n{\n    SPX_VLA(uint8_t, stack, (tree_height+1)*SPX_N);\n    SPX_VLA(unsigned int, heights, tree_height+1);\n    unsigned int offset = 0;\n    uint32_t idx;\n    uint32_t tree_idx;\n\n    for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) {\n        /* Add the next leaf node to the stack. */\n        gen_leaf(stack + offset*SPX_N, ctx, idx + idx_offset, tree_addr);\n        offset++;\n        heights[offset - 1] = 0;\n\n        /* If this is a node we need for the auth path.. */\n        if ((leaf_idx ^ 0x1) == idx) {\n            memcpy(auth_path, stack + (offset - 1)*SPX_N, SPX_N);\n        }\n\n        /* While the top-most nodes are of equal height.. */\n        while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) {\n            /* Compute index of the new node, in the next layer. */\n            tree_idx = (idx >> (heights[offset - 1] + 1));\n\n            /* Set the address of the node we're creating. */\n            set_tree_height(tree_addr, heights[offset - 1] + 1);\n            set_tree_index(tree_addr,\n                           tree_idx + (idx_offset >> (heights[offset-1] + 1)));\n            /* Hash the top-most nodes from the stack together. */\n            thash(stack + (offset - 2)*SPX_N,\n                  stack + (offset - 2)*SPX_N, 2, ctx, tree_addr);\n            offset--;\n            /* Note that the top-most node is now one layer higher. 
*/\n            heights[offset - 1]++;\n\n            /* If this is a node we need for the auth path.. */\n            if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) {\n                memcpy(auth_path + heights[offset - 1]*SPX_N,\n                       stack + (offset - 1)*SPX_N, SPX_N);\n            }\n        }\n    }\n    memcpy(root, stack, SPX_N);\n}\n"
  },
  {
    "path": "ref/utils.h",
    "content": "#ifndef SPX_UTILS_H\n#define SPX_UTILS_H\n\n#include <stdint.h>\n#include \"params.h\"\n#include \"context.h\"\n\n\n/* To support MSVC use alloca() instead of VLAs. See #20. */\n#ifdef _MSC_VER\n/* MSVC defines _alloca in malloc.h */\n# include <malloc.h>\n/* Note: _malloca(), which is recommended over deprecated _alloca,\n   requires that you call _freea(). So we stick with _alloca */ \n# define SPX_VLA(__t,__x,__s) __t *__x = (__t*)_alloca((__s)*sizeof(__t))\n#else\n# define SPX_VLA(__t,__x,__s) __t __x[__s]\n#endif\n\n/**\n * Converts the value of 'in' to 'outlen' bytes in big-endian byte order.\n */\n#define ull_to_bytes SPX_NAMESPACE(ull_to_bytes)\nvoid ull_to_bytes(unsigned char *out, unsigned int outlen,\n                  unsigned long long in);\n#define u32_to_bytes SPX_NAMESPACE(u32_to_bytes)\nvoid u32_to_bytes(unsigned char *out, uint32_t in);\n\n/**\n * Converts the inlen bytes in 'in' from big-endian byte order to an integer.\n */\n#define bytes_to_ull SPX_NAMESPACE(bytes_to_ull)\nunsigned long long bytes_to_ull(const unsigned char *in, unsigned int inlen);\n\n/**\n * Computes a root node given a leaf and an auth path.\n * Expects address to be complete other than the tree_height and tree_index.\n */\n#define compute_root SPX_NAMESPACE(compute_root)\nvoid compute_root(unsigned char *root, const unsigned char *leaf,\n                  uint32_t leaf_idx, uint32_t idx_offset,\n                  const unsigned char *auth_path, uint32_t tree_height,\n                  const spx_ctx *ctx, uint32_t addr[8]);\n\n/**\n * For a given leaf index, computes the authentication path and the resulting\n * root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. 
SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE).\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n */\n#define treehash SPX_NAMESPACE(treehash)\nvoid treehash(unsigned char *root, unsigned char *auth_path,\n              const spx_ctx* ctx,\n              uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,\n              void (*gen_leaf)(\n                 unsigned char* /* leaf */,\n                 const spx_ctx* ctx /* ctx */,\n                 uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */),\n              uint32_t tree_addr[8]);\n\n\n#endif\n"
  },
  {
    "path": "ref/utilsx1.c",
    "content": "#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx1.h\"\n#include \"params.h\"\n#include \"thash.h\"\n#include \"address.h\"\n\n/*\n * Generate the entire Merkle tree, computing the authentication path for\n * leaf_idx, and the resulting root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE)\n *\n * This expects tree_addr to be initialized to the addr structures for the\n * Merkle tree nodes\n *\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This works by using the standard Merkle tree building algorithm,\n */\nvoid treehashx1(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx* ctx,\n                uint32_t leaf_idx, uint32_t idx_offset,\n                uint32_t tree_height,\n                void (*gen_leaf)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx* /* ctx */,\n                   uint32_t idx, void *info),\n                uint32_t tree_addr[8],\n                void *info)\n{\n    /* This is where we keep the intermediate nodes */\n    SPX_VLA(uint8_t, stack, tree_height*SPX_N);\n\n    uint32_t idx;\n    uint32_t max_idx = (uint32_t)((1 << tree_height) - 1);\n    for (idx = 0;; idx++) {\n        unsigned char current[2*SPX_N];   /* Current logical node is at */\n            /* index[SPX_N].  
We do this to minimize the number of copies */\n            /* needed during a thash */\n        gen_leaf( &current[SPX_N], ctx, idx + idx_offset,\n                    info );\n\n        /* Now combine the freshly generated right node with previously */\n        /* generated left ones */\n        uint32_t internal_idx_offset = idx_offset;\n        uint32_t internal_idx = idx;\n        uint32_t internal_leaf = leaf_idx;\n        uint32_t h;     /* The height we are in the Merkle tree */\n        for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) {\n\n            /* Check if we hit the top of the tree */\n            if (h == tree_height) {\n                /* We hit the root; return it */\n                memcpy( root, &current[SPX_N], SPX_N );\n                return;\n            }\n\n            /*\n             * Check if the node we have is a part of the\n             * authentication path; if it is, write it out\n             */\n            if ((internal_idx ^ internal_leaf) == 0x01) {\n                memcpy( &auth_path[ h * SPX_N ],\n                        &current[SPX_N],\n                        SPX_N );\n            }\n\n            /*\n             * Check if we're at a left child; if so, stop going up the stack\n             * Exception: if we've reached the end of the tree, keep on going\n             * (so we combine the last 4 nodes into the one root node in two\n             * more iterations)\n             */\n            if ((internal_idx & 1) == 0 && idx < max_idx) {\n                break;\n            }\n\n            /* Ok, we're at a right node */\n            /* Now combine the left and right logical nodes together */\n\n            /* Set the address of the node we're creating. 
*/\n            internal_idx_offset >>= 1;\n            set_tree_height(tree_addr, h + 1);\n            set_tree_index(tree_addr, internal_idx/2 + internal_idx_offset );\n\n            unsigned char *left = &stack[h * SPX_N];\n            memcpy( &current[0], left, SPX_N );\n            thash( &current[1 * SPX_N],\n                   &current[0 * SPX_N],\n                   2, ctx, tree_addr);\n        }\n\n        /* We've hit a left child; save the current for when we get the */\n        /* corresponding right child */\n        memcpy( &stack[h * SPX_N], &current[SPX_N], SPX_N);\n    }\n}\n"
  },
  {
    "path": "ref/utilsx1.h",
    "content": "#ifndef SPX_UTILSX4_H\n#define SPX_UTILSX4_H\n\n#include <stdint.h>\n#include \"params.h\"\n#include \"context.h\"\n\n/**\n * For a given leaf index, computes the authentication path and the resulting\n * root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE).\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n */\n#define treehashx1 SPX_NAMESPACE(treehashx1)\nvoid treehashx1(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx* ctx,\n                uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,\n                void (*gen_leaf)(\n                   unsigned char* /* Where to write the leaf */,\n                   const spx_ctx* /* ctx */,\n                   uint32_t addr_idx, void *info),\n                uint32_t tree_addrx4[8], void *info);\n\n#endif\n"
  },
  {
    "path": "ref/wots.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx1.h\"\n#include \"hash.h\"\n#include \"thash.h\"\n#include \"wots.h\"\n#include \"wotsx1.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n// TODO clarify address expectations, and make them more uniform.\n// TODO i.e. do we expect types to be set already?\n// TODO and do we expect modifications or copies?\n\n/**\n * Computes the chaining function.\n * out and in have to be n-byte arrays.\n *\n * Interprets in as start-th value of the chain.\n * addr has to contain the address of the chain.\n */\nstatic void gen_chain(unsigned char *out, const unsigned char *in,\n                      unsigned int start, unsigned int steps,\n                      const spx_ctx *ctx, uint32_t addr[8])\n{\n    uint32_t i;\n\n    /* Initialize out with the value at position 'start'. */\n    memcpy(out, in, SPX_N);\n\n    /* Iterate 'steps' calls to the hash function. */\n    for (i = start; i < (start+steps) && i < SPX_WOTS_W; i++) {\n        set_hash_addr(addr, i);\n        thash(out, out, 1, ctx, addr);\n    }\n}\n\n/**\n * base_w algorithm as described in draft.\n * Interprets an array of bytes as integers in base w.\n * This only works when log_w is a divisor of 8.\n */\nstatic void base_w(unsigned int *output, const int out_len,\n                   const unsigned char *input)\n{\n    int in = 0;\n    int out = 0;\n    unsigned char total;\n    int bits = 0;\n    int consumed;\n\n    for (consumed = 0; consumed < out_len; consumed++) {\n        if (bits == 0) {\n            total = input[in];\n            in++;\n            bits += 8;\n        }\n        bits -= SPX_WOTS_LOGW;\n        output[out] = (total >> bits) & (SPX_WOTS_W - 1);\n        out++;\n    }\n}\n\n/* Computes the WOTS+ checksum over a message (in base_w). 
*/\nstatic void wots_checksum(unsigned int *csum_base_w,\n                          const unsigned int *msg_base_w)\n{\n    unsigned int csum = 0;\n    unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8];\n    unsigned int i;\n\n    /* Compute checksum. */\n    for (i = 0; i < SPX_WOTS_LEN1; i++) {\n        csum += SPX_WOTS_W - 1 - msg_base_w[i];\n    }\n\n    /* Convert checksum to base_w. */\n    /* Make sure expected empty zero bits are the least significant bits. */\n    csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8);\n    ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum);\n    base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes);\n}\n\n/* Takes a message and derives the matching chain lengths. */\nvoid chain_lengths(unsigned int *lengths, const unsigned char *msg)\n{\n    base_w(lengths, SPX_WOTS_LEN1, msg);\n    wots_checksum(lengths + SPX_WOTS_LEN1, lengths);\n}\n\n/**\n * Takes a WOTS signature and an n-byte message, computes a WOTS public key.\n *\n * Writes the computed public key to 'pk'.\n */\nvoid wots_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *msg,\n                      const spx_ctx *ctx, uint32_t addr[8])\n{\n    unsigned int lengths[SPX_WOTS_LEN];\n    uint32_t i;\n\n    chain_lengths(lengths, msg);\n\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        set_chain_addr(addr, i);\n        gen_chain(pk + i*SPX_N, sig + i*SPX_N,\n                  lengths[i], SPX_WOTS_W - 1 - lengths[i], ctx, addr);\n    }\n}\n"
  },
  {
    "path": "ref/wots.h",
    "content": "#ifndef SPX_WOTS_H\n#define SPX_WOTS_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n#include \"context.h\"\n\n/**\n * Takes a WOTS signature and an n-byte message, computes a WOTS public key.\n *\n * Writes the computed public key to 'pk'.\n */\n#define wots_pk_from_sig SPX_NAMESPACE(wots_pk_from_sig)\nvoid wots_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *msg,\n                      const spx_ctx *ctx, uint32_t addr[8]);\n\n/*\n * Compute the chain lengths needed for a given message hash\n */\n#define chain_lengths SPX_NAMESPACE(chain_lengths)\nvoid chain_lengths(unsigned int *lengths, const unsigned char *msg);\n\n#endif\n"
  },
  {
    "path": "ref/wotsx1.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"hash.h\"\n#include \"thash.h\"\n#include \"wots.h\"\n#include \"wotsx1.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n/*\n * This generates a WOTS public key\n * It also generates the WOTS signature if leaf_info indicates\n * that we're signing with this WOTS key\n */\nvoid wots_gen_leafx1(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info) {\n    struct leaf_info_x1 *info = v_info;\n    uint32_t *leaf_addr = info->leaf_addr;\n    uint32_t *pk_addr = info->pk_addr;\n    unsigned int i, k;\n    unsigned char pk_buffer[ SPX_WOTS_BYTES ];\n    unsigned char *buffer;\n    uint32_t wots_k_mask;\n\n    if (leaf_idx == info->wots_sign_leaf) {\n        /* We're traversing the leaf that's signing; generate the WOTS */\n        /* signature */\n        wots_k_mask = 0;\n    } else {\n        /* Nope, we're just generating pk's; turn off the signature logic */\n        wots_k_mask = (uint32_t)~0;\n    }\n\n    set_keypair_addr( leaf_addr, leaf_idx );\n    set_keypair_addr( pk_addr, leaf_idx );\n\n    for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) {\n        uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k to */\n            /* the step if we're generating a signature, ~0 if we're not */\n\n        /* Start with the secret seed */\n        set_chain_addr(leaf_addr, i);\n        set_hash_addr(leaf_addr, 0);\n        set_type(leaf_addr, SPX_ADDR_TYPE_WOTSPRF);\n\n        prf_addr(buffer, ctx, leaf_addr);\n\n        set_type(leaf_addr, SPX_ADDR_TYPE_WOTS);\n\n        /* Iterate down the WOTS chain */\n        for (k=0;; k++) {\n            /* Check if this is the value that needs to be saved as a */\n            /* part of the WOTS signature */\n            if (k == wots_k) {\n                memcpy( info->wots_sig + i * SPX_N, buffer, SPX_N );\n            }\n\n            /* 
Check if we hit the top of the chain */\n            if (k == SPX_WOTS_W - 1) break;\n\n            /* Iterate one step on the chain */\n            set_hash_addr(leaf_addr, k);\n\n            thash(buffer, buffer, 1, ctx, leaf_addr);\n        }\n    }\n\n    /* Do the final thash to generate the public keys */\n    thash(dest, pk_buffer, SPX_WOTS_LEN, ctx, pk_addr);\n}\n"
  },
  {
    "path": "ref/wotsx1.h",
    "content": "#if !defined( WOTSX1_H_ )\n#define WOTSX1_H_ \n\n#include <string.h>\n\n/*\n * This is here to provide an interface to the internal wots_gen_leafx1\n * routine.  While this routine is not referenced in the package outside of\n * wots.c, it is called from the stand-alone benchmark code to characterize\n * the performance\n */\nstruct leaf_info_x1 {\n    unsigned char *wots_sig;\n    uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */\n    uint32_t *wots_steps;\n    uint32_t leaf_addr[8];\n    uint32_t pk_addr[8];\n};\n\n/* Macro to set the leaf_info to something 'benign', that is, it would */\n/* run with the same time as it does during the real signing process */\n/* Used only by the benchmark code */\n#define INITIALIZE_LEAF_INFO_X1(info, addr, step_buffer) { \\\n    info.wots_sig = 0;             \\\n    info.wots_sign_leaf = ~0u;      \\\n    info.wots_steps = step_buffer; \\\n    memcpy( &info.leaf_addr[0], addr, 32 ); \\\n    memcpy( &info.pk_addr[0], addr, 32 ); \\\n}\n\n#define wots_gen_leafx1 SPX_NAMESPACE(wots_gen_leafx1)\nvoid wots_gen_leafx1(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info);\n\n#endif /* WOTSX1_H_ */\n"
  },
  {
    "path": "sha2-avx2/.gitignore",
    "content": "test/*\n!test/*.c\nPQCsignKAT_*.rsp\nPQCsignKAT_*.req\nPQCgenKAT_sign\nkeccak4x/KeccakP-1600-times4-SIMD256.o"
  },
  {
    "path": "sha2-avx2/Makefile",
    "content": "PARAMS = sphincs-sha2-128f\nTHASH = robust\n\nCC = /usr/bin/gcc\nCFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -march=native -flto -fomit-frame-pointer -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS)\n\n\nSOURCES =          hash_sha2.c hash_sha2x8.c thash_sha2_$(THASH).c thash_sha2_$(THASH)x8.c sha2.c sha256x8.c sha512x4.c sha256avx.c address.c randombytes.c merkle.c wots.c utils.c utilsx8.c fors.c sign.c\nHEADERS = params.h hash.h        hashx8.h        thash.h                 thashx8.h               sha2.h sha256x8.h sha512x4.h sha256avx.h address.h randombytes.h merkle.h wots.h utils.h utilsx8.h fors.h api.h\n\nDET_SOURCES = $(SOURCES:randombytes.%=rng.%)\nDET_HEADERS = $(HEADERS:randombytes.%=rng.%)\n\nTESTS = test/fors \\\n\t\ttest/spx \\\n\t\ttest/thashx8 \\\n\nBENCHMARK = test/benchmark\n\n.PHONY: clean test benchmark\n\ndefault: PQCgenKAT_sign\n\nall: PQCgenKAT_sign tests benchmarks\n\ntests: $(TESTS)\n\ntest: $(TESTS:=.exec)\n\nbenchmarks: $(BENCHMARK)\n\nbenchmark: $(BENCHMARK:=.exec)\n\nPQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto\n\ntest/%: test/%.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)\n\ntest/%.exec: test/%\n\t@$<\n\nclean:\n\t-$(RM) $(TESTS)\n\t-$(RM) $(BENCHMARK)\n\t-$(RM) PQCgenKAT_sign\n\t-$(RM) PQCsignKAT_*.rsp\n\t-$(RM) PQCsignKAT_*.req\n"
  },
  {
    "path": "sha2-avx2/context.h",
    "content": "#ifndef SPX_CONTEXT_H\n#define SPX_CONTEXT_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n\ntypedef struct {\n    uint8_t pub_seed[SPX_N];\n    uint8_t sk_seed[SPX_N];\n\n    uint8_t state_seeded[40];\n#if SPX_SHA512\n    uint8_t state_seeded_512[72];\n#endif\n} spx_ctx;\n\n#endif\n"
  },
  {
    "path": "sha2-avx2/fors.c",
    "content": "#include <stdlib.h>\n#include <stdint.h>\n#include <string.h>\n\n#include \"fors.h\"\n#include \"utils.h\"\n#include \"utilsx8.h\"\n#include \"hash.h\"\n#include \"hashx8.h\"\n#include \"thash.h\"\n#include \"thashx8.h\"\n#include \"address.h\"\n\nstatic void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx,\n                        uint32_t fors_leaf_addr[8])\n{\n    prf_addr(sk, ctx, fors_leaf_addr);\n}\n\nstatic void fors_gen_skx8(unsigned char *sk0,\n                          unsigned char *sk1,\n                          unsigned char *sk2,\n                          unsigned char *sk3,\n                          unsigned char *sk4,\n                          unsigned char *sk5,\n                          unsigned char *sk6,\n                          unsigned char *sk7, const spx_ctx *ctx,\n                          uint32_t fors_leaf_addrx8[8*8])\n{\n    prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7,\n               ctx, fors_leaf_addrx8);\n}\n\nstatic void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,\n                            const spx_ctx *ctx,\n                            uint32_t fors_leaf_addr[8])\n{\n    thash(leaf, sk, 1, ctx, fors_leaf_addr);\n}\n\nstatic void fors_sk_to_leafx8(unsigned char *leaf0,\n                              unsigned char *leaf1,\n                              unsigned char *leaf2,\n                              unsigned char *leaf3,\n                              unsigned char *leaf4,\n                              unsigned char *leaf5,\n                              unsigned char *leaf6,\n                              unsigned char *leaf7,\n                              const unsigned char *sk0,\n                              const unsigned char *sk1,\n                              const unsigned char *sk2,\n                              const unsigned char *sk3,\n                              const unsigned char *sk4,\n                              const unsigned char *sk5,\n           
                   const unsigned char *sk6,\n                              const unsigned char *sk7,\n                              const spx_ctx *ctx,\n                              uint32_t fors_leaf_addrx8[8*8])\n{\n    thashx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7,\n            sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7,\n            1, ctx, fors_leaf_addrx8);\n}\n\nstruct fors_gen_leaf_info {\n    uint32_t leaf_addrx[8*8];\n};\n\nstatic void fors_gen_leafx8(unsigned char *leaf,\n                            const spx_ctx *ctx,\n                            uint32_t addr_idx, void *info)\n{\n    struct fors_gen_leaf_info *fors_info = info;\n    uint32_t *fors_leaf_addrx8 = fors_info->leaf_addrx;\n    unsigned int j;\n\n    /* Only set the parts that the caller doesn't set */\n    for (j = 0; j < 8; j++) {\n        set_tree_index(fors_leaf_addrx8 + j*8, addr_idx + j);\n        set_type(fors_leaf_addrx8 + j*8, SPX_ADDR_TYPE_FORSPRF);\n    }\n\n    fors_gen_skx8(leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 2*SPX_N,\n                  leaf + 3*SPX_N,\n                  leaf + 4*SPX_N,\n                  leaf + 5*SPX_N,\n                  leaf + 6*SPX_N,\n                  leaf + 7*SPX_N,\n                  ctx, fors_leaf_addrx8);\n\n    for (j = 0; j < 8; j++) {\n        set_type(fors_leaf_addrx8 + j*8, SPX_ADDR_TYPE_FORSTREE);\n    }\n\n    fors_sk_to_leafx8(leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 2*SPX_N,\n                  leaf + 3*SPX_N,\n                  leaf + 4*SPX_N,\n                  leaf + 5*SPX_N,\n                  leaf + 6*SPX_N,\n                  leaf + 7*SPX_N,\n                  leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 2*SPX_N,\n                  leaf + 3*SPX_N,\n                  leaf + 4*SPX_N,\n                  leaf + 5*SPX_N,\n                  leaf + 6*SPX_N,\n                  leaf + 7*SPX_N,\n                  ctx, 
fors_leaf_addrx8);\n}\n\n/**\n * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n * Assumes indices has space for SPX_FORS_TREES integers.\n */\nstatic void message_to_indices(uint32_t *indices, const unsigned char *m)\n{\n    unsigned int i, j;\n    unsigned int offset = 0;\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        indices[i] = 0;\n        for (j = 0; j < SPX_FORS_HEIGHT; j++) {\n            indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;\n            offset++;\n        }\n    }\n}\n\n/**\n * Signs a message m, deriving the secret key from sk_seed and the FTS address.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_sign(unsigned char *sig, unsigned char *pk,\n               const unsigned char *m,\n               const spx_ctx *ctx,\n               const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    uint32_t fors_tree_addr[8*8] = {0};\n    struct fors_gen_leaf_info fors_info = {0};\n    uint32_t *fors_leaf_addr = fors_info.leaf_addrx;\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    for (i=0; i<8; i++) {\n        copy_keypair_addr(fors_tree_addr + 8*i, fors_addr);\n        set_type(fors_tree_addr + 8*i, SPX_ADDR_TYPE_FORSTREE);\n        copy_keypair_addr(fors_leaf_addr + 8*i, fors_addr);\n    }\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Include the secret key part that produces the selected leaf node. 
*/\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF);\n        fors_gen_sk(sig, ctx, fors_tree_addr);\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n        sig += SPX_N;\n\n        /* Compute the authentication path for this leaf node. */\n        treehashx8(roots + i*SPX_N, sig, ctx,\n                 indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx8,\n                 fors_tree_addr, &fors_info);\n\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n\n/**\n * Derives the FORS public key from a signature.\n * This can be used for verification by comparing to a known public key, or to\n * subsequently verify a signature on the derived public key. The latter is the\n * typical use-case when used as an FTS below an OTS in a hypertree.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *m,\n                      const spx_ctx *ctx,\n                      const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    unsigned char leaf[SPX_N];\n    uint32_t fors_tree_addr[8] = {0};\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    copy_keypair_addr(fors_tree_addr, fors_addr);\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n\n    set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Derive the leaf from the included secret key part. 
*/\n        fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr);\n        sig += SPX_N;\n\n        /* Derive the corresponding root node of this tree. */\n        compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset,\n                     sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr);\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n"
  },
  {
    "path": "sha2-avx2/hash_sha2x8.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"utils.h\"\n#include \"params.h\"\n#include \"hashx8.h\"\n#include \"sha2.h\"\n#include \"sha256x8.h\"\n#include \"sha256avx.h\"\n\n/*\n * 8-way parallel version of prf_addr; takes 8x as much input and output\n */\nvoid prf_addrx8(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3,\n                unsigned char *out4,\n                unsigned char *out5,\n                unsigned char *out6,\n                unsigned char *out7,\n                const spx_ctx *ctx,\n                const uint32_t addrx8[8*8])\n{\n    unsigned char bufx8[8 * (SPX_N + SPX_SHA256_ADDR_BYTES)];\n    unsigned char outbufx8[8 * SPX_SHA256_OUTPUT_BYTES];\n    unsigned int j;\n\n    for (j = 0; j < 8; j++) {\n        memcpy(bufx8 + j*(SPX_N + SPX_SHA256_ADDR_BYTES),\n                         addrx8 + j*8, SPX_SHA256_ADDR_BYTES);\n        memcpy(\n            bufx8 + j*(SPX_N + SPX_SHA256_ADDR_BYTES) + SPX_SHA256_ADDR_BYTES,\n            ctx->sk_seed,\n            SPX_N\n        );\n    }\n\n    sha256x8_seeded(\n        /* out */\n        outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES,\n\n        /* seed */\n        ctx->state_seeded, 512,\n\n        /* in */\n        bufx8 + 0*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 1*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 2*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 3*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 4*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 5*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 
6*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        bufx8 + 7*(SPX_SHA256_ADDR_BYTES + SPX_N),\n        SPX_SHA256_ADDR_BYTES + SPX_N /* len */\n    );\n\n    memcpy(out0, outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out1, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out2, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out3, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out4, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out5, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out6, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out7, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n}\n"
  },
  {
    "path": "sha2-avx2/hashx8.h",
    "content": "#ifndef SPX_HASHX8_H\n#define SPX_HASHX8_H\n\n#include <stdint.h>\n#include \"params.h\"\n\n#define prf_addrx8 SPX_NAMESPACE(prf_addrx8)\nvoid prf_addrx8(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3,\n                unsigned char *out4,\n                unsigned char *out5,\n                unsigned char *out6,\n                unsigned char *out7,\n                const spx_ctx *ctx,\n                const uint32_t addrx8[8*8]);\n\n#endif\n"
  },
  {
    "path": "sha2-avx2/merkle.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx8.h\"\n#include \"wots.h\"\n#include \"wotsx8.h\"\n#include \"wotsx8.h\"\n#include \"merkle.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n/*\n * This generates a Merkle signature (WOTS signature followed by the Merkle\n * authentication path).\n */ \nvoid merkle_sign(uint8_t *sig, unsigned char *root,\n                 const spx_ctx *ctx,\n                 uint32_t wots_addr[8], uint32_t tree_addr[8],\n                 uint32_t idx_leaf)\n{\n    unsigned char *auth_path = sig + SPX_WOTS_BYTES;\n    uint32_t tree_addrx8[8*8] = { 0 };\n    int j;\n    struct leaf_info_x8 info = { 0 };\n    unsigned steps[ SPX_WOTS_LEN ];\n\n    info.wots_sig = sig;\n    chain_lengths(steps, root);\n    info.wots_steps = steps;\n\n    for (j=0; j<8; j++) {\n        set_type(&tree_addrx8[8*j], SPX_ADDR_TYPE_HASHTREE);\n        set_type(&info.leaf_addr[8*j], SPX_ADDR_TYPE_WOTS);\n        set_type(&info.pk_addr[8*j], SPX_ADDR_TYPE_WOTSPK);\n        copy_subtree_addr(&tree_addrx8[8*j], tree_addr);\n        copy_subtree_addr(&info.leaf_addr[8*j], wots_addr);\n        copy_subtree_addr(&info.pk_addr[8*j], wots_addr);\n    }\n\n    info.wots_sign_leaf = idx_leaf;\n\n    treehashx8(root, auth_path, ctx,\n                idx_leaf, 0,\n                SPX_TREE_HEIGHT,\n                wots_gen_leafx8,\n                tree_addrx8, &info);\n}\n\n/* Compute root node of the top-most subtree. */\n/* Again, in this file because wots_gen_leaf is most of the work */\nvoid merkle_gen_root(unsigned char *root, const spx_ctx *ctx)\n{\n    /* We do not need the auth path in key generation, but it simplifies the\n       code to have just one treehash routine that computes both root and path\n       in one function. 
*/\n    unsigned char auth_path[SPX_TREE_HEIGHT * SPX_N + SPX_WOTS_BYTES];\n    uint32_t top_tree_addr[8] = {0};\n    uint32_t wots_addr[8] = {0};\n\n    set_layer_addr(top_tree_addr, SPX_D - 1);\n    set_layer_addr(wots_addr, SPX_D - 1);\n\n    merkle_sign(auth_path, root, ctx,\n                wots_addr, top_tree_addr,\n                ~0 /* ~0 means \"don't bother generating an auth path\" */ );\n}\n"
  },
  {
    "path": "sha2-avx2/sha256avx.c",
    "content": "#include <stdio.h>\n#include <string.h>\n#include <stdint.h>\n\n#include \"sha256avx.h\"\n\n// Transpose 8 vectors containing 32-bit values\nvoid transpose(u256 s[8]) {\n    u256 tmp0[8];\n    u256 tmp1[8];\n    tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]);\n    tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]);\n    tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]);\n    tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]);\n    tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]);\n    tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]);\n    tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]);\n    tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]);\n    tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]);\n    tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]);\n    tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]);\n    tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]);\n    tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]);\n    tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]);\n    tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]);\n    tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]);\n    s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20);\n    s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20);\n    s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20);\n    s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20);\n    s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31);\n    s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31);\n    s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31);\n    s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31);    \n}\n\nvoid sha256_init8x(sha256ctx *ctx) {\n    ctx->s[0] = _mm256_set_epi32(0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667,0x6a09e667);\n    ctx->s[1] = _mm256_set_epi32(0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85,0xbb67ae85);\n    ctx->s[2] = _mm256_set_epi32(0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372,0x3c6ef372);\n    
ctx->s[3] = _mm256_set_epi32(0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a,0xa54ff53a);\n    ctx->s[4] = _mm256_set_epi32(0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f,0x510e527f);\n    ctx->s[5] = _mm256_set_epi32(0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c,0x9b05688c);\n    ctx->s[6] = _mm256_set_epi32(0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab,0x1f83d9ab);\n    ctx->s[7] = _mm256_set_epi32(0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19,0x5be0cd19);\n    \n    ctx->datalen = 0;\n    ctx->msglen = 0;\n}\n\nvoid sha256_final8x(sha256ctx *ctx,\n                    unsigned char *out0,\n                    unsigned char *out1,\n                    unsigned char *out2,\n                    unsigned char *out3,\n                    unsigned char *out4,\n                    unsigned char *out5,\n                    unsigned char *out6,\n                    unsigned char *out7) \n{\n    unsigned int i, curlen;\n\n    // Padding\n    if (ctx->datalen < 56) {\n        for (i = 0; i < 8; ++i) {\n            curlen = ctx->datalen;\n            ctx->msgblocks[64*i + curlen++] = 0x80;\n            while(curlen < 64) {\n                ctx->msgblocks[64*i + curlen++] = 0x00;\n            }\n        }\n    } else {\n        for (i = 0; i < 8; ++i) {\n            curlen = ctx->datalen;\n            ctx->msgblocks[64*i + curlen++] = 0x80;\n            while(curlen < 64) {\n                ctx->msgblocks[64*i + curlen++] = 0x00;\n            }\n        }\n        sha256_transform8x(ctx,\n            &ctx->msgblocks[64*0],\n            &ctx->msgblocks[64*1],\n            &ctx->msgblocks[64*2],\n            &ctx->msgblocks[64*3],\n            &ctx->msgblocks[64*4],\n            &ctx->msgblocks[64*5],\n            &ctx->msgblocks[64*6],\n            &ctx->msgblocks[64*7]\n        );\n        memset(ctx->msgblocks, 0, 8 * 64);\n  
  }\n\n    // Add length of the message to each block\n    ctx->msglen += ctx->datalen * 8;\n    for (i = 0; i < 8; i++) {\n        ctx->msgblocks[64*i + 63] = ctx->msglen;\n        ctx->msgblocks[64*i + 62] = ctx->msglen >> 8;\n        ctx->msgblocks[64*i + 61] = ctx->msglen >> 16;\n        ctx->msgblocks[64*i + 60] = ctx->msglen >> 24;\n        ctx->msgblocks[64*i + 59] = ctx->msglen >> 32;\n        ctx->msgblocks[64*i + 58] = ctx->msglen >> 40;\n        ctx->msgblocks[64*i + 57] = ctx->msglen >> 48;\n        ctx->msgblocks[64*i + 56] = ctx->msglen >> 56;\n    }\n    sha256_transform8x(ctx,\n        &ctx->msgblocks[64*0],\n        &ctx->msgblocks[64*1],\n        &ctx->msgblocks[64*2],\n        &ctx->msgblocks[64*3],\n        &ctx->msgblocks[64*4],\n        &ctx->msgblocks[64*5],\n        &ctx->msgblocks[64*6],\n        &ctx->msgblocks[64*7]\n    );\n\n    // Compute final hash output\n    transpose(ctx->s);\n\n    // Store Hash value\n    STORE(out0, BYTESWAP(ctx->s[0]));\n    STORE(out1, BYTESWAP(ctx->s[1]));\n    STORE(out2, BYTESWAP(ctx->s[2]));\n    STORE(out3, BYTESWAP(ctx->s[3]));\n    STORE(out4, BYTESWAP(ctx->s[4]));\n    STORE(out5, BYTESWAP(ctx->s[5]));\n    STORE(out6, BYTESWAP(ctx->s[6]));\n    STORE(out7, BYTESWAP(ctx->s[7]));\n}\n\nvoid sha256_transform8x(sha256ctx *ctx,\n        const unsigned char* data0,\n        const unsigned char* data1,\n        const unsigned char* data2,\n        const unsigned char* data3,\n        const unsigned char* data4,\n        const unsigned char* data5,\n        const unsigned char* data6,\n        const unsigned char* data7) {\n    u256 s[8], w[64], T0, T1;\n\n    // Load words and transform data correctly\n    w[0] = BYTESWAP(LOAD(data0));\n    w[0 + 8] = BYTESWAP(LOAD(data0 + 32));\n    w[1] = BYTESWAP(LOAD(data1));\n    w[1 + 8] = BYTESWAP(LOAD(data1 + 32));\n    w[2] = BYTESWAP(LOAD(data2));\n    w[2 + 8] = BYTESWAP(LOAD(data2 + 32));\n    w[3] = BYTESWAP(LOAD(data3));\n    w[3 + 8] = BYTESWAP(LOAD(data3 + 
32));\n    w[4] = BYTESWAP(LOAD(data4));\n    w[4 + 8] = BYTESWAP(LOAD(data4 + 32));\n    w[5] = BYTESWAP(LOAD(data5));\n    w[5 + 8] = BYTESWAP(LOAD(data5 + 32));\n    w[6] = BYTESWAP(LOAD(data6));\n    w[6 + 8] = BYTESWAP(LOAD(data6 + 32));\n    w[7] = BYTESWAP(LOAD(data7));\n    w[7 + 8] = BYTESWAP(LOAD(data7 + 32));\n\n    transpose(w);\n    transpose(w + 8);\n\n    // Initial State\n    s[0] = ctx->s[0];\n    s[1] = ctx->s[1];\n    s[2] = ctx->s[2];\n    s[3] = ctx->s[3];\n    s[4] = ctx->s[4];\n    s[5] = ctx->s[5];\n    s[6] = ctx->s[6];\n    s[7] = ctx->s[7];\n\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]);    \n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]);\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]);\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]);\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]);\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]);\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]);\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]);\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]);\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 9, w[9]);\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]);\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]);\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]);\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]);\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]);\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]);   \n    w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1]));\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], 
s[7], 16, w[16]);\n    w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2]));\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]);\n    w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3]));\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]);\n    w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4]));\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]);\n    w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5]));\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]);\n    w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6]));\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]);\n    w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7]));\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]);\n    w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8]));\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]);\n    w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9]));\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]);\n    w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10]));\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]);\n    w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11]));\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]);\n    w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12]));\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]);\n    w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13]));\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]);\n    w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14]));\n    SHA256ROUND_AVX(s[3], s[4], s[5], 
s[6], s[7], s[0], s[1], s[2], 29, w[29]);\n    w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15]));\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]);\n    w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16]));\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]);   \n    w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17]));\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]);\n    w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18]));\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]);\n    w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19]));\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]);\n    w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20]));\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]);\n    w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21]));\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]);\n    w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22]));\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]);\n    w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23]));\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]);\n    w[39] = ADD4_32(WSIGMA1_AVX(w[37]), w[23], w[32], WSIGMA0_AVX(w[24]));\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]);\n    w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25]));\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]);\n    w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26]));\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]);\n    w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], 
WSIGMA0_AVX(w[27]));\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]);\n    w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28]));\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]);\n    w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29]));\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]);\n    w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30]));\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]);\n    w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31]));\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]);\n    w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32]));\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]);\n    w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33]));\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]);\n    w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34]));\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]);\n    w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35]));\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]);\n    w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36]));\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]);\n    w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37]));\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]);\n    w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38]));\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]);\n    w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39]));\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]);\n    w[55] = 
ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40]));\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]);\n    w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41]));\n    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]);\n    w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42]));\n    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); \n    w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43]));\n    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]);   \n    w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44]));\n    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]);\n    w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45]));\n    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]);\n    w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46]));\n    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]);\n    w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47]));\n    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]);\n    w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48]));\n    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]);\n\n    // Feed Forward\n    ctx->s[0] = ADD32(s[0], ctx->s[0]);\n    ctx->s[1] = ADD32(s[1], ctx->s[1]);\n    ctx->s[2] = ADD32(s[2], ctx->s[2]);\n    ctx->s[3] = ADD32(s[3], ctx->s[3]);\n    ctx->s[4] = ADD32(s[4], ctx->s[4]);\n    ctx->s[5] = ADD32(s[5], ctx->s[5]);\n    ctx->s[6] = ADD32(s[6], ctx->s[6]);\n    ctx->s[7] = ADD32(s[7], ctx->s[7]);\n}\n"
  },
  {
    "path": "sha2-avx2/sha256avx.h",
    "content": "#ifndef SHA256AVX_H\n#define SHA256AVX_H\n#include \"immintrin.h\"\n#include <stdint.h>\n\nstatic const unsigned int RC[] = {\n    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,\n    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,\n    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,\n    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,\n    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,\n    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,\n    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,\n    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,\n    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,\n    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,\n    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,\n    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,\n    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,\n    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,\n    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,\n    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2\n};\n\n#define u32 uint32_t\n#define u256 __m256i\n\n#define XOR _mm256_xor_si256\n#define OR _mm256_or_si256\n#define AND _mm256_and_si256\n#define ADD32 _mm256_add_epi32\n#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1))\n\n#define LOAD(src) _mm256_loadu_si256((__m256i *)(src))\n#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src)\n\n#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3))\n\n#define SHIFTR32(x, y) _mm256_srli_epi32(x, y)\n#define SHIFTL32(x, y) _mm256_slli_epi32(x, y)\n\n#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - y))\n#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - y))\n\n#define XOR3(a, b, c) XOR(XOR(a, b), c)\n\n#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c)\n#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d)\n#define ADD5_32(a, b, c, d, e) 
ADD32(ADD32(ADD32(ADD32(a, b), c), d), e)\n\n#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c))\n#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c))\n\n#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25))\n#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22))\n\n#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10))\n#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3))\n\n#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \\\n    T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32(RC[rc]), w); \\\n    d = ADD32(d, T0); \\\n    T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \\\n    h = ADD32(T0, T1);\n\ntypedef struct SHA256state {\n    u256 s[8];\n    unsigned char msgblocks[8*64];\n    int datalen;\n    unsigned long long msglen;\n} sha256ctx;\n\n\nvoid transpose(u256 s[8]);\nvoid sha256_init8x(sha256ctx *ctx);\nvoid sha256_final8x(sha256ctx *ctx,\n                    unsigned char *out0,\n                    unsigned char *out1,\n                    unsigned char *out2,\n                    unsigned char *out3,\n                    unsigned char *out4,\n                    unsigned char *out5,\n                    unsigned char *out6,\n                    unsigned char *out7);\n\nvoid sha256_transform8x(sha256ctx *ctx,\n        const unsigned char *data0,\n        const unsigned char *data1,\n        const unsigned char *data2,\n        const unsigned char *data3,\n        const unsigned char *data4,\n        const unsigned char *data5,\n        const unsigned char *data6,\n        const unsigned char *data7);\n\n#endif\n"
  },
  {
    "path": "sha2-avx2/sha256x8.c",
    "content": "#include <string.h>\n\n#include \"sha256x8.h\"\n#include \"sha256avx.h\"\n#include \"utils.h\"\n\nstatic uint32_t load_bigendian_32(const uint8_t *x) {\n    return (uint32_t)(x[3]) | (((uint32_t)(x[2])) << 8) |\n           (((uint32_t)(x[1])) << 16) | (((uint32_t)(x[0])) << 24);\n}\n\n// Performs sha256x8 on an initialized (and perhaps seeded) state.\nstatic void _sha256x8(\n              sha256ctx *ctx,\n              unsigned char *out0,\n              unsigned char *out1,\n              unsigned char *out2,\n              unsigned char *out3,\n              unsigned char *out4,\n              unsigned char *out5,\n              unsigned char *out6,\n              unsigned char *out7,\n              const unsigned char *in0,\n              const unsigned char *in1,\n              const unsigned char *in2,\n              const unsigned char *in3,\n              const unsigned char *in4,\n              const unsigned char *in5,\n              const unsigned char *in6,\n              const unsigned char *in7, unsigned long long inlen) {\n    unsigned long long i = 0;\n    while(inlen - i >= 64) {\n        sha256_transform8x(ctx,\n            in0 + i,\n            in1 + i,\n            in2 + i,\n            in3 + i,\n            in4 + i,\n            in5 + i,\n            in6 + i,\n            in7 + i\n        );\n        i += 64;\n        ctx->msglen += 512;\n    }\n\n    int bytes_to_copy = inlen - i;\n    memcpy(&ctx->msgblocks[64*0], in0 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*1], in1 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*2], in2 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*3], in3 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*4], in4 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*5], in5 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*6], in6 + i, bytes_to_copy);\n    memcpy(&ctx->msgblocks[64*7], in7 + i, bytes_to_copy);\n    ctx->datalen = bytes_to_copy;\n\n    sha256_final8x(ctx, out0, out1, 
out2, out3, out4, out5, out6, out7);\n}\n\nvoid sha256x8_seeded(\n              unsigned char *out0,\n              unsigned char *out1,\n              unsigned char *out2,\n              unsigned char *out3,\n              unsigned char *out4,\n              unsigned char *out5,\n              unsigned char *out6,\n              unsigned char *out7,\n              const unsigned char *seed,\n              unsigned long long seedlen,\n              const unsigned char *in0,\n              const unsigned char *in1,\n              const unsigned char *in2,\n              const unsigned char *in3,\n              const unsigned char *in4,\n              const unsigned char *in5,\n              const unsigned char *in6,\n              const unsigned char *in7, unsigned long long inlen) {\n    uint32_t t;\n\n    sha256ctx ctx;\n\n    for (size_t i = 0; i < 8; i++) {\n        t = load_bigendian_32(seed + 4*i);\n        ctx.s[i] = _mm256_set_epi32(t, t, t, t, t, t, t, t);\n    }\n\n    ctx.datalen = 0;\n    ctx.msglen = seedlen;\n\n    _sha256x8(&ctx, out0, out1, out2, out3, out4, out5, out6, out7,\n            in0, in1, in2, in3, in4, in5, in6, in7, inlen);\n}\n\n/* This provides a wrapper around the internals of 8x parallel SHA256 */\nvoid sha256x8(unsigned char *out0,\n              unsigned char *out1,\n              unsigned char *out2,\n              unsigned char *out3,\n              unsigned char *out4,\n              unsigned char *out5,\n              unsigned char *out6,\n              unsigned char *out7,\n              const unsigned char *in0,\n              const unsigned char *in1,\n              const unsigned char *in2,\n              const unsigned char *in3,\n              const unsigned char *in4,\n              const unsigned char *in5,\n              const unsigned char *in6,\n              const unsigned char *in7, unsigned long long inlen)\n{\n    sha256ctx ctx;\n    sha256_init8x(&ctx);\n\n    _sha256x8(&ctx, out0, out1, out2, out3, out4, out5, 
out6, out7,\n            in0, in1, in2, in3, in4, in5, in6, in7, inlen);\n}\n\n/**\n * Note that inlen should be sufficiently small that it still allows for\n * an array to be allocated on the stack. Typically 'in' is merely a seed.\n * Outputs outlen number of bytes\n */\nvoid mgf1x8(unsigned char *outx8, unsigned long outlen,\n            const unsigned char *in0,\n            const unsigned char *in1,\n            const unsigned char *in2,\n            const unsigned char *in3,\n            const unsigned char *in4,\n            const unsigned char *in5,\n            const unsigned char *in6,\n            const unsigned char *in7,\n            unsigned long inlen)\n{\n    SPX_VLA(unsigned char, inbufx8, 8 * (inlen + 4));\n    unsigned char outbufx8[8*SPX_SHA256_OUTPUT_BYTES];\n    unsigned long i;\n    unsigned int j;\n\n    memcpy(inbufx8 + 0*(inlen + 4), in0, inlen);\n    memcpy(inbufx8 + 1*(inlen + 4), in1, inlen);\n    memcpy(inbufx8 + 2*(inlen + 4), in2, inlen);\n    memcpy(inbufx8 + 3*(inlen + 4), in3, inlen);\n    memcpy(inbufx8 + 4*(inlen + 4), in4, inlen);\n    memcpy(inbufx8 + 5*(inlen + 4), in5, inlen);\n    memcpy(inbufx8 + 6*(inlen + 4), in6, inlen);\n    memcpy(inbufx8 + 7*(inlen + 4), in7, inlen);\n\n    /* While we can fit in at least another full block of SHA256 output.. 
*/\n    for (i = 0; (i+1)*SPX_SHA256_OUTPUT_BYTES <= outlen; i++) {\n        for (j = 0; j < 8; j++) {\n            u32_to_bytes(inbufx8 + inlen + j*(inlen + 4), i);\n        }\n\n        sha256x8(outx8 + 0*outlen,\n                 outx8 + 1*outlen,\n                 outx8 + 2*outlen,\n                 outx8 + 3*outlen,\n                 outx8 + 4*outlen,\n                 outx8 + 5*outlen,\n                 outx8 + 6*outlen,\n                 outx8 + 7*outlen,\n                 inbufx8 + 0*(inlen + 4),\n                 inbufx8 + 1*(inlen + 4),\n                 inbufx8 + 2*(inlen + 4),\n                 inbufx8 + 3*(inlen + 4),\n                 inbufx8 + 4*(inlen + 4),\n                 inbufx8 + 5*(inlen + 4),\n                 inbufx8 + 6*(inlen + 4),\n                 inbufx8 + 7*(inlen + 4), inlen + 4);\n        outx8 += SPX_SHA256_OUTPUT_BYTES;\n    }\n    /* Until we cannot anymore, and we fill the remainder. */\n    for (j = 0; j < 8; j++) {\n        u32_to_bytes(inbufx8 + inlen + j*(inlen + 4), i);\n    }\n    sha256x8(outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES,\n             outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES,\n             inbufx8 + 0*(inlen + 4),\n             inbufx8 + 1*(inlen + 4),\n             inbufx8 + 2*(inlen + 4),\n             inbufx8 + 3*(inlen + 4),\n             inbufx8 + 4*(inlen + 4),\n             inbufx8 + 5*(inlen + 4),\n             inbufx8 + 6*(inlen + 4),\n             inbufx8 + 7*(inlen + 4), inlen + 4);\n\n    for (j = 0; j < 8; j++) {\n        memcpy(outx8 + j*outlen,\n               outbufx8 + j*SPX_SHA256_OUTPUT_BYTES,\n               outlen - i*SPX_SHA256_OUTPUT_BYTES);\n    }\n}\n"
  },
  {
    "path": "sha2-avx2/sha256x8.h",
    "content": "#ifndef SPX_SHA256X8_H\n#define SPX_SHA256X8_H\n\n#include \"params.h\"\n\n#define SPX_SHA256_BLOCK_BYTES 64\n#define SPX_SHA256_OUTPUT_BYTES 32  /* This does not necessarily equal SPX_N */\n\n#if SPX_SHA256_OUTPUT_BYTES < SPX_N\n    #error Linking against SHA-256 with N larger than 32 bytes is not supported\n#endif\n\n#define sha256x8_seeded SPX_NAMESPACE(sha256x8_seeded)\nvoid sha256x8_seeded(\n              unsigned char *out0,\n              unsigned char *out1,\n              unsigned char *out2,\n              unsigned char *out3,\n              unsigned char *out4,\n              unsigned char *out5,\n              unsigned char *out6,\n              unsigned char *out7,\n              const unsigned char *seed,\n              unsigned long long seedlen,\n              const unsigned char *in0,\n              const unsigned char *in1,\n              const unsigned char *in2,\n              const unsigned char *in3,\n              const unsigned char *in4,\n              const unsigned char *in5,\n              const unsigned char *in6,\n              const unsigned char *in7, unsigned long long inlen);\n\n/* This provides a wrapper around the internals of 8x parallel SHA256 */\n#define sha256x8 SPX_NAMESPACE(sha256x8)\nvoid sha256x8(unsigned char *out0,\n              unsigned char *out1,\n              unsigned char *out2,\n              unsigned char *out3,\n              unsigned char *out4,\n              unsigned char *out5,\n              unsigned char *out6,\n              unsigned char *out7,\n              const unsigned char *in0,\n              const unsigned char *in1,\n              const unsigned char *in2,\n              const unsigned char *in3,\n              const unsigned char *in4,\n              const unsigned char *in5,\n              const unsigned char *in6,\n              const unsigned char *in7, unsigned long long inlen);\n\n/**\n * Note that inlen should be sufficiently small that it still allows for\n * an array 
to be allocated on the stack. Typically 'in' is merely a seed.\n * Outputs outlen number of bytes\n */\n#define mgf1x8 SPX_NAMESPACE(mgf1x8)\nvoid mgf1x8(unsigned char *outx8, unsigned long outlen,\n            const unsigned char *in0,\n            const unsigned char *in1,\n            const unsigned char *in2,\n            const unsigned char *in3,\n            const unsigned char *in4,\n            const unsigned char *in5,\n            const unsigned char *in6,\n            const unsigned char *in7,\n            unsigned long inlen);\n#endif\n"
  },
  {
    "path": "sha2-avx2/sha512x4.c",
    "content": "#include <stdio.h>\n#include <string.h>\n#include <stdint.h>\n\n#define SPX_SHA512_OUTPUT_BYTES 64  /* In sha256.h, but we don't want to */\n                                    /* pull in the entire thing */\n#include \"sha512x4.h\"\n#include \"utils.h\"\n\ntypedef uint64_t u64;\ntypedef __m256i u256;\n\nstatic void sha512_transform4x(\n    sha512ctx4x *ctx,\n    const unsigned char *d0,\n    const unsigned char *d1,\n    const unsigned char *d2,\n    const unsigned char *d3\n);\n\n#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0x8,0x9,0xa,0xb,0xc,0xd,0xe,0xf,0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xa,0xb,0xc,0xd,0xe,0xf,0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7))\n#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src)\n\n// Transpose 4 vectors containing 64-bit values\n// That is, it rearranges the array:\n//     A B C D\n//     E F G H\n//     I J K L\n//     M N O P\n// into\n//     A E I M\n//     B F J N\n//     C G K O\n//     D H L P\n// where each letter stands for 64 bits (and lsbits on the left)\nstatic void transpose(u256 s[4]) {\n    u256 tmp[4];\n    tmp[0] = _mm256_unpacklo_epi64(s[0], s[1]);\n    tmp[1] = _mm256_unpackhi_epi64(s[0], s[1]);\n    tmp[2] = _mm256_unpacklo_epi64(s[2], s[3]);\n    tmp[3] = _mm256_unpackhi_epi64(s[2], s[3]);\n    // tmp is in the order of\n    //   A E C G\n    //   B F D H\n    //   I M K O\n    //   J N L P\n    s[0] = _mm256_permute2x128_si256(tmp[0], tmp[2], 0x20);\n    s[1] = _mm256_permute2x128_si256(tmp[1], tmp[3], 0x20);\n    s[2] = _mm256_permute2x128_si256(tmp[0], tmp[2], 0x31);\n    s[3] = _mm256_permute2x128_si256(tmp[1], tmp[3], 0x31);\n}\n\n\nstatic void sha512_init4x(sha512ctx4x *ctx) {\n#define SET4(x) _mm256_set_epi64x(x, x, x, x)\n    ctx->s[0] = SET4(0x6a09e667f3bcc908ULL);\n    ctx->s[1] = SET4(0xbb67ae8584caa73bULL);\n    ctx->s[2] = SET4(0x3c6ef372fe94f82bULL);\n    ctx->s[3] = SET4(0xa54ff53a5f1d36f1ULL);\n    ctx->s[4] = SET4(0x510e527fade682d1ULL);\n    ctx->s[5] = 
SET4(0x9b05688c2b3e6c1fULL);\n    ctx->s[6] = SET4(0x1f83d9abfb41bd6bULL);\n    ctx->s[7] = SET4(0x5be0cd19137e2179ULL);\n#undef SET4\n    \n    ctx->datalen = 0;\n    ctx->msglen = 0;\n}\n\n#define XOR _mm256_xor_si256\n#define OR _mm256_or_si256\n#define AND _mm256_and_si256\n#define ADD64 _mm256_add_epi64\n\n#define LOAD(src) _mm256_loadu_si256((__m256i *)(src))\n\n#define SHIFTR64(x, y) _mm256_srli_epi64(x, y)\n#define SHIFTL64(x, y) _mm256_slli_epi64(x, y)\n\n#define ROTR64(x, y) OR(SHIFTR64(x, y), SHIFTL64(x, 64 - y))\n\nstatic u256 XOR3(u256 a, u256 b, u256 c) {\n    return XOR(XOR(a, b), c);\n}\n\n#define ADD3_64(a, b, c) ADD64(ADD64(a, b), c)\n#define ADD4_64(a, b, c, d) ADD64(ADD64(ADD64(a, b), c), d)\n#define ADD5_64(a, b, c, d, e) ADD64(ADD64(ADD64(ADD64(a, b), c), d), e)\n\nstatic u256 MAJ_AVX(u256 a, u256 b, u256 c) {\n    return XOR(c, AND(XOR(a, c), XOR(b, c)));\n}\nstatic u256 CH_AVX(u256 a, u256 b, u256 c) {\n    return XOR(c, AND(a, XOR(b, c)));\n}\nstatic u256 SIGMA0_AVX(u256 x) {\n    return XOR3(ROTR64(x, 28), ROTR64(x, 34), ROTR64(x, 39));\n}\nstatic u256 SIGMA1_AVX(u256 x) {\n    return XOR3(ROTR64(x, 14), ROTR64(x, 18), ROTR64(x, 41));\n}\nstatic u256 GAMMA0_AVX(u256 x) {\n    return XOR3(ROTR64(x, 1),  ROTR64(x, 8), SHIFTR64(x, 7));\n}\nstatic u256 GAMMA1_AVX(u256 x) {\n    return XOR3(ROTR64(x, 19), ROTR64(x, 61), SHIFTR64(x, 6));\n}\n\n#define SHA512ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \\\n    T0 = ADD5_64(h, w, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi64x(RC[rc])); \\\n    T1 = ADD64(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \\\n    d = ADD64(d, T0); \\\n    h = ADD64(T0, T1);\n\nstatic const unsigned long long RC[80] = {\n    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, \n    0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,\n    0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, \n    0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,\n    0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, \n    0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,\n    
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, \n    0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,\n    0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, \n    0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,\n    0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, \n    0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,\n    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, \n    0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,\n    0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, \n    0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,\n    0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, \n    0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,\n    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, \n    0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,\n    0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,\n    0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,\n    0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, \n    0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,\n    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, \n    0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,\n    0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, \n    0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,\n    0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, \n    0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,\n    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, \n    0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,\n    0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, \n    0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,\n    0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, \n    0x113f9804bef90daeULL, 0x1b710b35131c471bULL,\n    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, \n    0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,\n    0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, \n    0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,\n};\n\nstatic void sha512_transform4x(\n        sha512ctx4x *ctx,\n        const unsigned char *d0,\n        const unsigned char *d1,\n        const unsigned char *d2,\n        const unsigned char *d3) {\n    u256 s0, s1, s2, s3, s4, s5, s6, s7, w[16], 
T0, T1, nw;\n\n    // Load words and transform data correctly\n    w[0     ] = BYTESWAP(LOAD(d0     ));\n    w[0 +  4] = BYTESWAP(LOAD(d0 + 32));\n    w[0 +  8] = BYTESWAP(LOAD(d0 + 64));\n    w[0 + 12] = BYTESWAP(LOAD(d0 + 96));\n\n    w[1     ] = BYTESWAP(LOAD(d1     ));\n    w[1 +  4] = BYTESWAP(LOAD(d1 + 32));\n    w[1 +  8] = BYTESWAP(LOAD(d1 + 64));\n    w[1 + 12] = BYTESWAP(LOAD(d1 + 96));\n\n    w[2     ] = BYTESWAP(LOAD(d2     ));\n    w[2 +  4] = BYTESWAP(LOAD(d2 + 32));\n    w[2 +  8] = BYTESWAP(LOAD(d2 + 64));\n    w[2 + 12] = BYTESWAP(LOAD(d2 + 96));\n\n    w[3     ] = BYTESWAP(LOAD(d3     ));\n    w[3 +  4] = BYTESWAP(LOAD(d3 + 32));\n    w[3 +  8] = BYTESWAP(LOAD(d3 + 64));\n    w[3 + 12] = BYTESWAP(LOAD(d3 + 96));\n\n    transpose(w);\n    transpose(w + 4);\n    transpose(w + 8);\n    transpose(w + 12);\n\n    // Initial State\n    s0 = ctx->s[0];\n    s1 = ctx->s[1];\n    s2 = ctx->s[2];\n    s3 = ctx->s[3];\n    s4 = ctx->s[4];\n    s5 = ctx->s[5];\n    s6 = ctx->s[6];\n    s7 = ctx->s[7];\n\n    // The first 16 rounds (where the w inputs are directly from the data)\n    SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, 0, w[0]);\n    SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, 1, w[1]);\n    SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, 2, w[2]);\n    SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, 3, w[3]);\n    SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, 4, w[4]);\n    SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, 5, w[5]);\n    SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, 6, w[6]);\n    SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, 7, w[7]);\n    SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, 8, w[8]);\n    SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, 9, w[9]);\n    SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, 10, w[10]);\n    SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, 11, w[11]);\n    SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, 12, w[12]);\n    SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, 13, w[13]);\n    
SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, 14, w[14]);\n    SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, 15, w[15]);\n\n#define M(i) (((i)+16) & 0xf)\n#define NextW(i) \\\n    w[M(i)] = ADD4_64(GAMMA1_AVX(w[M((i)-2)]), w[M((i)-7)], GAMMA0_AVX(w[M((i)-15)]), w[M((i)-16)]);\n\n    // The remaining 64 rounds (where each w input is computed from earlier w values)\n    for (unsigned i = 16; i<80; i+=16) {\n    nw = NextW(0);\n    SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, i+0, nw);\n    nw = NextW(1);\n    SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, i+1, nw);\n    nw = NextW(2);\n    SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, i+2, nw);\n    nw = NextW(3);\n    SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, i+3, nw);\n    nw = NextW(4);\n    SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, i+4, nw);\n    nw = NextW(5);\n    SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, i+5, nw);\n    nw = NextW(6);\n    SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, i+6, nw);\n    nw = NextW(7);\n    SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, i+7, nw);\n    nw = NextW(8);\n    SHA512ROUND_AVX(s0, s1, s2, s3, s4, s5, s6, s7, i+8, nw);\n    nw = NextW(9);\n    SHA512ROUND_AVX(s7, s0, s1, s2, s3, s4, s5, s6, i+9, nw);\n    nw = NextW(10);\n    SHA512ROUND_AVX(s6, s7, s0, s1, s2, s3, s4, s5, i+10, nw);\n    nw = NextW(11);\n    SHA512ROUND_AVX(s5, s6, s7, s0, s1, s2, s3, s4, i+11, nw);\n    nw = NextW(12);\n    SHA512ROUND_AVX(s4, s5, s6, s7, s0, s1, s2, s3, i+12, nw);\n    nw = NextW(13);\n    SHA512ROUND_AVX(s3, s4, s5, s6, s7, s0, s1, s2, i+13, nw);\n    nw = NextW(14);\n    SHA512ROUND_AVX(s2, s3, s4, s5, s6, s7, s0, s1, i+14, nw);\n    nw = NextW(15);\n    SHA512ROUND_AVX(s1, s2, s3, s4, s5, s6, s7, s0, i+15, nw);\n    }\n\n    // Feed Forward\n    ctx->s[0] = ADD64(s0, ctx->s[0]);\n    ctx->s[1] = ADD64(s1, ctx->s[1]);\n    ctx->s[2] = ADD64(s2, ctx->s[2]);\n    ctx->s[3] = ADD64(s3, ctx->s[3]);\n    ctx->s[4] = ADD64(s4, ctx->s[4]);\n    ctx->s[5] = ADD64(s5, 
ctx->s[5]);\n    ctx->s[6] = ADD64(s6, ctx->s[6]);\n    ctx->s[7] = ADD64(s7, ctx->s[7]);\n}\n\nstatic void _sha512x4(\n        sha512ctx4x* ctx,\n        unsigned char *out0,\n        unsigned char *out1,\n        unsigned char *out2,\n        unsigned char *out3,\n        const unsigned char *in0,\n        const unsigned char *in1,\n        const unsigned char *in2,\n        const unsigned char *in3,\n        unsigned long long inlen) {\n    unsigned int i = 0;\n\n    while(inlen - i >= 128) {\n        sha512_transform4x(\n            ctx,\n            in0 + i,\n            in1 + i,\n            in2 + i,\n            in3 + i\n        );\n        ctx->msglen += 1024;\n        i += 128;\n    }\n\n    ctx->datalen = inlen - i;\n    memcpy(&ctx->msgblocks[128*0], in0 + i, ctx->datalen);\n    memcpy(&ctx->msgblocks[128*1], in1 + i, ctx->datalen);\n    memcpy(&ctx->msgblocks[128*2], in2 + i, ctx->datalen);\n    memcpy(&ctx->msgblocks[128*3], in3 + i, ctx->datalen);\n\n    // Padding\n    unsigned long curlen;\n    if (ctx->datalen < 112) {\n        for (i = 0; i < 4; ++i) {\n            curlen = ctx->datalen;\n            ctx->msgblocks[128*i + curlen++] = 0x80;\n            while(curlen < 128) {\n                ctx->msgblocks[128*i + curlen++] = 0x00;\n            }\n        }\n    } else {\n        for (i = 0; i < 4; ++i) {\n            curlen = ctx->datalen;\n            ctx->msgblocks[128*i + curlen++] = 0x80;\n            while(curlen < 128) {\n                ctx->msgblocks[128*i + curlen++] = 0x00;\n            }\n        }\n        sha512_transform4x(\n            ctx,\n            ctx->msgblocks,\n            ctx->msgblocks + 128,\n            ctx->msgblocks + 256,\n            ctx->msgblocks + 384\n        );\n        memset(ctx->msgblocks, 0, 4 * 128);\n    }\n\n    // Add length of the message to each block\n    ctx->msglen += ctx->datalen * 8;\n    for (i = 0; i < 4; i++) {\n        ctx->msgblocks[128*i + 127] = ctx->msglen;\n        ctx->msgblocks[128*i 
+ 126] = ctx->msglen >> 8;\n        ctx->msgblocks[128*i + 125] = ctx->msglen >> 16;\n        ctx->msgblocks[128*i + 124] = ctx->msglen >> 24;\n        ctx->msgblocks[128*i + 123] = ctx->msglen >> 32;\n        ctx->msgblocks[128*i + 122] = ctx->msglen >> 40;\n        ctx->msgblocks[128*i + 121] = ctx->msglen >> 48;\n        ctx->msgblocks[128*i + 120] = ctx->msglen >> 56;\n\tmemset( &ctx->msgblocks[128*i + 112], 0, 8 );\n    }\n    sha512_transform4x(\n        ctx,\n        ctx->msgblocks,\n        ctx->msgblocks + 128,\n        ctx->msgblocks + 256,\n        ctx->msgblocks + 384\n    );\n\n    // Compute final hash output\n    transpose(ctx->s);\n    transpose(ctx->s+4);\n\n    // Store Hash value\n    __m256i out[2];\n    STORE(out,   BYTESWAP(ctx->s[0]));\n    STORE(out+1, BYTESWAP(ctx->s[4]));\n    memcpy(out0, out, 64);\n\n    STORE(out,   BYTESWAP(ctx->s[1]));\n    STORE(out+1, BYTESWAP(ctx->s[5]));\n    memcpy(out1, out, 64);\n\n    STORE(out,   BYTESWAP(ctx->s[2]));\n    STORE(out+1, BYTESWAP(ctx->s[6]));\n    memcpy(out2, out, 64);\n\n    STORE(out,   BYTESWAP(ctx->s[3]));\n    STORE(out+1, BYTESWAP(ctx->s[7]));\n    memcpy(out3, out, 64);\n}\n\n\n/**\n * Note that inlen should be sufficiently small that it still allows for\n * an array to be allocated on the stack. 
Typically 'in' is merely a seed.\n * Outputs outlen number of bytes\n */\nvoid mgf1x4_512(unsigned char *outx4, unsigned long outlen,\n            const unsigned char *in0,\n            const unsigned char *in1,\n            const unsigned char *in2,\n            const unsigned char *in3,\n            unsigned long inlen)\n{\n    SPX_VLA(unsigned char, inbufx4, 4*(inlen + 4));\n    unsigned char outbuf[4*64];\n    unsigned long i;\n    unsigned int j;\n\n    memcpy(inbufx4 + 0*(inlen + 4), in0, inlen);\n    memcpy(inbufx4 + 1*(inlen + 4), in1, inlen);\n    memcpy(inbufx4 + 2*(inlen + 4), in2, inlen);\n    memcpy(inbufx4 + 3*(inlen + 4), in3, inlen);\n\n    /* Produce one SHA512 block per iteration; the last one may be partial. */\n    unsigned long remaining = outlen;\n    for (i = 0; remaining > 0; i++) {\n        unsigned this_step = SPX_SHA512_OUTPUT_BYTES;\n        if (this_step > remaining) this_step = remaining;\n        remaining -= this_step;\n        for (j = 0; j < 4; j++) {\n            u32_to_bytes(inbufx4 + inlen + j*(inlen + 4), i);\n        }\n\n        sha512ctx4x ctx;\n        sha512_init4x(&ctx);\n        \n        _sha512x4(\n            &ctx,\n            outbuf + 0*64,\n            outbuf + 1*64,\n            outbuf + 2*64,\n            outbuf + 3*64,\n            inbufx4 + 0*(inlen + 4),\n            inbufx4 + 1*(inlen + 4),\n            inbufx4 + 2*(inlen + 4),\n            inbufx4 + 3*(inlen + 4),\n            inlen+4\n        );\n\n        memcpy(outx4 + 0*outlen, outbuf+0*64, this_step);\n        memcpy(outx4 + 1*outlen, outbuf+1*64, this_step);\n        memcpy(outx4 + 2*outlen, outbuf+2*64, this_step);\n        memcpy(outx4 + 3*outlen, outbuf+3*64, this_step);\n        outx4 += this_step;\n    }\n}\n\nvoid sha512x4_seeded(\n        unsigned char *out0,\n        unsigned char *out1,\n        unsigned char *out2,\n        unsigned char *out3,\n        const unsigned char *seed,\n        unsigned long long seedlen,\n        const unsigned char 
*in0,\n        const unsigned char *in1,\n        const unsigned char *in2,\n        const unsigned char *in3,\n        unsigned long long inlen) {\n    sha512ctx4x ctx;\n    unsigned long i;\n\n    for (i = 0; i < 8; i++) {\n        uint64_t t = (uint64_t)(seed[7]) | (((uint64_t)(seed[6])) << 8) |\n           (((uint64_t)(seed[5])) << 16) | (((uint64_t)(seed[4])) << 24) |\n           (((uint64_t)(seed[3])) << 32) | (((uint64_t)(seed[2])) << 40) |\n           (((uint64_t)(seed[1])) << 48) | (((uint64_t)(seed[0])) << 56);\n        ctx.s[i] = _mm256_set_epi64x(t, t, t, t);\n        seed += 8;\n    }\n\n    ctx.msglen = seedlen;\n    _sha512x4(\n        &ctx,\n        out0, out1, out2, out3,\n        in0, in1, in2, in3,\n        inlen\n    );\n}\n"
  },
  {
    "path": "sha2-avx2/sha512x4.h",
    "content": "#ifndef SHA512AVX_H\n#define SHA512AVX_H\n#include <stdint.h>\n#include \"immintrin.h\"\n\n#include \"params.h\"\n\ntypedef struct SHA512state4x {\n    __m256i s[8];\n    unsigned char msgblocks[4*128];\n    int datalen;\n    unsigned long long msglen;\n} sha512ctx4x;\n\n\n#define sha512x4_seeded SPX_NAMESPACE(sha512x4_seeded)\nvoid sha512x4_seeded(\n    unsigned char *out0,\n    unsigned char *out1,\n    unsigned char *out2,\n    unsigned char *out3,\n    const unsigned char *seed,\n    unsigned long long seedlen,\n    const unsigned char *in0,\n    const unsigned char *in1,\n    const unsigned char *in2,\n    const unsigned char *in3,\n    unsigned long long inlen);\n\n\n/**\n * Note that inlen should be sufficiently small that it still allows for\n * an array to be allocated on the stack. Typically 'in' is merely a seed.\n * Outputs outlen number of bytes\n */\n#define mgf1x4_512 SPX_NAMESPACE(mgf1x4_512)\nvoid mgf1x4_512(unsigned char *outx4, unsigned long outlen,\n            const unsigned char *in0,\n            const unsigned char *in1,\n            const unsigned char *in2,\n            const unsigned char *in3,\n            unsigned long inlen);\n\n#endif\n"
  },
  {
    "path": "sha2-avx2/test/benchmark.c",
    "content": "#define _POSIX_C_SOURCE 199309L\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n\n#include \"../api.h\"\n#include \"../fors.h\"\n#include \"../wots.h\"\n#include \"../wotsx8.h\"\n#include \"../params.h\"\n#include \"../randombytes.h\"\n\n#define SPX_MLEN 32\n#define NTESTS 10\n\nstatic void wots_gen_pkx8(unsigned char *pk, const spx_ctx *ctx,\n                 uint32_t addr[8]);\n\nstatic int cmp_llu(const void *a, const void*b)\n{\n  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;\n  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;\n  return 0;\n}\n\nstatic unsigned long long median(unsigned long long *l, size_t llen)\n{\n  qsort(l,llen,sizeof(unsigned long long),cmp_llu);\n\n  if(llen%2) return l[llen/2];\n  else return (l[llen/2-1]+l[llen/2])/2;\n}\n\nstatic void delta(unsigned long long *l, size_t llen)\n{\n    unsigned int i;\n    for(i = 0; i < llen - 1; i++) {\n        l[i] = l[i+1] - l[i];\n    }\n}\n\nstatic unsigned long long cpucycles(void)\n{\n  unsigned long long result;\n  __asm volatile(\".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax\"\n    : \"=a\" (result) ::  \"%rdx\");\n  return result;\n}\n\nstatic void printfcomma (unsigned long long n)\n{\n    if (n < 1000) {\n        printf(\"%llu\", n);\n        return;\n    }\n    printfcomma(n / 1000);\n    printf (\",%03llu\", n % 1000);\n}\n\nstatic void printfalignedcomma (unsigned long long n, int len)\n{\n    unsigned long long ncopy = n;\n    int i = 0;\n\n    while (ncopy > 9) {\n        len -= 1;\n        ncopy /= 10;\n        i += 1;  // to account for commas\n    }\n    i = i/3 - 1;  // to account for commas\n    for (; i < len; i++) {\n        printf(\" \");\n    }\n    printfcomma(n);\n}\n\nstatic void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul)\n{\n    unsigned long long med;\n\n    result /= NTESTS;\n    delta(l, NTESTS + 1);\n    med = median(l, llen);\n    printf(\"avg. 
%11.2lf us (%2.2lf sec); median \", result, result / 1e6);\n    printfalignedcomma(med, 12);\n    printf(\" cycles,  %5llux: \", mul);\n    printfalignedcomma(mul*med, 12);\n    printf(\" cycles\\n\");\n}\n\n#define MEASURE(TEXT, MUL, FNCALL)\\\n    printf(TEXT);\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\\\n    for(i = 0; i < NTESTS; i++) {\\\n        t[i] = cpucycles();\\\n        FNCALL;\\\n    }\\\n    t[NTESTS] = cpucycles();\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\\\n    result = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) / 1e3;\\\n    display_result(result, t, NTESTS, MUL);\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    spx_ctx ctx;\n    unsigned char pk[SPX_PK_BYTES];\n    unsigned char sk[SPX_SK_BYTES];\n    unsigned char *m = malloc(SPX_MLEN);\n    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);\n\n    unsigned char fors_pk[SPX_FORS_PK_BYTES];\n    unsigned char fors_m[SPX_FORS_MSG_BYTES];\n    unsigned char fors_sig[SPX_FORS_BYTES];\n    unsigned char addr[SPX_ADDR_BYTES];\n    unsigned char wots_pk[8*SPX_WOTS_PK_BYTES];\n\n    unsigned long long smlen;\n    unsigned long long mlen;\n    unsigned long long t[NTESTS+1];\n    struct timespec start, stop;\n    double result;\n    int i;\n\n    randombytes(m, SPX_MLEN);\n    randombytes(addr, SPX_ADDR_BYTES);\n\n    printf(\"Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\\n\",\n           SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES,\n           SPX_WOTS_W);\n\n    printf(\"Running %d iterations.\\n\", NTESTS);\n\n    MEASURE(\"Generating keypair.. \", 1, crypto_sign_keypair(pk, sk));\n    MEASURE(\"  - WOTS pk gen 8x.. \", (1 << SPX_TREE_HEIGHT) / 8, wots_gen_pkx8(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Signing..            
\", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk));\n    MEASURE(\"  - FORS signing..   \", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr));\n    MEASURE(\"  - WOTS pk gen x8.. \", SPX_D * (1 << SPX_TREE_HEIGHT) / 8, wots_gen_pkx8(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Verifying..          \", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk));\n\n    printf(\"Signature size: %d (%.2f KiB)\\n\", SPX_BYTES, SPX_BYTES / 1024.0);\n    printf(\"Public key size: %d (%.2f KiB)\\n\", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0);\n    printf(\"Secret key size: %d (%.2f KiB)\\n\", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0);\n\n    free(m);\n    free(sm);\n    free(mout);\n\n    return 0;\n}\n\nstatic void wots_gen_pkx8(unsigned char *pk, const spx_ctx *ctx,\n        uint32_t addr[8]) {\n    struct leaf_info_x8 leaf;\n    unsigned steps[ SPX_WOTS_LEN ] = { 0 };\n    INITIALIZE_LEAF_INFO_X8(leaf, addr, steps);\n    wots_gen_leafx8(pk, ctx, 0, &leaf);\n}\n"
  },
  {
    "path": "sha2-avx2/test/thashx8.c",
    "content": "#include <stdio.h>\n#include <string.h>\n\n#include \"../thashx8.h\"\n#include \"../thash.h\"\n#include \"../randombytes.h\"\n#include \"../params.h\"\n#include \"../hash.h\"\n\n#if SPX_SHA512\n#include \"../sha2.h\"\n#include \"../sha512x4.h\"\n#endif\n\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    unsigned char input[16*SPX_N];\n    spx_ctx ctx;\n    unsigned char output[8*SPX_N];\n    unsigned char out8[8*SPX_N];\n    uint32_t addr[8*8] = {0};\n    unsigned int j;\n\n    randombytes(ctx.pub_seed, SPX_N);\n    randombytes(input, 16*SPX_N);\n    randombytes((unsigned char *)addr, 8 * 8 * sizeof(uint32_t));\n\n    initialize_hash_function(&ctx);\n\n    printf(\"Testing if thash matches thashx8 on one block ... \");\n\n    for (j = 0; j < 8; j++) {\n        thash(out8 + j * SPX_N, input + j * SPX_N, 1, &ctx, addr + j*8);\n    }\n\n    thashx8(output + 0*SPX_N,\n            output + 1*SPX_N,\n            output + 2*SPX_N,\n            output + 3*SPX_N,\n            output + 4*SPX_N,\n            output + 5*SPX_N,\n            output + 6*SPX_N,\n            output + 7*SPX_N,\n            input + 0*SPX_N,\n            input + 1*SPX_N,\n            input + 2*SPX_N,\n            input + 3*SPX_N,\n            input + 4*SPX_N,\n            input + 5*SPX_N,\n            input + 6*SPX_N,\n            input + 7*SPX_N,\n            1, &ctx, addr);\n\n    if (memcmp(out8, output, 8 * SPX_N)) {\n        printf(\"failed!\\n\");\n        return -1;\n    }\n    printf(\"successful.\\n\");\n\n    printf(\"Testing if thash matches thashx8 on two blocks ... 
\");\n\n    for (j = 0; j < 8; j++) {\n        thash(out8 + j * SPX_N, input + (2*j) * SPX_N, 2, &ctx, addr + j*8);\n    }\n\n    thashx8(output + 0*SPX_N,\n            output + 1*SPX_N,\n            output + 2*SPX_N,\n            output + 3*SPX_N,\n            output + 4*SPX_N,\n            output + 5*SPX_N,\n            output + 6*SPX_N,\n            output + 7*SPX_N,\n            input + 0*SPX_N,\n            input + 2*SPX_N,\n            input + 4*SPX_N,\n            input + 6*SPX_N,\n            input + 8*SPX_N,\n            input + 10*SPX_N,\n            input + 12*SPX_N,\n            input + 14*SPX_N,\n            2, &ctx, addr);\n\n    if (memcmp(out8, output, 8 * SPX_N)) {\n        printf(\"failed!\\n\");\n        return -1;\n    }\n    printf(\"successful.\\n\");\n    return 0;\n}\n"
  },
  {
    "path": "sha2-avx2/thash_sha2_robustx8.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"utils.h\"\n#include \"params.h\"\n#include \"thashx8.h\"\n#include \"sha2.h\"\n#include \"sha256x8.h\"\n#include \"sha256avx.h\"\n\n#if SPX_SHA512\n#include \"sha512x4.h\"\n\nstatic void thashx8_512(\n    unsigned char *out0,\n    unsigned char *out1,\n    unsigned char *out2,\n    unsigned char *out3,\n    unsigned char *out4,\n    unsigned char *out5,\n    unsigned char *out6,\n    unsigned char *out7,\n    const unsigned char *in0,\n    const unsigned char *in1,\n    const unsigned char *in2,\n    const unsigned char *in3,\n    const unsigned char *in4,\n    const unsigned char *in5,\n    const unsigned char *in6,\n    const unsigned char *in7,\n    unsigned int inblocks,\n    const spx_ctx *ctx,\n    uint32_t addrx8[8*8]\n);\n#endif\n\n/**\n * 8-way parallel version of thash; takes 8x as much input and output\n */\nvoid thashx8(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             unsigned char *out4,\n             unsigned char *out5,\n             unsigned char *out6,\n             unsigned char *out7,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3,\n             const unsigned char *in4,\n             const unsigned char *in5,\n             const unsigned char *in6,\n             const unsigned char *in7, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx8[8*8])\n{\n#if SPX_SHA512\n    if (inblocks > 1) {\n        thashx8_512(\n             out0, out1, out2, out3, out4, out5, out6, out7,\n             in0, in1, in2, in3, in4, in5, in6, in7,\n        inblocks, ctx, addrx8);\n        return;\n    }\n#endif\n    SPX_VLA(unsigned char, bufx8, 8 * (SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N));\n    SPX_VLA(unsigned char, outbufx8, 8 * 
SPX_SHA256_OUTPUT_BYTES);\n    SPX_VLA(unsigned char, bitmaskx8, 8 * (inblocks * SPX_N));\n    unsigned int i;\n\n    for (i = 0; i < 8; i++) {\n        memcpy(bufx8 + i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n               ctx->pub_seed, SPX_N);\n        memcpy(bufx8 + SPX_N +\n                         i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n                         addrx8 + i*8, SPX_SHA256_ADDR_BYTES);\n    }\n\n    mgf1x8(bitmaskx8, inblocks * SPX_N,\n           bufx8 + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           SPX_N + SPX_SHA256_ADDR_BYTES);\n\n    for (i = 0; i < inblocks * SPX_N; i++) {\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in0[i] ^ bitmaskx8[i + 0*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in1[i] ^ bitmaskx8[i + 1*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in2[i] ^ bitmaskx8[i + 2*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in3[i] ^ bitmaskx8[i + 3*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n  
          in4[i] ^ bitmaskx8[i + 4*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in5[i] ^ bitmaskx8[i + 5*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in6[i] ^ bitmaskx8[i + 6*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in7[i] ^ bitmaskx8[i + 7*(inblocks * SPX_N)];\n    }\n\n    sha256x8_seeded(\n        /* out */\n        outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES,\n\n        /* seed */\n        ctx->state_seeded, 512,\n\n        /* in */\n        bufx8 + SPX_N + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */\n    );\n\n    memcpy(out0, outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out1, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out2, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out3, 
outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out4, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out5, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out6, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out7, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n}\n\n#if SPX_SHA512\n/**\n * 2x4-way parallel version of thash; this is for the uses of thash that are\n * based on SHA-512\n */\nstatic void thashx8_512(\n    unsigned char *out0,\n    unsigned char *out1,\n    unsigned char *out2,\n    unsigned char *out3,\n    unsigned char *out4,\n    unsigned char *out5,\n    unsigned char *out6,\n    unsigned char *out7,\n    const unsigned char *in0,\n    const unsigned char *in1,\n    const unsigned char *in2,\n    const unsigned char *in3,\n    const unsigned char *in4,\n    const unsigned char *in5,\n    const unsigned char *in6,\n    const unsigned char *in7,\n    unsigned int inblocks,\n    const spx_ctx *ctx,\n    uint32_t addrx8[8*8])\n{\n    SPX_VLA(unsigned char, bufx8, 8 * (SPX_N + SPX_SHA256_ADDR_BYTES + inblocks * SPX_N));\n    SPX_VLA(unsigned char, outbuf, 4 * SPX_SHA512_OUTPUT_BYTES);\n    SPX_VLA(unsigned char, bitmaskx4, 4 * (inblocks * SPX_N));\n    unsigned int i;\n\n    for (i = 0; i < 8; i++) {\n        memcpy(bufx8 + i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n               ctx->pub_seed, SPX_N);\n        memcpy(bufx8 + SPX_N +\n                         i*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n                         addrx8 + i*8, SPX_SHA256_ADDR_BYTES);\n    }\n\n    mgf1x4_512(bitmaskx4, inblocks * SPX_N,\n           bufx8 + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           SPX_N + SPX_SHA256_ADDR_BYTES);\n\n    for (i = 0; i < inblocks * SPX_N; i++) {\n   
     bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in0[i] ^ bitmaskx4[i + 0*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in1[i] ^ bitmaskx4[i + 1*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in2[i] ^ bitmaskx4[i + 2*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in3[i] ^ bitmaskx4[i + 3*(inblocks * SPX_N)];\n    }\n\n    mgf1x4_512(bitmaskx4, inblocks * SPX_N,\n           bufx8 + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           bufx8 + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n           SPX_N + SPX_SHA256_ADDR_BYTES);\n\n    for (i = 0; i < inblocks * SPX_N; i++) {\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in4[i] ^ bitmaskx4[i + 0*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in5[i] ^ bitmaskx4[i + 1*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in6[i] ^ bitmaskx4[i + 2*(inblocks * SPX_N)];\n        bufx8[SPX_N + SPX_SHA256_ADDR_BYTES + i +\n                7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)] =\n            in7[i] ^ bitmaskx4[i + 3*(inblocks * SPX_N)];\n    }\n\n    sha512x4_seeded(\n        outbuf + 0*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 
1*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 2*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 3*SPX_SHA512_OUTPUT_BYTES,\n        ctx->state_seeded_512, /* seed */\n        1024,                  /* seed length */\n        bufx8 + SPX_N + 0*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 1*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 2*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 3*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */\n    );\n\n    memcpy(out0, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out1, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out2, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out3, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n\n    sha512x4_seeded(\n        outbuf + 0*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 1*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 2*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 3*SPX_SHA512_OUTPUT_BYTES,\n        ctx->state_seeded_512, /* seed */\n        1024,                  /* seed length */\n        bufx8 + SPX_N + 4*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 5*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 6*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + SPX_N + 7*(SPX_N + SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */\n    );\n\n    memcpy(out4, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out5, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out6, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out7, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n}\n#endif\n"
  },
  {
    "path": "sha2-avx2/thash_sha2_simplex8.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"utils.h\"\n#include \"params.h\"\n#include \"thashx8.h\"\n#include \"sha2.h\"\n#include \"sha256x8.h\"\n#include \"sha256avx.h\"\n\n#if SPX_SHA512\n#include \"sha512x4.h\"\n\nstatic void thashx8_512(\n    unsigned char *out0,\n    unsigned char *out1,\n    unsigned char *out2,\n    unsigned char *out3,\n    unsigned char *out4,\n    unsigned char *out5,\n    unsigned char *out6,\n    unsigned char *out7,\n    const unsigned char *in0,\n    const unsigned char *in1,\n    const unsigned char *in2,\n    const unsigned char *in3,\n    const unsigned char *in4,\n    const unsigned char *in5,\n    const unsigned char *in6,\n    const unsigned char *in7,\n    unsigned int inblocks,\n    const spx_ctx *ctx,\n    uint32_t addrx8[8*8]\n);\n#endif\n\n/**\n * 8-way parallel version of thash; takes 8x as much input and output\n */\nvoid thashx8(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             unsigned char *out4,\n             unsigned char *out5,\n             unsigned char *out6,\n             unsigned char *out7,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3,\n             const unsigned char *in4,\n             const unsigned char *in5,\n             const unsigned char *in6,\n             const unsigned char *in7, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx8[8*8])\n{\n#if SPX_SHA512\n    if (inblocks > 1) {\n        thashx8_512(\n             out0, out1, out2, out3, out4, out5, out6, out7,\n             in0, in1, in2, in3, in4, in5, in6, in7,\n        inblocks, ctx, addrx8);\n        return;\n    }\n#endif\n    unsigned char bufx8[8*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)];\n    unsigned char outbufx8[8*SPX_SHA256_OUTPUT_BYTES];\n    unsigned int 
i;\n\n    for (i = 0; i < 8; i++) {\n        memcpy(bufx8 + i*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n                         addrx8 + i*8, SPX_SHA256_ADDR_BYTES);\n    }\n\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in0, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in1, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in2, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in3, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in4, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        5*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in5, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in6, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in7, inblocks * SPX_N);\n\n    sha256x8_seeded(\n        /* out */\n        outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES,\n        outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES,\n\n        /* seed */\n        ctx->state_seeded, 512,\n\n        /* in */\n        bufx8 + 0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 5*(SPX_SHA256_ADDR_BYTES + 
inblocks*SPX_N),\n        bufx8 + 6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */\n    );\n\n    memcpy(out0, outbufx8 + 0*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out1, outbufx8 + 1*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out2, outbufx8 + 2*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out3, outbufx8 + 3*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out4, outbufx8 + 4*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out5, outbufx8 + 5*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out6, outbufx8 + 6*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n    memcpy(out7, outbufx8 + 7*SPX_SHA256_OUTPUT_BYTES, SPX_N);\n}\n\n#if SPX_SHA512\n/**\n * 2x4-way parallel version of thash; this is for the uses of thash that are\n * based on SHA-512\n */\nstatic void thashx8_512(\n    unsigned char *out0,\n    unsigned char *out1,\n    unsigned char *out2,\n    unsigned char *out3,\n    unsigned char *out4,\n    unsigned char *out5,\n    unsigned char *out6,\n    unsigned char *out7,\n    const unsigned char *in0,\n    const unsigned char *in1,\n    const unsigned char *in2,\n    const unsigned char *in3,\n    const unsigned char *in4,\n    const unsigned char *in5,\n    const unsigned char *in6,\n    const unsigned char *in7,\n    unsigned int inblocks,\n    const spx_ctx *ctx,\n    uint32_t addrx8[8*8])\n{\n    unsigned char bufx8[8*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N)];\n    unsigned char outbuf[4*SPX_SHA512_OUTPUT_BYTES];\n    unsigned int i;\n\n    for (i = 0; i < 8; i++) {\n        memcpy(bufx8 + i*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n                         addrx8 + i*8, SPX_SHA256_ADDR_BYTES);\n    }\n\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in0, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in1, inblocks * SPX_N);\n    memcpy(bufx8 + 
SPX_SHA256_ADDR_BYTES +\n        2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in2, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in3, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in4, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        5*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in5, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in6, inblocks * SPX_N);\n    memcpy(bufx8 + SPX_SHA256_ADDR_BYTES +\n        7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), in7, inblocks * SPX_N);\n\n    sha512x4_seeded(\n        outbuf + 0*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 1*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 2*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 3*SPX_SHA512_OUTPUT_BYTES,\n        ctx->state_seeded_512, /* seed */\n        1024,                  /* seed length */\n        bufx8 + 0*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), /* in */\n        bufx8 + 1*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 2*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 3*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */\n    );\n\n    memcpy(out0, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out1, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out2, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out3, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n\n    sha512x4_seeded(\n        outbuf + 0*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 1*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 2*SPX_SHA512_OUTPUT_BYTES,\n        outbuf + 3*SPX_SHA512_OUTPUT_BYTES,\n        ctx->state_seeded_512, /* seed */\n        1024,                  /* seed length */\n        bufx8 + 4*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N), /* in */\n        bufx8 + 5*(SPX_SHA256_ADDR_BYTES + 
inblocks*SPX_N),\n        bufx8 + 6*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        bufx8 + 7*(SPX_SHA256_ADDR_BYTES + inblocks*SPX_N),\n        SPX_SHA256_ADDR_BYTES + inblocks*SPX_N /* len */\n    );\n\n    memcpy(out4, outbuf + 0*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out5, outbuf + 1*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out6, outbuf + 2*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n    memcpy(out7, outbuf + 3*SPX_SHA512_OUTPUT_BYTES, SPX_N);\n}\n#endif\n"
  },
  {
    "path": "sha2-avx2/thashx8.h",
    "content": "#ifndef SPX_THASHX8_H\n#define SPX_THASHX8_H\n\n#include <stdint.h>\n#include \"context.h\"\n#include \"params.h\"\n\n#define thashx8 SPX_NAMESPACE(thashx8)\nvoid thashx8(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             unsigned char *out4,\n             unsigned char *out5,\n             unsigned char *out6,\n             unsigned char *out7,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3,\n             const unsigned char *in4,\n             const unsigned char *in5,\n             const unsigned char *in6,\n             const unsigned char *in7, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx8[8*8]);\n\n#endif\n"
  },
  {
    "path": "sha2-avx2/utilsx8.c",
    "content": "#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx8.h\"\n#include \"params.h\"\n#include \"thashx8.h\"\n#include \"address.h\"\n\n/*\n * Generate the entire Merkle tree, computing the authentication path for leaf_idx,\n * and the resulting root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE)\n *\n * This expects tree_addrx8 to be initialized to 8 parallel addr structures for\n * the Merkle tree nodes\n *\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This works by using the standard Merkle tree building algorithm, except\n * that each 'node' tracked is actually 8 consecutive nodes in the real tree.\n * When we combine two logical nodes ABCDEFGH and STUVWXYZ, we perform the H\n * operation on adjacent real nodes, forming the parent logical node\n * (AB)(CD)(EF)(GH)(ST)(UV)(WX)(YZ)\n *\n * When we get to the top three levels of the real tree (where there is only\n * one logical node), we continue this operation three more times; the right\n * most real node will by the actual root (and the other 7 nodes will be\n * garbage).  
We follow the same thashx8 logic so that the 'extract\n * authentication path components' part of the loop is still executed (and\n * to simplify the code somewhat)\n *\n * This currently assumes tree_height >= 3; I suspect that doing an adjusting\n * idx, addr_idx on the gen_leafx8 call if tree_height < 3 would fix it; since\n * we don't actually use such short trees, I haven't bothered\n */\nvoid treehashx8(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx *ctx,\n                uint32_t leaf_idx, uint32_t idx_offset,\n                uint32_t tree_height,\n                void (*gen_leafx8)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx*,\n                   uint32_t idx, void *info),\n                uint32_t tree_addrx8[8*8],\n                void *info)\n{\n    /* This is where we keep the intermediate nodes */\n    SPX_VLA(unsigned char, stackx8, 8 * tree_height * SPX_N);\n    uint32_t left_adj = 0, prev_left_adj = 0; /* When we're doing the top 3 */\n        /* levels, the left-most part of the tree isn't at the beginning */\n        /* of current[].  
These give the offset of the actual start */\n\n    uint32_t idx;\n    uint32_t max_idx = (1 << (tree_height-3)) - 1;\n    for (idx = 0;; idx++) {\n        unsigned char current[8*SPX_N];   /* Current logical node */\n        gen_leafx8( current, ctx, 8*idx + idx_offset,\n                    info );\n\n        /* Now combine the freshly generated right node with previously */\n        /* generated left ones */\n        uint32_t internal_idx_offset = idx_offset;\n        uint32_t internal_idx = idx;\n        uint32_t internal_leaf = leaf_idx;\n        uint32_t h;     /* The height we are in the Merkle tree */\n        for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) {\n\n            /* Special processing if we're at the top of the tree */\n            if (h >= tree_height - 3) {\n                if (h == tree_height) {\n                    /* We hit the root; return it */\n                    memcpy( root, &current[7*SPX_N], SPX_N );\n                    return;\n                }\n                /* The tree indexing logic is a bit off in this case */\n                /* Adjust it so that the left-most node of the part of */\n                /* the tree that we're processing has index 0 */\n                prev_left_adj = left_adj;\n                left_adj = 8 - (1 << (tree_height - h - 1));\n            }\n\n            /* Check if we hit the top of the tree */\n            if (h == tree_height) {\n                /* We hit the root; return it */\n                memcpy( root, &current[7*SPX_N], SPX_N );\n                return;\n            }\n            \n            /*\n             * Check if one of the nodes we have is a part of the\n             * authentication path; if it is, write it out\n             */\n            if ((((internal_idx << 3) ^ internal_leaf) & ~0x7) == 0) {\n                memcpy( &auth_path[ h * SPX_N ],\n                        &current[(((internal_leaf&7)^1) + prev_left_adj) * SPX_N],\n                        SPX_N );\n     
       }\n\n            /*\n             * Check if we're at a left child; if so, stop going up the stack\n             * Exception: if we've reached the end of the tree, keep on going\n             * (so we combine the last 8 nodes into the one root node in three\n             * more iterations)\n             */\n            if ((internal_idx & 1) == 0 && idx < max_idx) {\n                break;\n            }\n\n            /* Ok, we're at a right node (or doing the top 3 levels) */\n            /* Now combine the left and right logical nodes together */\n\n            /* Set the address of the node we're creating. */\n            int j;\n            internal_idx_offset >>= 1;\n            for (j = 0; j < 8; j++) {\n                set_tree_height(tree_addrx8 + j*8, h + 1);\n                set_tree_index(tree_addrx8 + j*8,\n                     (8/2) * (internal_idx&~1) + j - left_adj + internal_idx_offset );\n            }\n            unsigned char *left = &stackx8[h * 8 * SPX_N];\n            thashx8( &current[0 * SPX_N],\n                     &current[1 * SPX_N],\n                     &current[2 * SPX_N],\n                     &current[3 * SPX_N],\n                     &current[4 * SPX_N],\n                     &current[5 * SPX_N],\n                     &current[6 * SPX_N],\n                     &current[7 * SPX_N],\n                     &left   [0 * SPX_N],\n                     &left   [2 * SPX_N],\n                     &left   [4 * SPX_N],\n                     &left   [6 * SPX_N],\n                     &current[0 * SPX_N],\n                     &current[2 * SPX_N],\n                     &current[4 * SPX_N],\n                     &current[6 * SPX_N],\n                     2, ctx, tree_addrx8);\n        }\n\n        /* We've hit a left child; save the current for when we get the */\n        /* corresponding right node */\n        memcpy( &stackx8[h * 8 * SPX_N], current, 8 * SPX_N);\n    }\n}\n"
  },
  {
    "path": "sha2-avx2/utilsx8.h",
    "content": "#ifndef SPX_UTILSX8_H\n#define SPX_UTILSX8_H\n\n#include <stdint.h>\n#include \"params.h\"\n\n/**\n * For a given leaf index, computes the authentication path and the resulting\n * root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE).\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This implementation uses AVX to compute internal nodes 8 at a time (in\n * parallel)\n */\n#define treehashx8 SPX_NAMESPACE(treehashx8)\nvoid treehashx8(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx *ctx,\n                uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,\n                void (*gen_leafx8)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx* /* ctx */,\n                   uint32_t addr_idx, void *info),\n                uint32_t tree_addrx8[8*8], void *info);\n\n#endif\n"
  },
  {
    "path": "sha2-avx2/wots.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx8.h\"\n#include \"hash.h\"\n#include \"hashx8.h\"\n#include \"thash.h\"\n#include \"thashx8.h\"\n#include \"wots.h\"\n#include \"wotsx8.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n// TODO clarify address expectations, and make them more uniform.\n// TODO i.e. do we expect types to be set already?\n// TODO and do we expect modifications or copies?\n\n/**\n * Computes up the chains\n */\nstatic void gen_chains(\n        unsigned char *out,\n        const unsigned char *in,\n        unsigned int start[SPX_WOTS_LEN],\n        unsigned int steps[SPX_WOTS_LEN],\n        const spx_ctx *ctx,\n        uint32_t addr[8])\n{\n    uint32_t i, j, k, idx, watching;\n    int done;\n    unsigned char empty[SPX_N];\n    unsigned char *bufs[8];\n    uint32_t addrs[8*8];\n\n    int l;\n    uint16_t counts[SPX_WOTS_W] = { 0 };\n    uint16_t idxs[SPX_WOTS_LEN];\n    uint16_t total, newTotal;\n\n    /* set addrs = {addr, addr, ..., addr} */\n    for (j = 0; j < 8; j++) {\n        memcpy(addrs+j*8, addr, sizeof(uint32_t) * 8);\n    }\n\n    /* Initialize out with the value at position 'start'. */\n    memcpy(out, in, SPX_WOTS_LEN*SPX_N);\n\n    /* Sort the chains in reverse order by steps using counting sort. */\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        counts[steps[i]]++;\n    }\n    total = 0;\n    for (l = SPX_WOTS_W - 1; l >= 0; l--) {\n        newTotal = counts[l] + total;\n        counts[l] = total;\n        total = newTotal;\n    }\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        idxs[counts[steps[i]]] = i;\n        counts[steps[i]]++;\n    }\n\n    /* We got our work cut out for us: do it! 
*/\n    for (i = 0; i < SPX_WOTS_LEN; i += 8) {\n        for (j = 0; j < 8 && i+j < SPX_WOTS_LEN; j++) {\n            idx = idxs[i+j];\n            set_chain_addr(addrs+j*8, idx);\n            bufs[j] = out + SPX_N * idx;\n        }\n\n        /* As the chains are sorted in reverse order, we know that the first\n         * chain is the longest and the last one is the shortest.  We keep\n         * an eye on whether the last chain is done and then on the one before,\n         * et cetera. */\n        watching = 7;\n        done = 0;\n        while (i + watching >= SPX_WOTS_LEN) {\n            bufs[watching] = &empty[0];\n            watching--;\n        }\n\n        for (k = 0;; k++) {\n            while (k == steps[idxs[i+watching]]) {\n                bufs[watching] = &empty[0];\n                if (watching == 0) {\n                    done = 1;\n                    break;\n                }\n                watching--;\n            }\n            if (done) {\n                break;\n            }\n            for (j = 0; j < watching + 1; j++) {\n                set_hash_addr(addrs+j*8, k + start[idxs[i+j]]);\n            }\n\n            thashx8(bufs[0], bufs[1], bufs[2], bufs[3],\n                    bufs[4], bufs[5], bufs[6], bufs[7],\n                    bufs[0], bufs[1], bufs[2], bufs[3],\n                    bufs[4], bufs[5], bufs[6], bufs[7], 1, ctx, addrs);\n        }\n    }\n}\n\n/**\n * base_w algorithm as described in draft.\n * Interprets an array of bytes as integers in base w.\n * This only works when log_w is a divisor of 8.\n */\nstatic void base_w(unsigned int *output, const int out_len,\n                   const unsigned char *input)\n{\n    int in = 0;\n    int out = 0;\n    unsigned char total;\n    int bits = 0;\n    int consumed;\n\n    for (consumed = 0; consumed < out_len; consumed++) {\n        if (bits == 0) {\n            total = input[in];\n            in++;\n            bits += 8;\n        }\n        bits -= SPX_WOTS_LOGW;\n        
output[out] = (total >> bits) & (SPX_WOTS_W - 1);\n        out++;\n    }\n}\n\n/* Computes the WOTS+ checksum over a message (in base_w). */\nstatic void wots_checksum(unsigned int *csum_base_w,\n                          const unsigned int *msg_base_w)\n{\n    unsigned int csum = 0;\n    unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8];\n    unsigned int i;\n\n    /* Compute checksum. */\n    for (i = 0; i < SPX_WOTS_LEN1; i++) {\n        csum += SPX_WOTS_W - 1 - msg_base_w[i];\n    }\n\n    /* Convert checksum to base_w. */\n    /* Make sure expected empty zero bits are the least significant bits. */\n    csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8);\n    ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum);\n    base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes);\n}\n\n/* Takes a message and derives the matching chain lengths. */\nvoid chain_lengths(unsigned int *lengths, const unsigned char *msg)\n{\n    base_w(lengths, SPX_WOTS_LEN1, msg);\n    wots_checksum(lengths + SPX_WOTS_LEN1, lengths);\n}\n\n/**\n * Takes a WOTS signature and an n-byte message, computes a WOTS public key.\n *\n * Writes the computed public key to 'pk'.\n */\nvoid wots_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *msg,\n                      const spx_ctx *ctx, uint32_t addr[8])\n{\n    unsigned int steps[SPX_WOTS_LEN];\n    unsigned int start[SPX_WOTS_LEN];\n    uint32_t i;\n\n    chain_lengths(start, msg);\n\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        steps[i] = SPX_WOTS_W - 1 - start[i];\n    }\n\n    gen_chains(pk, sig, start, steps, ctx, addr);\n}\n\n/*\n * This generates 8 sequential WOTS public keys\n * It also generates the WOTS signature if leaf_info indicates\n * that we're signing with one of these WOTS keys\n */\nvoid wots_gen_leafx8(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info) {\n    struct leaf_info_x8 *info = 
v_info;\n    uint32_t *leaf_addr = info->leaf_addr;\n    uint32_t *pk_addr = info->pk_addr;\n    unsigned int i, j, k;\n    unsigned char pk_buffer[ 8 * SPX_WOTS_BYTES ];\n    unsigned wots_offset = SPX_WOTS_BYTES;\n    unsigned char *buffer;\n    uint32_t wots_k_mask;\n    unsigned wots_sign_index;\n\n    if (((leaf_idx ^ info->wots_sign_leaf) & ~7) == 0) {\n        /* We're traversing the leaf that's signing; generate the WOTS */\n        /* signature */\n        wots_k_mask = 0;\n        wots_sign_index = info->wots_sign_leaf & 7; /* Which of the 8 */\n                                  /* slots do the signatures come from */\n    } else {\n        /* Nope, we're just generating pk's; turn off the signature logic */\n        wots_k_mask = ~0;\n\twots_sign_index = 0;\n    }\n\n    for (j = 0; j < 8; j++) {\n        set_keypair_addr( leaf_addr + j*8, leaf_idx + j );\n        set_keypair_addr( pk_addr + j*8, leaf_idx + j );\n    }\n\n    for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) {\n        uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k */\n            /* to the step if we're generating a signature, ~0 if we're not */\n\n        /* Start with the secret seed */\n        for (j = 0; j < 8; j++) {\n            set_chain_addr(leaf_addr + j*8, i);\n            set_hash_addr(leaf_addr + j*8, 0);\n            set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTSPRF);\n        }\n        prf_addrx8(buffer + 0*wots_offset,\n                   buffer + 1*wots_offset,\n                   buffer + 2*wots_offset,\n                   buffer + 3*wots_offset,\n                   buffer + 4*wots_offset,\n                   buffer + 5*wots_offset,\n                   buffer + 6*wots_offset,\n                   buffer + 7*wots_offset,\n                   ctx, leaf_addr);\n\n        for (j = 0; j < 8; j++) {\n            set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTS);\n        }\n\n        /* Iterate down the WOTS chain */\n        for (k=0;; 
k++) {\n            /* Check if one of the values we have needs to be saved as a */\n            /* part of the WOTS signature */\n            if (k == wots_k) {\n                memcpy( info->wots_sig + i * SPX_N,\n                        buffer + wots_sign_index*wots_offset, SPX_N );\n            }\n\n            /* Check if we hit the top of the chain */\n            if (k == SPX_WOTS_W - 1) break;\n\n            /* Iterate one step on all 8 chains */\n            for (j = 0; j < 8; j++) {\n                set_hash_addr(leaf_addr + j*8, k);\n            }\n            thashx8(buffer + 0*wots_offset,\n                    buffer + 1*wots_offset,\n                    buffer + 2*wots_offset,\n                    buffer + 3*wots_offset,\n                    buffer + 4*wots_offset,\n                    buffer + 5*wots_offset,\n                    buffer + 6*wots_offset,\n                    buffer + 7*wots_offset,\n                    buffer + 0*wots_offset,\n                    buffer + 1*wots_offset,\n                    buffer + 2*wots_offset,\n                    buffer + 3*wots_offset,\n                    buffer + 4*wots_offset,\n                    buffer + 5*wots_offset,\n                    buffer + 6*wots_offset,\n                    buffer + 7*wots_offset, 1, ctx, leaf_addr);\n        }\n    }\n\n    /* Do the final thash to generate the public keys */\n    thashx8(dest + 0*SPX_N,\n            dest + 1*SPX_N,\n            dest + 2*SPX_N,\n            dest + 3*SPX_N,\n            dest + 4*SPX_N,\n            dest + 5*SPX_N,\n            dest + 6*SPX_N,\n            dest + 7*SPX_N,\n            pk_buffer + 0*wots_offset,\n            pk_buffer + 1*wots_offset,\n            pk_buffer + 2*wots_offset,\n            pk_buffer + 3*wots_offset,\n            pk_buffer + 4*wots_offset,\n            pk_buffer + 5*wots_offset,\n            pk_buffer + 6*wots_offset,\n            pk_buffer + 7*wots_offset, SPX_WOTS_LEN, ctx, pk_addr);\n}\n"
  },
  {
    "path": "sha2-avx2/wotsx8.h",
    "content": "#if !defined( WOTSX8_H_ )\n#define WOTSX8_H_ \n\n#include <string.h>\n#include \"params.h\"\n\n/*\n * This is here to provide an interface to the internal wots_gen_leafx8\n * routine.  While this routine is not referenced in the package outside of\n * wots.c, it is called from the stand-alone benchmark code to characterize\n * the performance\n */\nstruct leaf_info_x8 {\n    unsigned char *wots_sig;\n    uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */\n    uint32_t *wots_steps;\n    uint32_t leaf_addr[8*8];\n    uint32_t pk_addr[8*8];\n};\n\n/* Macro to set the leaf_info to something 'benign', that is, it would */\n/* run with the same time as it does during the real signing process */\n/* Used only by the benchmark code */\n#define INITIALIZE_LEAF_INFO_X8(info, addr, step_buffer) { \\\n    info.wots_sig = 0;             \\\n    info.wots_sign_leaf = ~0;      \\\n    info.wots_steps = step_buffer; \\\n    int i;                         \\\n    for (i=0; i<8; i++) {          \\\n        memcpy( &info.leaf_addr[8*i], addr, 32 ); \\\n        memcpy( &info.pk_addr[8*i], addr, 32 ); \\\n    } \\\n}\n\n#define wots_gen_leafx8 SPX_NAMESPACE(wots_gen_leafx8)\nvoid wots_gen_leafx8(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info);\n\n#endif /* WOTSX8_H_ */\n"
  },
  {
    "path": "shake-a64/.gitignore",
    "content": "test/*\n!test/*.c\nPQCsignKAT_*.rsp\nPQCsignKAT_*.req\nPQCgenKAT_sign\n"
  },
  {
    "path": "shake-a64/Makefile",
    "content": "PARAMS = sphincs-shake-128f\nTHASH = robust\n\nCFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -fomit-frame-pointer -flto -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS)\n\nSOURCES =          hash_shake.c hash_shakex2.c thash_shake_$(THASH)x2.c address.c randombytes.c merkle.c wots.c utils.c utilsx2.c fors.c sign.c fips202.c fips202x2.c f1600x2_const.c f1600x2.s\nHEADERS = params.h hash.h          hashx2.h                          thashx2.h                 address.h randombytes.h merkle.h wots.h utils.h utilsx2.h fors.h api.h fips202.h fips202x2.h f1600x2.h thash.h\n\nDET_SOURCES = $(SOURCES:randombytes.%=rng.%)\nDET_HEADERS = $(HEADERS:randombytes.%=rng.%)\n\nTESTS = test/fors \\\n\t\ttest/spx \\\n\t\ttest/thashx2 \\\n\nBENCHMARK = test/benchmark\n\n.PHONY: clean test benchmark\n\ndefault: PQCgenKAT_sign\n\nall: PQCgenKAT_sign tests benchmarks\n\ntests: $(TESTS)\n\ntest: $(TESTS:=.exec)\n\nbenchmarks: $(BENCHMARK)\n\nbenchmark: $(BENCHMARK:=.exec)\n\nPQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto\n\ntest/benchmark: test/benchmark.c test/cycles.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ test/cycles.c $(SOURCES) $< $(LDLIBS)\n\ntest/%: test/%.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)\n\ntest/%.exec: test/%\n\t@$<\n\nclean:\n\t-$(RM) $(TESTS)\n\t-$(RM) $(BENCHMARK)\n\t-$(RM) PQCgenKAT_sign\n\t-$(RM) PQCsignKAT_*.rsp\n\t-$(RM) PQCsignKAT_*.req\n"
  },
  {
    "path": "shake-a64/context.h",
    "content": "#ifndef SPX_CONTEXT_H\n#define SPX_CONTEXT_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n\ntypedef struct {\n    uint8_t pub_seed[SPX_N];\n    uint8_t sk_seed[SPX_N];\n} spx_ctx;\n\n#endif\n"
  },
  {
    "path": "shake-a64/f1600x2.h",
    "content": "#ifndef SPX_F1600X2_H\n#define SPX_F1600X2_H\n\n#include <stdint.h>\n\nextern uint64_t f1600_RC[24];\nextern void _f1600x2(uint64_t* a, uint64_t* rc);\n\n#define f1600x2(s) do {_f1600x2((s), f1600_RC);} while(0)\n\n#endif\n"
  },
  {
    "path": "shake-a64/f1600x2.s",
    "content": "# From https://github.com/bwesterb/armed-keccak\n\n.macro round\n    # Execute theta, but without xoring into the state yet.\n    # Compute parities p[i] = a[i] ^ a[5+i] ^ ... ^ a[20+i].\n    eor3.16b v25, v0, v5, v10\n    eor3.16b v26, v1, v6, v11\n    eor3.16b v27, v2, v7, v12\n    eor3.16b v28, v3, v8, v13\n    eor3.16b v29, v4, v9, v14\n\n    eor3.16b v25, v25, v15, v20\n    eor3.16b v26, v26, v16, v21\n    eor3.16b v27, v27, v17, v22\n    eor3.16b v28, v28, v18, v23\n    eor3.16b v29, v29, v19, v24\n\n    # d[0] = rotl(p[1], 1) ^ p[4]\n    rax1.2d v30, v29, v26\n    # d[3] = rotl(p[4], 1) ^ p[2]\n    rax1.2d v29, v27, v29\n    # d[1] = rotl(p[2], 1) ^ p[0]\n    rax1.2d v27, v25, v27\n    # d[4] = rotl(p[0], 1) ^ p[3]\n    rax1.2d v25, v28, v25\n    # d[2] = rotl(p[3], 1) ^ p[1]\n    rax1.2d v28, v26, v28\n\n    # Xor parities from step theta into the state at the same time\n    # as executing rho and pi.\n    eor.16b v0, v0,  v30\n    mov.16b v31, v1\n    xar.2d v1,  v6,  v27, 20\n    xar.2d v6,  v9,  v25, 44\n    xar.2d v9,  v22, v28, 3\n    xar.2d v22, v14, v25, 25\n    xar.2d v14, v20, v30, 46\n    xar.2d v20, v2,  v28, 2\n    xar.2d v2,  v12, v28, 21\n    xar.2d v12, v13, v29, 39\n    xar.2d v13, v19, v25, 56\n    xar.2d v19, v23, v29, 8\n    xar.2d v23, v15, v30, 23\n    xar.2d v15, v4,  v25, 37\n    xar.2d v4,  v24, v25, 50\n    xar.2d v24, v21, v27, 62\n    xar.2d v21, v8,  v29, 9\n    xar.2d v8,  v16, v27, 19\n    xar.2d v16, v5,  v30, 28\n    xar.2d v5,  v3,  v29, 36\n    xar.2d v3,  v18, v29, 43\n    xar.2d v18, v17, v28, 49\n    xar.2d v17, v11, v27, 54\n    xar.2d v11, v7,  v28, 58\n    xar.2d v7,  v10, v30, 61\n    xar.2d v10, v31, v27, 63\n\n    # Chi\n    bcax.16b v25, v0,  v2,  v1\n    bcax.16b v26, v1,  v3,  v2\n    bcax.16b v2,  v2,  v4,  v3\n    bcax.16b v3,  v3,  v0,  v4\n    bcax.16b v4,  v4,  v1,  v0\n    mov.16b v0, v25\n    mov.16b v1, v26\n\n    bcax.16b v25, v5,  v7,  v6\n    bcax.16b v26, v6,  v8,  v7\n    bcax.16b 
v7,  v7,  v9,  v8\n    bcax.16b v8,  v8,  v5,  v9\n    bcax.16b v9,  v9,  v6,  v5\n    mov.16b v5, v25\n    mov.16b v6, v26\n\n    bcax.16b v25, v10,  v12,  v11\n    bcax.16b v26, v11,  v13,  v12\n    bcax.16b v12, v12,  v14,  v13\n    bcax.16b v13, v13,  v10,  v14\n    bcax.16b v14, v14,  v11,  v10\n    mov.16b v10, v25\n    mov.16b v11, v26\n\n    bcax.16b v25, v15,  v17,  v16\n    bcax.16b v26, v16,  v18,  v17\n    bcax.16b v17, v17,  v19,  v18\n    bcax.16b v18, v18,  v15,  v19\n    bcax.16b v19, v19,  v16,  v15\n    mov.16b v15, v25\n    mov.16b v16, v26\n\n    bcax.16b v25, v20,  v22,  v21\n    bcax.16b v26, v21,  v23,  v22\n    bcax.16b v22, v22,  v24,  v23\n    bcax.16b v23, v23,  v20,  v24\n    bcax.16b v24, v24,  v21,  v20\n    mov.16b v20, v25\n    mov.16b v21, v26\n\n    # iota\n    ld1r {v25.2d}, [x1], #8\n    eor.16b v0, v0, v25\n.endm\n\n.align 4\n.global __f1600x2\n__f1600x2:\n    stp d8,  d9,  [sp,#-16]!\n    stp d10, d11, [sp,#-16]!\n    stp d12, d13, [sp,#-16]!\n    stp d14, d15, [sp,#-16]!\n\n    mov x2, x0\n    mov x3, #24\n\n    ld1.2d {v0,  v1,  v2,  v3},  [x0], #64\n    ld1.2d {v4,  v5,  v6,  v7},  [x0], #64\n    ld1.2d {v8,  v9,  v10, v11}, [x0], #64\n    ld1.2d {v12, v13, v14, v15}, [x0], #64\n    ld1.2d {v16, v17, v18, v19}, [x0], #64\n    ld1.2d {v20, v21, v22, v23}, [x0], #64\n    ld1.2d {v24}, [x0]\n\nloop:\n    round\n\n    subs x3, x3, #1\n    cbnz x3, loop\n\n    mov x0, x2\n    st1.2d {v0,  v1,  v2,  v3},  [x0], #64\n    st1.2d {v4,  v5,  v6,  v7},  [x0], #64\n    st1.2d {v8,  v9,  v10, v11}, [x0], #64\n    st1.2d {v12, v13, v14, v15}, [x0], #64\n    st1.2d {v16, v17, v18, v19}, [x0], #64\n    st1.2d {v20, v21, v22, v23}, [x0], #64\n    st1.2d {v24}, [x0]\n\n    ldp d14, d15, [sp], #16\n    ldp d12, d13, [sp], #16\n    ldp d10, d11, [sp], #16\n    ldp d8,  d9,  [sp], #16\n\n    ret lr\n"
  },
  {
    "path": "shake-a64/f1600x2_const.c",
    "content": "#include \"f1600x2.h\"\n\nuint64_t f1600_RC[24] = {\n\t0x0000000000000001,\n\t0x0000000000008082,\n\t0x800000000000808A,\n\t0x8000000080008000,\n\t0x000000000000808B,\n\t0x0000000080000001,\n\t0x8000000080008081,\n\t0x8000000000008009,\n\t0x000000000000008A,\n\t0x0000000000000088,\n\t0x0000000080008009,\n\t0x000000008000000A,\n\t0x000000008000808B,\n\t0x800000000000008B,\n\t0x8000000000008089,\n\t0x8000000000008003,\n\t0x8000000000008002,\n\t0x8000000000000080,\n\t0x000000000000800A,\n\t0x800000008000000A,\n\t0x8000000080008081,\n\t0x8000000000008080,\n\t0x0000000080000001,\n\t0x8000000080008008,\n};\n\n\n"
  },
  {
    "path": "shake-a64/fips202x2.c",
    "content": "#include <stdint.h>\n#include <assert.h>\n\n#include \"fips202x2.h\"\n#include \"fips202.h\"\n#include \"f1600x2.h\"\n\nuint64_t load64(const unsigned char *x)\n{\n  unsigned long long r = 0, i;\n\n  for (i = 0; i < 8; ++i) {\n    r |= (unsigned long long)x[i] << 8 * i;\n  }\n  return r;\n}\n\nvoid store64(uint8_t *x, uint64_t u)\n{\n  unsigned int i;\n\n  for(i=0; i<8; ++i) {\n    x[i] = u;\n    u >>= 8;\n  }\n}\n\nstatic void keccak_absorb2x(uint64_t *s,\n                          unsigned int r,\n                          const unsigned char *m0,\n                          const unsigned char *m1,\n                          unsigned long long int mlen,\n                          unsigned char p)\n{\n  unsigned long long i;\n  unsigned char t0[200];\n  unsigned char t1[200];\n\n  while (mlen >= r)\n  {\n    for (i = 0; i < r / 8; ++i)\n    {\n      s[2*i+0] ^= load64(m0 + 8 * i);\n      s[2*i+1] ^= load64(m1 + 8 * i);\n    }\n\n    f1600x2(s);\n    mlen -= r;\n    m0 += r;\n    m1 += r;\n  }\n\n  for (i = 0; i < r; ++i)\n  {\n    t0[i] = 0;\n    t1[i] = 0;\n  }\n  for (i = 0; i < mlen; ++i)\n  {\n    t0[i] = m0[i];\n    t1[i] = m1[i];\n  }\n\n  t0[i] = p;\n  t1[i] = p;\n\n  t0[r - 1] |= 128;\n  t1[r - 1] |= 128;\n\n  for (i = 0; i < r / 8; ++i)\n  {\n    s[2*i+0] ^= load64(t0 + 8 * i);\n    s[2*i+1] ^= load64(t1 + 8 * i);\n  }\n}\n\n\nstatic void keccak_squeezeblocks2x(unsigned char *h0,\n                                   unsigned char *h1,\n                                   unsigned long long int nblocks,\n                                   uint64_t *s,\n                                   unsigned int r)\n{\n  unsigned int i;\n\n  while(nblocks > 0)\n  {\n    f1600x2(s);\n    for(i=0;i<(r>>3);i++)\n    {\n      store64(h0+8*i, s[2*i+0]);\n      store64(h1+8*i, s[2*i+1]);\n    }\n    h0 += r;\n    h1 += r;\n    nblocks--;\n  }\n}\n\n\n\nvoid shake128x2(unsigned char *out0,\n                unsigned char *out1,\n                unsigned long long 
outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned long long inlen)\n{\n  uint64_t s[50] = {0};\n  unsigned char t0[SHAKE128_RATE];\n  unsigned char t1[SHAKE128_RATE];\n  unsigned int i;\n\n  /* absorb 2 messages of identical length in parallel */\n  keccak_absorb2x(s, SHAKE128_RATE, in0, in1, inlen, 0x1F);\n\n  /* Squeeze output */\n  keccak_squeezeblocks2x(out0, out1, outlen/SHAKE128_RATE, s, SHAKE128_RATE);\n\n  out0 += (outlen/SHAKE128_RATE)*SHAKE128_RATE;\n  out1 += (outlen/SHAKE128_RATE)*SHAKE128_RATE;\n\n  if(outlen%SHAKE128_RATE)\n  {\n    keccak_squeezeblocks2x(t0, t1, 1, s, SHAKE128_RATE);\n    for(i=0;i<outlen%SHAKE128_RATE;i++)\n    {\n      out0[i] = t0[i];\n      out1[i] = t1[i];\n    }\n  }\n}\n\n\nvoid shake256x2(unsigned char *out0,\n                unsigned char *out1,\n                unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned long long inlen)\n{\n  uint64_t s[50] = {0};\n  unsigned char t0[SHAKE256_RATE];\n  unsigned char t1[SHAKE256_RATE];\n  unsigned int i;\n\n  /* absorb 2 messages of identical length in parallel */\n  keccak_absorb2x(s, SHAKE256_RATE, in0, in1, inlen, 0x1F);\n\n  /* Squeeze output */\n  keccak_squeezeblocks2x(out0, out1, outlen/SHAKE256_RATE, s, SHAKE256_RATE);\n\n  out0 += (outlen/SHAKE256_RATE)*SHAKE256_RATE;\n  out1 += (outlen/SHAKE256_RATE)*SHAKE256_RATE;\n\n  if(outlen%SHAKE256_RATE)\n  {\n    keccak_squeezeblocks2x(t0, t1, 1, s, SHAKE256_RATE);\n    for(i=0;i<outlen%SHAKE256_RATE;i++)\n    {\n      out0[i] = t0[i];\n      out1[i] = t1[i];\n    }\n  }\n}\n"
  },
  {
    "path": "shake-a64/fips202x2.h",
    "content": "#ifndef SPX_FIPS202X2_H\n#define SPX_FIPS202X2_H\n\n#include <stdint.h>\n\nuint64_t load64(const unsigned char *x);\nvoid store64(uint8_t *x, uint64_t u);\n\n\nvoid shake128x2(unsigned char *out0,\n                unsigned char *out1,\n                unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned long long inlen);\n\nvoid shake256x2(unsigned char *out0,\n                unsigned char *out1,\n                unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned long long inlen);\n\n#endif\n"
  },
  {
    "path": "shake-a64/fors.c",
    "content": "#include <stdlib.h>\n#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"fors.h\"\n#include \"utils.h\"\n#include \"utilsx2.h\"\n#include \"hash.h\"\n#include \"hashx2.h\"\n#include \"thashx2.h\"\n#include \"address.h\"\n\nstatic void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx,\n                        uint32_t fors_leaf_addr[8])\n{\n    prf_addr(sk, ctx, fors_leaf_addr);\n}\n\nstatic void fors_gen_skx2(unsigned char *sk0,\n                          unsigned char *sk1,\n                          const spx_ctx *ctx,\n                          uint32_t fors_leaf_addrx2[2*8])\n{\n    prf_addrx2(sk0, sk1,\n               ctx, fors_leaf_addrx2);\n}\n\nstatic void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,\n                            const spx_ctx *ctx,\n                            uint32_t fors_leaf_addr[8])\n{\n    thash(leaf, sk, 1, ctx, fors_leaf_addr);\n}\n\nstatic void fors_sk_to_leafx2(unsigned char *leaf0,\n                              unsigned char *leaf1,\n                              const unsigned char *sk0,\n                              const unsigned char *sk1,\n                              const spx_ctx *ctx,\n                              uint32_t fors_leaf_addrx2[2*8])\n{\n    thashx2(leaf0, leaf1,\n            sk0, sk1,\n            1, ctx, fors_leaf_addrx2);\n}\n\nstruct fors_gen_leaf_info {\n    uint32_t leaf_addrx[2*8];\n};\n\nstatic void fors_gen_leafx2(unsigned char *leaf,\n                            const spx_ctx *ctx,\n                            uint32_t addr_idx, void *info)\n{\n    struct fors_gen_leaf_info *fors_info = info;\n    uint32_t *fors_leaf_addrx2 = fors_info->leaf_addrx;\n    unsigned int j;\n\n    /* Only set the parts that the caller doesn't set */\n    for (j = 0; j < 2; j++) {\n        set_tree_index(fors_leaf_addrx2 + j*8, addr_idx + j);\n        set_type(fors_leaf_addrx2 + j*8, SPX_ADDR_TYPE_FORSPRF);\n    }\n\n    fors_gen_skx2(leaf + 0*SPX_N,\n          
        leaf + 1*SPX_N,\n                  ctx, fors_leaf_addrx2);\n\n    for (j = 0; j < 2; j++) {\n        set_type(fors_leaf_addrx2 + j*8, SPX_ADDR_TYPE_FORSTREE);\n    }\n\n    fors_sk_to_leafx2(leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  ctx, fors_leaf_addrx2);\n}\n\n/**\n * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n * Assumes indices has space for SPX_FORS_TREES integers.\n */\nstatic void message_to_indices(uint32_t *indices, const unsigned char *m)\n{\n    unsigned int i, j;\n    unsigned int offset = 0;\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        indices[i] = 0;\n        for (j = 0; j < SPX_FORS_HEIGHT; j++) {\n            indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;\n            offset++;\n        }\n    }\n}\n\n/**\n * Signs a message m, deriving the secret key from sk_seed and the FTS address.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_sign(unsigned char *sig, unsigned char *pk,\n               const unsigned char *m,\n               const spx_ctx *ctx,\n               const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    uint32_t fors_tree_addr[2*8] = {0};\n    struct fors_gen_leaf_info fors_info = {0};\n    uint32_t *fors_leaf_addr = fors_info.leaf_addrx;\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    for (i=0; i<2; i++) {\n        copy_keypair_addr(fors_tree_addr + 8*i, fors_addr);\n        set_type(fors_tree_addr + 8*i, SPX_ADDR_TYPE_FORSTREE);\n        copy_keypair_addr(fors_leaf_addr + 8*i, fors_addr);\n    }\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; 
i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Include the secret key part that produces the selected leaf node. */\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF);\n        fors_gen_sk(sig, ctx, fors_tree_addr);\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n        sig += SPX_N;\n\n        /* Compute the authentication path for this leaf node. */\n        treehashx2(roots + i*SPX_N, sig, ctx,\n                 indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx2,\n                 fors_tree_addr, &fors_info);\n\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n\n/**\n * Derives the FORS public key from a signature.\n * This can be used for verification by comparing to a known public key, or to\n * subsequently verify a signature on the derived public key. 
The latter is the\n * typical use-case when used as an FTS below an OTS in a hypertree.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *m,\n                      const spx_ctx *ctx,\n                      const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    unsigned char leaf[SPX_N];\n    uint32_t fors_tree_addr[8] = {0};\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    copy_keypair_addr(fors_tree_addr, fors_addr);\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n\n    set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Derive the leaf from the included secret key part. */\n        fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr);\n        sig += SPX_N;\n\n        /* Derive the corresponding root node of this tree. */\n        compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset,\n                     sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr);\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n"
  },
  {
    "path": "shake-a64/hash_shakex2.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"params.h\"\n#include \"fips202x2.h\"\n#include \"f1600x2.h\"\n#include \"hashx2.h\"\n\n/*\n * 2-way parallel version of prf_addr; takes 2x as much input and output\n */\nvoid prf_addrx2(unsigned char *out0,\n                unsigned char *out1,\n                const spx_ctx *ctx,\n                const uint32_t addrx2[2*8]) {\n    /* As we write and read only a few quadwords, it is more efficient to\n     * build and extract from the twoway SHAKE256 state by hand. */\n    uint64_t state[50] = {0};\n    \n    for (int i = 0; i < SPX_N/8; i++) {\n        uint64_t x = load64(ctx->pub_seed + 8*i);\n        state[2*i] = x;\n        state[2*i+1] = x;\n    }\n    for (int i = 0; i < 4; i++) {\n        state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32)\n            | (uint64_t)addrx2[2*i];\n        state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32)\n            | (uint64_t)addrx2[8+2*i];\n    }\n    for (int i = 0; i < SPX_N/8; i++) {\n        uint64_t x = load64(ctx->sk_seed + 8*i);\n        state[2*(SPX_N/8+i+4)] = x;\n        state[2*(SPX_N/8+i+4)+1] = x;\n    }\n\n    /* SHAKE domain separator and padding. */\n    state[2*(SPX_N/4+4)] = 0x1f;\n    state[2*(SPX_N/4+4)+1] = 0x1f;\n\n    state[2*16] = 0x80ULL << 56;\n    state[2*16+1] = 0x80ULL << 56;\n\n    f1600x2(state);\n\n    for (int i = 0; i < SPX_N/8; i++) {\n        store64(out0 + 8*i, state[2*i]);\n        store64(out1 + 8*i, state[2*i+1]);\n    }\n}\n"
  },
  {
    "path": "shake-a64/hashx2.h",
    "content": "#ifndef SPX_HASHX2_H\n#define SPX_HASHX2_H\n\n#include <stdint.h>\n#include \"context.h\"\n#include \"params.h\"\n\n#define prf_addrx2 SPX_NAMESPACE(prf_addrx2)\nvoid prf_addrx2(unsigned char *out0,\n                unsigned char *out1,\n                const spx_ctx *ctx,\n                const uint32_t addrx2[2*8]);\n\n#endif\n"
  },
  {
    "path": "shake-a64/merkle.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx2.h\"\n#include \"wots.h\"\n#include \"wotsx2.h\"\n#include \"merkle.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n/*\n * This generates a Merkle signature (WOTS signature followed by the Merkle\n * authentication path).\n */ \nvoid merkle_sign(uint8_t *sig, unsigned char *root,\n                 const spx_ctx* ctx,\n                 uint32_t wots_addr[8], uint32_t tree_addr[8],\n                 uint32_t idx_leaf)\n{\n    unsigned char *auth_path = sig + SPX_WOTS_BYTES;\n    uint32_t tree_addrx2[2*8] = { 0 };\n    int j;\n    struct leaf_info_x2 info = { 0 };\n    unsigned steps[ SPX_WOTS_LEN ];\n\n    info.wots_sig = sig;\n    chain_lengths(steps, root);\n    info.wots_steps = steps;\n\n    for (j=0; j<2; j++) {\n        set_type(&tree_addrx2[8*j], SPX_ADDR_TYPE_HASHTREE);\n        set_type(&info.leaf_addr[8*j], SPX_ADDR_TYPE_WOTS);\n        set_type(&info.pk_addr[8*j], SPX_ADDR_TYPE_WOTSPK);\n        copy_subtree_addr(&tree_addrx2[8*j], tree_addr);\n        copy_subtree_addr(&info.leaf_addr[8*j], wots_addr);\n        copy_subtree_addr(&info.pk_addr[8*j], wots_addr);\n    }\n\n    info.wots_sign_leaf = idx_leaf;\n\n    treehashx2(root, auth_path, ctx,\n                idx_leaf, 0,\n                SPX_TREE_HEIGHT,\n                wots_gen_leafx2,\n                tree_addrx2, &info);\n}\n\n/* Compute root node of the top-most subtree. */\nvoid merkle_gen_root(unsigned char *root, const spx_ctx *ctx)\n{\n    /* We do not need the auth path in key generation, but it simplifies the\n       code to have just one treehash routine that computes both root and path\n       in one function. 
*/\n    unsigned char auth_path[SPX_TREE_HEIGHT * SPX_N + SPX_WOTS_BYTES];\n    uint32_t top_tree_addr[8] = {0};\n    uint32_t wots_addr[8] = {0};\n\n    set_layer_addr(top_tree_addr, SPX_D - 1);\n    set_layer_addr(wots_addr, SPX_D - 1);\n\n    merkle_sign(auth_path, root, ctx,\n                wots_addr, top_tree_addr,\n                ~0 /* ~0 means \"don't bother generating an auth path\" */ );\n}\n"
  },
  {
    "path": "shake-a64/test/benchmark.c",
    "content": "#define _POSIX_C_SOURCE 199309L\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n\n#include \"../thash.h\"\n#include \"../thashx2.h\"\n#include \"../api.h\"\n#include \"../f1600x2.h\"\n#include \"../fors.h\"\n#include \"../wots.h\"\n#include \"../wotsx2.h\"\n#include \"../params.h\"\n#include \"../randombytes.h\"\n\n#include \"cycles.h\"\n\n#define SPX_MLEN 32\n#define NTESTS 10\n\nstatic void wots_gen_pkx2(unsigned char *pk, const spx_ctx *ctx,\n        uint32_t addr[8]);\n\nstatic int cmp_llu(const void *a, const void*b)\n{\n  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;\n  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;\n  return 0;\n}\n\nstatic unsigned long long median(unsigned long long *l, size_t llen)\n{\n  qsort(l,llen,sizeof(unsigned long long),cmp_llu);\n\n  if(llen%2) return l[llen/2];\n  else return (l[llen/2-1]+l[llen/2])/2;\n}\n\nstatic void delta(unsigned long long *l, size_t llen)\n{\n    unsigned int i;\n    for(i = 0; i < llen - 1; i++) {\n        l[i] = l[i+1] - l[i];\n    }\n}\n\nstatic void printfcomma (unsigned long long n)\n{\n    if (n < 1000) {\n        printf(\"%llu\", n);\n        return;\n    }\n    printfcomma(n / 1000);\n    printf (\",%03llu\", n % 1000);\n}\n\nstatic void printfalignedcomma (unsigned long long n, int len)\n{\n    unsigned long long ncopy = n;\n    int i = 0;\n\n    while (ncopy > 9) {\n        len -= 1;\n        ncopy /= 10;\n        i += 1;  // to account for commas\n    }\n    i = i/3 - 1;  // to account for commas\n    for (; i < len; i++) {\n        printf(\" \");\n    }\n    printfcomma(n);\n}\n\nstatic void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul)\n{\n    unsigned long long med;\n\n    result /= NTESTS;\n    delta(l, NTESTS + 1);\n    med = median(l, llen);\n    printf(\"avg. 
%11.2lf us (%2.2lf sec); median \", result, result / 1e6);\n    printfalignedcomma(med, 12);\n    printf(\" cycles,  %5llux: \", mul);\n    printfalignedcomma(mul*med, 12);\n    printf(\" cycles\\n\");\n}\n\n#define MEASURE_GENERIC(TEXT, MUL, FNCALL, CORR)\\\n    printf(TEXT);\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\\\n    for(i = 0; i < NTESTS; i++) {\\\n        t[i] = cpucycles() / CORR;\\\n        FNCALL;\\\n    }\\\n    t[NTESTS] = cpucycles();\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\\\n    result = ((stop.tv_sec - start.tv_sec) * 1e6 + \\\n        (stop.tv_nsec - start.tv_nsec) / 1e3) / (double)CORR;\\\n    display_result(result, t, NTESTS, MUL);\n#define MEASURT(TEXT, MUL, FNCALL)\\\n    MEASURE_GENERIC(\\\n        TEXT, MUL,\\\n        do {\\\n          for (int j = 0; j < 1000; j++) {\\\n            FNCALL;\\\n          }\\\n        } while (0);,\\\n    1000);\n#define MEASURE(TEXT, MUL, FNCALL) MEASURE_GENERIC(TEXT, MUL, FNCALL, 1)\n\nint main(void)\n{\n    init_cpucycles();\n    /* Make stdout buffer more responsive. 
*/\n    setbuf(stdout, NULL);\n\n    spx_ctx ctx;\n    unsigned char pk[SPX_PK_BYTES];\n    unsigned char sk[SPX_SK_BYTES];\n    unsigned char *m = malloc(SPX_MLEN);\n    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);\n\n    unsigned char fors_pk[SPX_FORS_PK_BYTES];\n    unsigned char fors_m[SPX_FORS_MSG_BYTES];\n    unsigned char fors_sig[SPX_FORS_BYTES];\n    unsigned char addr[SPX_ADDR_BYTES*2];\n    unsigned char wots_pk[4*SPX_WOTS_PK_BYTES];\n    unsigned char block[SPX_N];\n\n    unsigned long long smlen;\n    unsigned long long mlen;\n    unsigned long long t[NTESTS+1];\n    struct timespec start, stop;\n    double result;\n    int i;\n    uint64_t statex2[50];\n\n    randombytes(m, SPX_MLEN);\n    randombytes(addr, SPX_ADDR_BYTES*2);\n\n    printf(\"Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\\n\",\n           SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES,\n           SPX_WOTS_W);\n\n    printf(\"Running %d iterations.\\n\", NTESTS);\n\n    MEASURT(\"thash                \", 1, thash(block, block, 1, &ctx, (uint32_t*)addr));\n    MEASURT(\"f1600x2              \", 1, f1600x2(statex2));\n    MEASURT(\"thashx2              \", 1, thashx2(block, block, block, block, 1, &ctx, (uint32_t*)addr));\n    MEASURE(\"Generating keypair.. \", 1, crypto_sign_keypair(pk, sk));\n    MEASURE(\"  - WOTS pk gen 2x.. \", (1 << SPX_TREE_HEIGHT) / 2, wots_gen_pkx2(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Signing..            \", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk));\n    MEASURE(\"  - FORS signing..   \", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr));\n    MEASURE(\"  - WOTS pk gen x2.. \", SPX_D * (1 << SPX_TREE_HEIGHT) / 2, wots_gen_pkx2(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Verifying..          
\", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk));\n\n    printf(\"Signature size: %d (%.2f KiB)\\n\", SPX_BYTES, SPX_BYTES / 1024.0);\n    printf(\"Public key size: %d (%.2f KiB)\\n\", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0);\n    printf(\"Secret key size: %d (%.2f KiB)\\n\", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0);\n\n    free(m);\n    free(sm);\n    free(mout);\n\n    return 0;\n}\n\nstatic void wots_gen_pkx2(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) {\n    struct leaf_info_x2 leaf;\n    unsigned steps[ SPX_WOTS_LEN ] = { 0 };\n    INITIALIZE_LEAF_INFO_X2(leaf, addr, steps);\n    wots_gen_leafx2(pk, ctx, 0, &leaf);\n}\n"
  },
  {
    "path": "shake-a64/test/thashx2.c",
    "content": "#include <stdio.h>\n#include <string.h>\n\n#include \"../thashx2.h\"\n#include \"../thash.h\"\n#include \"../randombytes.h\"\n#include \"../params.h\"\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    unsigned char input[2*SPX_N];\n    unsigned char output[2*SPX_N];\n    unsigned char out2[2*SPX_N];\n    uint32_t addr[2*8] = {0};\n    unsigned int j;\n    spx_ctx ctx;\n\n    randombytes(ctx.pub_seed, SPX_N);\n    randombytes(input, 2*SPX_N);\n    randombytes((unsigned char *)addr, 2 * 8 * sizeof(uint32_t));\n\n    printf(\"Testing if thash matches thashx2.. \");\n\n    for (j = 0; j < 2; j++) {\n        thash(out2 + j * SPX_N, input + j * SPX_N, 1, &ctx, addr + j*8);\n    }\n\n    thashx2(output + 0*SPX_N,\n            output + 1*SPX_N,\n            input + 0*SPX_N,\n            input + 1*SPX_N,\n            1, &ctx, addr);\n\n    if (memcmp(out2, output, 2 * SPX_N)) {\n        printf(\"failed!\\n\");\n        return -1;\n    }\n    printf(\"successful.\\n\");\n    return 0;\n}\n"
  },
  {
    "path": "shake-a64/thash.h",
    "content": "#ifndef SPX_THASHX2_AS_ONE\n#define SPX_THASHX2_AS_ONE\n\n#include <stdint.h>\n#include \"context.h\"\n\nvoid thash(unsigned char *out, const unsigned char *in, unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8]);\n\n\n#endif\n\n"
  },
  {
    "path": "shake-a64/thash_shake_robustx2.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"thashx2.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"f1600x2.h\"\n#include \"fips202x2.h\"\n\n\nvoid thash(unsigned char *out,\n           const unsigned char *in,\n           unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8]) {\n    uint32_t addrx2 [2*8] = {\n        addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7],\n        addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7]\n    };\n    thashx2(out, out, in, in, inblocks, ctx, addrx2);\n}\n\n/**\n * 2-way parallel version of thash; takes 2x as much input and output\n */\nvoid thashx2(unsigned char *out0,\n             unsigned char *out1,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx2[2*8])\n{\n    if (inblocks == 1 || inblocks == 2) {\n        /* As we write and read only a few quadwords, it is more efficient to\n         * build and extract from the twoway SHAKE256 state by hand. */\n        uint64_t state[50] = {0};\n        uint64_t state2[50];\n\n        for (int i = 0; i < SPX_N/8; i++) {\n            uint64_t x = load64(ctx->pub_seed + 8*i);\n            state[2*i] = x;\n            state[2*i+1] = x;\n        }\n        for (int i = 0; i < 4; i++) {\n            state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32)\n                | (uint64_t)addrx2[2*i];\n            state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32)\n                | (uint64_t)addrx2[8+2*i];\n        }\n\n        /* Domain separator and padding. 
*/\n        state[2*16] = 0x80ULL << 56;\n        state[2*16+1] = 0x80ULL << 56;\n\n        state[2*((SPX_N/8)+4)] ^= 0x1f;\n        state[2*((SPX_N/8)+4)+1] ^= 0x1f;\n\n        /* We will permutate state2 with f1600x2 to compute the bitmask,\n         * but first we'll copy it to state2 which will be used to compute\n         * the final output, as its input is almost identical. */\n        memcpy(state2, state, 400);\n\n        f1600x2(state);\n\n        /* By copying from state, state2 already contains the pub_seed\n         * and address.  We just need to copy in the input blocks xorred with\n         * the bitmask we just computed. */\n        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {\n            state2[2*(SPX_N/8+4+i)] = state[2*i] ^ load64(in0 + 8*i);\n            state2[2*(SPX_N/8+4+i)+1] = state[2*i+1] ^ load64(in1 + 8*i);\n        }\n\n        /* Domain separator and start of padding.  Note that the quadwords\n         * around are already zeroed for state from which we copied.\n         * We do a XOR instead of a set as this might be the 16th quadword\n         * when N=32 and inblocks=2, which already contains the end\n         * of the padding. 
*/\n        state2[2*((SPX_N/8)*(1+inblocks)+4)] ^= 0x1f;\n        state2[2*((SPX_N/8)*(1+inblocks)+4)+1] ^= 0x1f;\n\n        f1600x2(state2);\n\n        for (int i = 0; i < SPX_N/8; i++) {\n            store64(out0 + 8*i, state2[2*i]);\n            store64(out1 + 8*i, state2[2*i+1]);\n        }\n    } else {\n        SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N);\n        SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N);\n        SPX_VLA(unsigned char, bitmask0, inblocks * SPX_N);\n        SPX_VLA(unsigned char, bitmask1, inblocks * SPX_N);\n        unsigned int i;\n\n        memcpy(buf0, ctx->pub_seed, SPX_N);\n        memcpy(buf1, ctx->pub_seed, SPX_N);\n        memcpy(buf0 + SPX_N, addrx2 + 0*8, SPX_ADDR_BYTES);\n        memcpy(buf1 + SPX_N, addrx2 + 1*8, SPX_ADDR_BYTES);\n\n        shake256x2(bitmask0, bitmask1, inblocks * SPX_N,\n                   buf0, buf1, SPX_N + SPX_ADDR_BYTES);\n\n        for (i = 0; i < inblocks * SPX_N; i++) {\n            buf0[SPX_N + SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i];\n            buf1[SPX_N + SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i];\n        }\n\n        shake256x2(out0, out1, SPX_N,\n                   buf0, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n    }\n}\n"
  },
  {
    "path": "shake-a64/thash_shake_simplex2.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thash.h\"\n#include \"thashx2.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"f1600x2.h\"\n#include \"fips202x2.h\"\n\n\nvoid thash(unsigned char *out,\n           const unsigned char *in,\n           unsigned int inblocks,\n           const spx_ctx *ctx, uint32_t addr[8]) {\n    uint32_t addrx2 [2*8] = {\n        addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7],\n        addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7]\n    };\n    thashx2(out, out, in, in, inblocks, ctx, addrx2);\n}\n\n/**\n * 2-way parallel version of thash; takes 2x as much input and output\n */\nvoid thashx2(unsigned char *out0,\n             unsigned char *out1,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx2[2*8])\n{\n    if (inblocks == 1 || inblocks == 2) {\n        /* As we write and read only a few quadwords, it is more efficient to\n         * build and extract from the twoway SHAKE256 state by hand. */\n        uint64_t state[50] = {0};\n        for (int i = 0; i < SPX_N/8; i++) {\n            uint64_t x = load64(ctx->pub_seed + 8*i);\n            state[2*i] = x;\n            state[2*i+1] = x;\n        }\n        for (int i = 0; i < 4; i++) {\n            state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32)\n                | (uint64_t)addrx2[2*i];\n            state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32)\n                | (uint64_t)addrx2[8+2*i];\n        }\n\n        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {\n            state[2*(SPX_N/8+4+i)] = load64(in0+8*i);\n            state[2*(SPX_N/8+4+i)+1] = load64(in1+8*i);\n        }\n\n        /* Domain separator and padding. 
*/\n        state[2*16] = 0x80ULL << 56;\n        state[2*16+1] = 0x80ULL << 56;\n\n        state[2*((SPX_N/8)*(1+inblocks)+4)] ^= 0x1f;\n        state[2*((SPX_N/8)*(1+inblocks)+4)+1] ^= 0x1f;\n\n        f1600x2(state);\n\n        for (int i = 0; i < SPX_N/8; i++) {\n            store64(out0 + 8*i, state[2*i]);\n            store64(out1 + 8*i, state[2*i+1]);\n        }\n    } else {\n        SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N);\n        SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks * SPX_N);\n\n        memcpy(buf0, ctx->pub_seed, SPX_N);\n        memcpy(buf1, ctx->pub_seed, SPX_N);\n        memcpy(buf0 + SPX_N, addrx2 + 0*8, SPX_ADDR_BYTES);\n        memcpy(buf1 + SPX_N, addrx2 + 1*8, SPX_ADDR_BYTES);\n        memcpy(buf0 + SPX_N + SPX_ADDR_BYTES, in0, inblocks * SPX_N);\n        memcpy(buf1 + SPX_N + SPX_ADDR_BYTES, in1, inblocks * SPX_N);\n\n        shake256x2(out0, out1, SPX_N,\n                   buf0, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n    }\n}\n"
  },
  {
    "path": "shake-a64/thashx2.h",
    "content": "#ifndef SPX_THASHX2_H\n#define SPX_THASHX2_H\n\n#include <stdint.h>\n#include \"context.h\"\n#include \"params.h\"\n\n#define thashx2 SPX_NAMESPACE(thashx2)\nvoid thashx2(unsigned char *out0,\n             unsigned char *out1,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx2[2*8]);\n\n#endif\n"
  },
  {
    "path": "shake-a64/utilsx2.c",
    "content": "#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx2.h\"\n#include \"params.h\"\n#include \"thashx2.h\"\n#include \"address.h\"\n\n/*\n * Generate the entire Merkle tree, computing the authentication path for leaf_idx,\n * and the resulting root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE)\n *\n * This expects tree_addrx2 to be initialized to 2 parallel addr structures for\n * the Merkle tree nodes\n *\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This works by using the standard Merkle tree building algorithm, except\n * that each 'node' tracked is actually 2 consecutive nodes in the real tree.\n * When we combine two logical nodes AB and WX, we perform the H\n * operation on adjacent real nodes, forming the parent logical node\n * (AB)(WX)\n *\n * When we get to the top level of the real tree (where there is only\n * one logical node), we continue this operation one more time; the right\n * most real node will by the actual root (and the other node will be\n * garbage).  
We follow the same thashx2 logic so that the 'extract\n * authentication path components' part of the loop is still executed (and\n * to simplify the code somewhat)\n */\nvoid treehashx2(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx *ctx,\n                uint32_t leaf_idx, uint32_t idx_offset,\n                uint32_t tree_height,\n                void (*gen_leafx2)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx*,\n                   uint32_t idx, void *info),\n                uint32_t tree_addrx2[2*8],\n                void *info)\n{\n    /* This is where we keep the intermediate nodes */\n    SPX_VLA(unsigned char, stackx2, 2 * tree_height * SPX_N);\n    uint32_t left_adj = 0, prev_left_adj = 0; /* When we're doing the top */\n        /* level, the left-most part of the tree isn't at the beginning */\n        /* of current[].  These give the offset of the actual start */\n\n    uint32_t idx;\n    uint32_t max_idx = (1 << (tree_height-1)) - 1;\n    for (idx = 0;; idx++) {\n        unsigned char current[2*SPX_N];   /* Current logical node */\n        gen_leafx2( current, ctx, 2*idx + idx_offset,\n                    info );\n\n        /* Now combine the freshly generated right node with previously */\n        /* generated left ones */\n        uint32_t internal_idx_offset = idx_offset;\n        uint32_t internal_idx = idx;\n        uint32_t internal_leaf = leaf_idx;\n        uint32_t h;     /* The height we are in the Merkle tree */\n        for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) {\n\n            /* Special processing if we're at the top of the tree */\n            if (h >= tree_height - 1) {\n                if (h == tree_height) {\n                    /* We hit the root; return it */\n                    memcpy( root, &current[1*SPX_N], SPX_N );\n                    return;\n                }\n                /* The tree indexing logic is a bit off in this 
case */\n                /* Adjust it so that the left-most node of the part of */\n                /* the tree that we're processing has index 0 */\n                prev_left_adj = left_adj;\n                left_adj = 2 - (1 << (tree_height - h - 1));\n            }\n\n            /* Check if we hit the top of the tree */\n            if (h == tree_height) {\n                /* We hit the root; return it */\n                memcpy( root, &current[1*SPX_N], SPX_N );\n                return;\n            }\n            \n            /*\n             * Check if one of the nodes we have is a part of the\n             * authentication path; if it is, write it out\n             */\n            if ((((internal_idx << 1) ^ internal_leaf) & ~0x1) == 0) {\n                memcpy( &auth_path[ h * SPX_N ],\n                        &current[(((internal_leaf&1)^1) + prev_left_adj) * SPX_N],\n                        SPX_N );\n            }\n\n            /*\n             * Check if we're at a left child; if so, stop going up the stack\n             * Exception: if we've reached the end of the tree, keep on going\n             * (so we combine the last 2 nodes into the one root node in two\n             * more iterations)\n             */\n            if ((internal_idx & 1) == 0 && idx < max_idx) {\n                break;\n            }\n\n            /* Ok, we're at a right node (or doing the top 3 levels) */\n            /* Now combine the left and right logical nodes together */\n\n            /* Set the address of the node we're creating. 
*/\n            int j;\n            internal_idx_offset >>= 1;\n            for (j = 0; j < 2; j++) {\n                set_tree_height(tree_addrx2 + j*8, h + 1);\n                set_tree_index(tree_addrx2 + j*8,\n                     (2/2) * (internal_idx&~1) + j - left_adj + internal_idx_offset );\n            }\n            unsigned char *left = &stackx2[h * 2 * SPX_N];\n            thashx2( &current[0 * SPX_N],\n                     &current[1 * SPX_N],\n                     &left   [0 * SPX_N],\n                     &current[0 * SPX_N],\n                     2, ctx, tree_addrx2);\n        }\n\n        /* We've hit a left child; save the current for when we get the */\n        /* corresponding right child */\n        memcpy( &stackx2[h * 2 * SPX_N], current, 2 * SPX_N);\n    }\n}\n"
  },
  {
    "path": "shake-a64/utilsx2.h",
    "content": "#ifndef SPX_UTILSX2_H\n#define SPX_UTILSX2_H\n\n#include <stdint.h>\n#include \"params.h\"\n\n/**\n * For a given leaf index, computes the authentication path and the resulting\n * root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE).\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This implementation uses SIMD to compute internal nodes 2 at a time (in\n * parallel)\n */\n#define treehashx2 SPX_NAMESPACE(treehashx2)\nvoid treehashx2(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx *ctx,\n                uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,\n                void (*gen_leafx2)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx* /* ctx */,\n                   uint32_t addr_idx, void *info),\n                uint32_t tree_addrx2[2*8], void *info);\n\n#endif\n"
  },
  {
    "path": "shake-a64/wots.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx2.h\"\n#include \"hash.h\"\n#include \"hashx2.h\"\n#include \"thashx2.h\"\n#include \"wots.h\"\n#include \"wotsx2.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n// TODO clarify address expectations, and make them more uniform.\n// TODO i.e. do we expect types to be set already?\n// TODO and do we expect modifications or copies?\n\n/**\n * Computes up the chains\n */\nstatic void gen_chains(\n        unsigned char *out,\n        const unsigned char *in,\n        unsigned int start[SPX_WOTS_LEN],\n        unsigned int steps[SPX_WOTS_LEN],\n        const spx_ctx *ctx,\n        uint32_t addr[8])\n{\n    uint32_t i, j, k, idx, watching;\n    int done;\n    unsigned char empty[SPX_N];\n    unsigned char *bufs[4];\n    uint32_t addrs[8*2];\n\n    int l;\n    uint16_t counts[SPX_WOTS_W] = { 0 };\n    uint16_t idxs[SPX_WOTS_LEN];\n    uint16_t total, newTotal;\n\n    /* set addrs = {addr, addr} */\n    for (j = 0; j < 2; j++) {\n        memcpy(addrs+j*8, addr, sizeof(uint32_t) * 8);\n    }\n\n    /* Initialize out with the value at position 'start'. */\n    memcpy(out, in, SPX_WOTS_LEN*SPX_N);\n\n    /* Sort the chains in reverse order by steps using counting sort. */\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        counts[steps[i]]++;\n    }\n    total = 0;\n    for (l = SPX_WOTS_W - 1; l >= 0; l--) {\n        newTotal = counts[l] + total;\n        counts[l] = total;\n        total = newTotal;\n    }\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        idxs[counts[steps[i]]] = i;\n        counts[steps[i]]++;\n    }\n\n    /* We got our work cut out for us: do it! 
*/\n    for (i = 0; i < SPX_WOTS_LEN; i += 2) {\n        for (j = 0; j < 2 && i+j < SPX_WOTS_LEN; j++) {\n            idx = idxs[i+j];\n            set_chain_addr(addrs+j*8, idx);\n            bufs[j] = out + SPX_N * idx;\n        }\n\n        /* As the chains are sorted in reverse order, we know that the first\n         * chain is the longest and the last one is the shortest.  We keep\n         * an eye on whether the last chain is done and then on the one before,\n         * et cetera. */\n        watching = 1;\n        done = 0;\n        while (i + watching >= SPX_WOTS_LEN) {\n            bufs[watching] = &empty[0];\n            watching--;\n        }\n\n        for (k = 0;; k++) {\n            while (k == steps[idxs[i+watching]]) {\n                bufs[watching] = &empty[0];\n                if (watching == 0) {\n                    done = 1;\n                    break;\n                }\n                watching--;\n            }\n            if (done) {\n                break;\n            }\n            for (j = 0; j < watching + 1; j++) {\n                set_hash_addr(addrs+j*8, k + start[idxs[i+j]]);\n            }\n\n            thashx2(bufs[0], bufs[1], \n                    bufs[0], bufs[1], 1, ctx, addrs);\n        }\n    }\n}\n\n/**\n * base_w algorithm as described in draft.\n * Interprets an array of bytes as integers in base w.\n * This only works when log_w is a divisor of 8.\n */\nstatic void base_w(unsigned int *output, const int out_len,\n                   const unsigned char *input)\n{\n    int in = 0;\n    int out = 0;\n    unsigned char total;\n    int bits = 0;\n    int consumed;\n\n    for (consumed = 0; consumed < out_len; consumed++) {\n        if (bits == 0) {\n            total = input[in];\n            in++;\n            bits += 8;\n        }\n        bits -= SPX_WOTS_LOGW;\n        output[out] = (total >> bits) & (SPX_WOTS_W - 1);\n        out++;\n    }\n}\n\n/* Computes the WOTS+ checksum over a message (in base_w). 
*/\nstatic void wots_checksum(unsigned int *csum_base_w,\n                          const unsigned int *msg_base_w)\n{\n    unsigned int csum = 0;\n    unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8];\n    unsigned int i;\n\n    /* Compute checksum. */\n    for (i = 0; i < SPX_WOTS_LEN1; i++) {\n        csum += SPX_WOTS_W - 1 - msg_base_w[i];\n    }\n\n    /* Convert checksum to base_w. */\n    /* Make sure expected empty zero bits are the least significant bits. */\n    csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8);\n    ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum);\n    base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes);\n}\n\n/* Takes a message and derives the matching chain lengths. */\nvoid chain_lengths(unsigned int *lengths, const unsigned char *msg)\n{\n    base_w(lengths, SPX_WOTS_LEN1, msg);\n    wots_checksum(lengths + SPX_WOTS_LEN1, lengths);\n}\n\n/**\n * Takes a WOTS signature and an n-byte message, computes a WOTS public key.\n *\n * Writes the computed public key to 'pk'.\n */\nvoid wots_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *msg,\n                      const spx_ctx *ctx, uint32_t addr[8])\n{\n    unsigned int steps[SPX_WOTS_LEN];\n    unsigned int start[SPX_WOTS_LEN];\n    uint32_t i;\n\n    chain_lengths(start, msg);\n\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        steps[i] = SPX_WOTS_W - 1 - start[i];\n    }\n\n    gen_chains(pk, sig, start, steps, ctx, addr);\n}\n\n/*\n * This generates 2 sequential WOTS public keys\n * It also generates the WOTS signature if leaf_info indicates\n * that we're signing with one of these WOTS keys\n */\nvoid wots_gen_leafx2(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info) {\n    struct leaf_info_x2 *info = v_info;\n    uint32_t *leaf_addr = info->leaf_addr;\n    uint32_t *pk_addr = info->pk_addr;\n    unsigned int i, j, k;\n    unsigned char 
pk_buffer[ 2 * SPX_WOTS_BYTES ];\n    unsigned wots_offset = SPX_WOTS_BYTES;\n    unsigned char *buffer;\n    uint32_t wots_k_mask;\n    unsigned wots_sign_index;\n\n    if (((leaf_idx ^ info->wots_sign_leaf) & ~1) == 0) {\n        /* We're traversing the leaf that's signing; generate the WOTS */\n        /* signature */\n        wots_k_mask = 0;\n        wots_sign_index = info->wots_sign_leaf & 1; /* Which of of the 2 */\n                                  /* slots do the signatures come from */\n    } else {\n        /* Nope, we're just generating pk's; turn off the signature logic */\n        wots_k_mask = ~0;\n\twots_sign_index = 0;\n    }\n\n    for (j = 0; j < 2; j++) {\n        set_keypair_addr( leaf_addr + j*8, leaf_idx + j );\n        set_keypair_addr( pk_addr + j*8, leaf_idx + j );\n    }\n\n    for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) {\n        uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k to */\n            /* the step if we're generating a signature, ~0 if we're not */\n\n        /* Start with the secret seed */\n        for (j = 0; j < 2; j++) {\n            set_chain_addr(leaf_addr + j*8, i);\n            set_hash_addr(leaf_addr + j*8, 0);\n            set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTSPRF);\n        }\n        prf_addrx2(buffer + 0*wots_offset,\n                   buffer + 1*wots_offset,\n                   ctx, leaf_addr);\n        for (j = 0; j < 2; j++) {\n            set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTS);\n        }\n\n        /* Iterate down the WOTS chain */\n        for (k=0;; k++) {\n            /* Check if one of the values we have needs to be saved as a */\n            /* part of the WOTS signature */\n            if (k == wots_k) {\n                memcpy( info->wots_sig + i * SPX_N,\n                        buffer + wots_sign_index*wots_offset, SPX_N );\n            }\n\n            /* Check if we hit the top of the chain */\n            if (k == SPX_WOTS_W - 1) 
break;\n\n            /* Iterate one step on both chains */\n            for (j = 0; j < 2; j++) {\n                set_hash_addr(leaf_addr + j*8, k);\n            }\n            thashx2(buffer + 0*wots_offset,\n                    buffer + 1*wots_offset,\n                    buffer + 0*wots_offset,\n                    buffer + 1*wots_offset,\n                    1, ctx, leaf_addr);\n        }\n    }\n\n    /* Do the final thash to generate the public keys */\n    thashx2(dest + 0*SPX_N,\n            dest + 1*SPX_N,\n            pk_buffer + 0*wots_offset,\n            pk_buffer + 1*wots_offset,\n            SPX_WOTS_LEN, ctx, pk_addr);\n}\n"
  },
  {
    "path": "shake-a64/wotsx2.h",
    "content": "#if !defined( WOTSX2_H_ )\n#define WOTSX2_H_ \n\n#include <string.h>\n#include \"params.h\"\n\n/*\n * This is here to provide an interface to the internal wots_gen_leafx2\n * routine.  While this routine is not referenced in the package outside of\n * wots.c, it is called from the stand-alone benchmark code to characterize\n * the performance\n */\nstruct leaf_info_x2 {\n    unsigned char *wots_sig;\n    uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */\n    uint32_t *wots_steps;\n    uint32_t leaf_addr[2*8];\n    uint32_t pk_addr[2*8];\n};\n\n/* Macro to set the leaf_info to something 'benign', that is, it would */\n/* run with the same time as it does during the real signing process */\n/* Used only by the benchmark code */\n#define INITIALIZE_LEAF_INFO_X2(info, addr, step_buffer) { \\\n    info.wots_sig = 0;             \\\n    info.wots_sign_leaf = ~0;      \\\n    info.wots_steps = step_buffer; \\\n    int i;                         \\\n    for (i=0; i<2; i++) {          \\\n        memcpy( &info.leaf_addr[8*i], addr, 32 ); \\\n        memcpy( &info.pk_addr[8*i], addr, 32 ); \\\n    } \\\n}\n\n#define wots_gen_leafx2 SPX_NAMESPACE(wots_gen_leafx2)\nvoid wots_gen_leafx2(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info);\n\n#endif /* WOTSX2_H_ */\n"
  },
  {
    "path": "shake-avx2/.gitignore",
    "content": "test/*\n!test/*.c\nPQCsignKAT_*.rsp\nPQCsignKAT_*.req\nPQCgenKAT_sign\nkeccak4x/KeccakP-1600-times4-SIMD256.o"
  },
  {
    "path": "shake-avx2/Makefile",
    "content": "PARAMS = sphincs-shake-128f\nTHASH = robust\n\nCC = /usr/bin/gcc\nCFLAGS = -Wall -Wextra -Wpedantic -Wmissing-prototypes -O3 -std=c99 -march=native -fomit-frame-pointer -flto -DPARAMS=$(PARAMS) $(EXTRA_CFLAGS)\n\nSOURCES =          hash_shake.c hash_shakex4.c thash_shake_$(THASH).c thash_shake_$(THASH)x4.c address.c randombytes.c merkle.c wots.c utils.c utilsx4.c fors.c sign.c fips202.c fips202x4.c keccak4x/KeccakP-1600-times4-SIMD256.o\nHEADERS = params.h hash.h          hashx4.h          thash.h                 thashx4.h                 address.h randombytes.h merkle.h wots.h utils.h utilsx4.h fors.h api.h fips202.h fips202x4.h\n\nDET_SOURCES = $(SOURCES:randombytes.%=rng.%)\nDET_HEADERS = $(HEADERS:randombytes.%=rng.%)\n\nTESTS = test/fors \\\n\t\ttest/spx \\\n\t\ttest/thashx4 \\\n\nBENCHMARK = test/benchmark\n\n.PHONY: clean test benchmark\n\ndefault: PQCgenKAT_sign\n\nall: PQCgenKAT_sign tests benchmarks\n\ntests: $(TESTS)\n\ntest: $(TESTS:=.exec)\n\nbenchmarks: $(BENCHMARK)\n\nbenchmark: $(BENCHMARK:=.exec)\n\nPQCgenKAT_sign: PQCgenKAT_sign.c $(DET_SOURCES) $(DET_HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(DET_SOURCES) $< -lcrypto\n\ntest/%: test/%.c $(SOURCES) $(HEADERS)\n\t$(CC) $(CFLAGS) -o $@ $(SOURCES) $< $(LDLIBS)\n\ntest/%.exec: test/%\n\t@$<\n\nkeccak4x/KeccakP-1600-times4-SIMD256.o: keccak4x/align.h \\\n\t\t\t\t\t\t\t\t\t\tkeccak4x/brg_endian.h \\\n\t\t\t\t\t\t\t\t\t\tkeccak4x/KeccakP-1600-times4-SIMD256.c \\\n\t\t\t\t\t\t\t\t\t\tkeccak4x/KeccakP-1600-times4-SnP.h \\\n\t\t\t\t\t\t\t\t\t\tkeccak4x/KeccakP-1600-unrolling.macros \\\n\t\t\t\t\t\t\t\t\t\tkeccak4x/SIMD256-config.h\n\t$(CC) $(CFLAGS) -c keccak4x/KeccakP-1600-times4-SIMD256.c -o $@\n\nclean:\n\t-$(RM) keccak4x/KeccakP-1600-times4-SIMD256.o\n\t-$(RM) $(TESTS)\n\t-$(RM) $(BENCHMARK)\n\t-$(RM) PQCgenKAT_sign\n\t-$(RM) PQCsignKAT_*.rsp\n\t-$(RM) PQCsignKAT_*.req\n"
  },
  {
    "path": "shake-avx2/context.h",
    "content": "#ifndef SPX_CONTEXT_H\n#define SPX_CONTEXT_H\n\n#include <stdint.h>\n\n#include \"params.h\"\n\ntypedef struct {\n    uint8_t pub_seed[SPX_N];\n    uint8_t sk_seed[SPX_N];\n} spx_ctx;\n\n#endif\n"
  },
  {
    "path": "shake-avx2/fips202x4.c",
    "content": "#include <immintrin.h>\n#include <stdint.h>\n#include <assert.h>\n\n#include \"fips202.h\"\n#include \"fips202x4.h\"\n\n#define NROUNDS 24\n#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset)))\n\nstatic uint64_t load64(const unsigned char *x)\n{\n  unsigned long long r = 0, i;\n\n  for (i = 0; i < 8; ++i) {\n    r |= (unsigned long long)x[i] << 8 * i;\n  }\n  return r;\n}\n\nstatic void store64(uint8_t *x, uint64_t u)\n{\n  unsigned int i;\n\n  for(i=0; i<8; ++i) {\n    x[i] = u;\n    u >>= 8;\n  }\n}\n\n/* Use implementation from the Keccak Code Package */\nextern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s);\n#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds\n\nstatic void keccak_absorb4x(__m256i *s,\n                          unsigned int r,\n                          const unsigned char *m0,\n                          const unsigned char *m1,\n                          const unsigned char *m2,\n                          const unsigned char *m3,\n                          unsigned long long int mlen,\n                          unsigned char p)\n{\n  unsigned long long i;\n  unsigned char t0[200];\n  unsigned char t1[200];\n  unsigned char t2[200];\n  unsigned char t3[200];\n\n  unsigned long long *ss = (unsigned long long *)s;\n\n\n  while (mlen >= r)\n  {\n    for (i = 0; i < r / 8; ++i)\n    {\n      ss[4*i+0] ^= load64(m0 + 8 * i);\n      ss[4*i+1] ^= load64(m1 + 8 * i);\n      ss[4*i+2] ^= load64(m2 + 8 * i);\n      ss[4*i+3] ^= load64(m3 + 8 * i);\n    }\n\n    KeccakF1600_StatePermute4x(s);\n    mlen -= r;\n    m0 += r;\n    m1 += r;\n    m2 += r;\n    m3 += r;\n  }\n\n  for (i = 0; i < r; ++i)\n  {\n    t0[i] = 0;\n    t1[i] = 0;\n    t2[i] = 0;\n    t3[i] = 0;\n  }\n  for (i = 0; i < mlen; ++i)\n  {\n    t0[i] = m0[i];\n    t1[i] = m1[i];\n    t2[i] = m2[i];\n    t3[i] = m3[i];\n  }\n\n  t0[i] = p;\n  t1[i] = p;\n  t2[i] = p;\n  t3[i] = p;\n\n  t0[r - 1] |= 128;\n  t1[r - 1] |= 128;\n  t2[r - 1] |= 
128;\n  t3[r - 1] |= 128;\n\n  for (i = 0; i < r / 8; ++i)\n  {\n    ss[4*i+0] ^= load64(t0 + 8 * i);\n    ss[4*i+1] ^= load64(t1 + 8 * i);\n    ss[4*i+2] ^= load64(t2 + 8 * i);\n    ss[4*i+3] ^= load64(t3 + 8 * i);\n  }\n}\n\n\nstatic void keccak_squeezeblocks4x(unsigned char *h0,\n                                   unsigned char *h1,\n                                   unsigned char *h2,\n                                   unsigned char *h3,\n                                   unsigned long long int nblocks,\n                                   __m256i *s,\n                                   unsigned int r)\n{\n  unsigned int i;\n\n  unsigned long long *ss = (unsigned long long *)s;\n\n  while(nblocks > 0)\n  {\n    KeccakF1600_StatePermute4x(s);\n    for(i=0;i<(r>>3);i++)\n    {\n      store64(h0+8*i, ss[4*i+0]);\n      store64(h1+8*i, ss[4*i+1]);\n      store64(h2+8*i, ss[4*i+2]);\n      store64(h3+8*i, ss[4*i+3]);\n    }\n    h0 += r;\n    h1 += r;\n    h2 += r;\n    h3 += r;\n    nblocks--;\n  }\n}\n\n\n\nvoid shake128x4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3, unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned char *in2,\n                unsigned char *in3, unsigned long long inlen)\n{\n  __m256i s[25];\n  unsigned char t0[SHAKE128_RATE];\n  unsigned char t1[SHAKE128_RATE];\n  unsigned char t2[SHAKE128_RATE];\n  unsigned char t3[SHAKE128_RATE];\n  unsigned int i;\n\n  /* zero state */\n  for(i=0;i<25;i++)\n    s[i] = _mm256_xor_si256(s[i], s[i]);\n\n  /* absorb 4 message of identical length in parallel */\n  keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);\n\n  /* Squeeze output */\n  keccak_squeezeblocks4x(out0, out1, out2, out3, outlen/SHAKE128_RATE, s, SHAKE128_RATE);\n\n  out0 += (outlen/SHAKE128_RATE)*SHAKE128_RATE;\n  out1 += (outlen/SHAKE128_RATE)*SHAKE128_RATE;\n  
out2 += (outlen/SHAKE128_RATE)*SHAKE128_RATE;\n  out3 += (outlen/SHAKE128_RATE)*SHAKE128_RATE;\n\n  if(outlen%SHAKE128_RATE)\n  {\n    keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE);\n    for(i=0;i<outlen%SHAKE128_RATE;i++)\n    {\n      out0[i] = t0[i];\n      out1[i] = t1[i];\n      out2[i] = t2[i];\n      out3[i] = t3[i];\n    }\n  }\n}\n\n\nvoid shake256x4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3, unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned char *in2,\n                unsigned char *in3, unsigned long long inlen)\n{\n  __m256i s[25];\n  unsigned char t0[SHAKE256_RATE];\n  unsigned char t1[SHAKE256_RATE];\n  unsigned char t2[SHAKE256_RATE];\n  unsigned char t3[SHAKE256_RATE];\n  unsigned int i;\n\n  /* zero state */\n  for(i=0;i<25;i++)\n    s[i] = _mm256_xor_si256(s[i], s[i]);\n\n  /* absorb 4 message of identical length in parallel */\n  keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);\n\n  /* Squeeze output */\n  keccak_squeezeblocks4x(out0, out1, out2, out3, outlen/SHAKE256_RATE, s, SHAKE256_RATE);\n\n  out0 += (outlen/SHAKE256_RATE)*SHAKE256_RATE;\n  out1 += (outlen/SHAKE256_RATE)*SHAKE256_RATE;\n  out2 += (outlen/SHAKE256_RATE)*SHAKE256_RATE;\n  out3 += (outlen/SHAKE256_RATE)*SHAKE256_RATE;\n\n  if(outlen%SHAKE256_RATE)\n  {\n    keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE);\n    for(i=0;i<outlen%SHAKE256_RATE;i++)\n    {\n      out0[i] = t0[i];\n      out1[i] = t1[i];\n      out2[i] = t2[i];\n      out3[i] = t3[i];\n    }\n  }\n}\n"
  },
  {
    "path": "shake-avx2/fips202x4.h",
    "content": "#ifndef SPX_FIPS202X4_H\n#define SPX_FIPS202X4_H\n\n#include <immintrin.h>\n\nvoid shake128x4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3, unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned char *in2,\n                unsigned char *in3, unsigned long long inlen);\n\nvoid shake256x4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3, unsigned long long outlen,\n                unsigned char *in0,\n                unsigned char *in1,\n                unsigned char *in2,\n                unsigned char *in3, unsigned long long inlen);\n\n#endif\n"
  },
  {
    "path": "shake-avx2/fors.c",
    "content": "#include <stdlib.h>\n#include <stdint.h>\n#include <string.h>\n\n#include \"fors.h\"\n#include \"utils.h\"\n#include \"utilsx4.h\"\n#include \"hash.h\"\n#include \"hashx4.h\"\n#include \"thash.h\"\n#include \"thashx4.h\"\n#include \"address.h\"\n\nstatic void fors_gen_sk(unsigned char *sk, const spx_ctx *ctx,\n                        uint32_t fors_leaf_addr[8])\n{\n    prf_addr(sk, ctx, fors_leaf_addr);\n}\n\nstatic void fors_gen_skx4(unsigned char *sk0,\n                          unsigned char *sk1,\n                          unsigned char *sk2,\n                          unsigned char *sk3, const spx_ctx *ctx,\n                          uint32_t fors_leaf_addrx4[4*8])\n{\n    prf_addrx4(sk0, sk1, sk2, sk3,\n               ctx, fors_leaf_addrx4);\n}\n\nstatic void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,\n                            const spx_ctx *ctx,\n                            uint32_t fors_leaf_addr[8])\n{\n    thash(leaf, sk, 1, ctx, fors_leaf_addr);\n}\n\nstatic void fors_sk_to_leafx4(unsigned char *leaf0,\n                              unsigned char *leaf1,\n                              unsigned char *leaf2,\n                              unsigned char *leaf3,\n                              const unsigned char *sk0,\n                              const unsigned char *sk1,\n                              const unsigned char *sk2,\n                              const unsigned char *sk3,\n                              const spx_ctx *ctx,\n                              uint32_t fors_leaf_addrx4[4*8])\n{\n    thashx4(leaf0, leaf1, leaf2, leaf3,\n            sk0, sk1, sk2, sk3,\n            1, ctx, fors_leaf_addrx4);\n}\n\nstruct fors_gen_leaf_info {\n    uint32_t leaf_addrx[4*8];\n};\n\nstatic void fors_gen_leafx4(unsigned char *leaf,\n                            const spx_ctx *ctx,\n                            uint32_t addr_idx, void *info)\n{\n    struct fors_gen_leaf_info *fors_info = info;\n    uint32_t *fors_leaf_addrx4 
= fors_info->leaf_addrx;\n    unsigned int j;\n\n    /* Only set the parts that the caller doesn't set */\n    for (j = 0; j < 4; j++) {\n        set_tree_index(fors_leaf_addrx4 + j*8, addr_idx + j);\n        set_type(fors_leaf_addrx4 + j*8, SPX_ADDR_TYPE_FORSPRF);\n    }\n\n    fors_gen_skx4(leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 2*SPX_N,\n                  leaf + 3*SPX_N,\n                  ctx, fors_leaf_addrx4);\n\n    for (j = 0; j < 4; j++) {\n        set_type(fors_leaf_addrx4 + j*8, SPX_ADDR_TYPE_FORSTREE);\n    }\n\n    fors_sk_to_leafx4(leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 2*SPX_N,\n                  leaf + 3*SPX_N,\n                  leaf + 0*SPX_N,\n                  leaf + 1*SPX_N,\n                  leaf + 2*SPX_N,\n                  leaf + 3*SPX_N,\n                  ctx, fors_leaf_addrx4);\n}\n\n/**\n * Interprets m as SPX_FORS_HEIGHT-bit unsigned integers.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n * Assumes indices has space for SPX_FORS_TREES integers.\n */\nstatic void message_to_indices(uint32_t *indices, const unsigned char *m)\n{\n    unsigned int i, j;\n    unsigned int offset = 0;\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        indices[i] = 0;\n        for (j = 0; j < SPX_FORS_HEIGHT; j++) {\n            indices[i] ^= ((m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;\n            offset++;\n        }\n    }\n}\n\n/**\n * Signs a message m, deriving the secret key from sk_seed and the FTS address.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_sign(unsigned char *sig, unsigned char *pk,\n               const unsigned char *m,\n               const spx_ctx *ctx,\n               const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    uint32_t fors_tree_addr[4*8] = {0};\n    struct fors_gen_leaf_info fors_info = {0};\n    
uint32_t *fors_leaf_addr = fors_info.leaf_addrx;\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    for (i=0; i<4; i++) {\n        copy_keypair_addr(fors_tree_addr + 8*i, fors_addr);\n        set_type(fors_tree_addr + 8*i, SPX_ADDR_TYPE_FORSTREE);\n        copy_keypair_addr(fors_leaf_addr + 8*i, fors_addr);\n    }\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Include the secret key part that produces the selected leaf node. */\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSPRF);\n        fors_gen_sk(sig, ctx, fors_tree_addr);\n        set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n        sig += SPX_N;\n\n        /* Compute the authentication path for this leaf node. */\n        treehashx4(roots + i*SPX_N, sig, ctx,\n                 indices[i], idx_offset, SPX_FORS_HEIGHT, fors_gen_leafx4,\n                 fors_tree_addr, &fors_info);\n\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n\n/**\n * Derives the FORS public key from a signature.\n * This can be used for verification by comparing to a known public key, or to\n * subsequently verify a signature on the derived public key. 
The latter is the\n * typical use-case when used as an FTS below an OTS in a hypertree.\n * Assumes m contains at least SPX_FORS_HEIGHT * SPX_FORS_TREES bits.\n */\nvoid fors_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *m,\n                      const spx_ctx *ctx,\n                      const uint32_t fors_addr[8])\n{\n    uint32_t indices[SPX_FORS_TREES];\n    unsigned char roots[SPX_FORS_TREES * SPX_N];\n    unsigned char leaf[SPX_N];\n    uint32_t fors_tree_addr[8] = {0};\n    uint32_t fors_pk_addr[8] = {0};\n    uint32_t idx_offset;\n    unsigned int i;\n\n    copy_keypair_addr(fors_tree_addr, fors_addr);\n    copy_keypair_addr(fors_pk_addr, fors_addr);\n\n    set_type(fors_tree_addr, SPX_ADDR_TYPE_FORSTREE);\n    set_type(fors_pk_addr, SPX_ADDR_TYPE_FORSPK);\n\n    message_to_indices(indices, m);\n\n    for (i = 0; i < SPX_FORS_TREES; i++) {\n        idx_offset = i * (1 << SPX_FORS_HEIGHT);\n\n        set_tree_height(fors_tree_addr, 0);\n        set_tree_index(fors_tree_addr, indices[i] + idx_offset);\n\n        /* Derive the leaf from the included secret key part. */\n        fors_sk_to_leaf(leaf, sig, ctx, fors_tree_addr);\n        sig += SPX_N;\n\n        /* Derive the corresponding root node of this tree. */\n        compute_root(roots + i*SPX_N, leaf, indices[i], idx_offset,\n                     sig, SPX_FORS_HEIGHT, ctx, fors_tree_addr);\n        sig += SPX_N * SPX_FORS_HEIGHT;\n    }\n\n    /* Hash horizontally across all tree roots to derive the public key. */\n    thash(pk, roots, SPX_FORS_TREES, ctx, fors_pk_addr);\n}\n"
  },
  {
    "path": "shake-avx2/hash_shakex4.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"address.h\"\n#include \"params.h\"\n#include \"fips202x4.h\"\n#include \"hashx4.h\"\n\nextern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s);\n\n/*\n * 4-way parallel version of prf_addr; takes 4x as much input and output\n */\nvoid prf_addrx4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3,\n                const spx_ctx *ctx,\n                const uint32_t addrx4[4*8]) {\n    /* As we write and read only a few quadwords, it is more efficient to\n     * build and extract from the fourway SHAKE256 state by hand. */\n    __m256i state[25];\n    \n    for (int i = 0; i < SPX_N/8; i++) {\n        state[i] = _mm256_set1_epi64x(((int64_t*)ctx->pub_seed)[i]);\n    }\n    for (int i = 0; i < 4; i++) {\n        state[SPX_N/8+i] = _mm256_set_epi32(\n            addrx4[3*8+1+2*i],\n            addrx4[3*8+2*i],\n            addrx4[2*8+1+2*i],\n            addrx4[2*8+2*i],\n            addrx4[8+1+2*i],\n            addrx4[8+2*i],\n            addrx4[1+2*i],\n            addrx4[2*i]\n        );\n    }\n    for (int i = 0; i < SPX_N/8; i++) {\n        state[SPX_N/8+i+4] = _mm256_set1_epi64x(((int64_t*)ctx->sk_seed)[i]);\n    }\n\n    /* SHAKE domain separator and padding. 
*/\n    state[SPX_N/4+4] = _mm256_set1_epi64x(0x1f);\n    for (int i = SPX_N/4+5; i < 16; i++) {\n        state[i] = _mm256_set1_epi64x(0);\n    }\n    // shift unsigned and then cast to avoid UB\n    state[16] = _mm256_set1_epi64x((long long)(0x80ULL << 56));\n\n    for (int i = 17; i < 25; i++) {\n        state[i] = _mm256_set1_epi64x(0);\n    }\n\n    KeccakP1600times4_PermuteAll_24rounds(&state[0]);\n\n    for (int i = 0; i < SPX_N/8; i++) {\n        ((int64_t*)out0)[i] = _mm256_extract_epi64(state[i], 0);\n        ((int64_t*)out1)[i] = _mm256_extract_epi64(state[i], 1);\n        ((int64_t*)out2)[i] = _mm256_extract_epi64(state[i], 2);\n        ((int64_t*)out3)[i] = _mm256_extract_epi64(state[i], 3);\n    }\n}\n"
  },
  {
    "path": "shake-avx2/hashx4.h",
    "content": "#ifndef SPX_HASHX4_H\n#define SPX_HASHX4_H\n\n#include <stdint.h>\n#include \"context.h\"\n#include \"params.h\"\n\n#define prf_addrx4 SPX_NAMESPACE(prf_addrx4)\nvoid prf_addrx4(unsigned char *out0,\n                unsigned char *out1,\n                unsigned char *out2,\n                unsigned char *out3,\n                const spx_ctx *ctx,\n                const uint32_t addrx4[4*8]);\n\n#endif\n"
  },
  {
    "path": "shake-avx2/keccak4x/KeccakP-1600-times4-SIMD256.c",
    "content": "/*\nImplementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,\nJoan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby\ndenoted as \"the implementer\".\n\nFor more information, feedback or questions, please refer to our websites:\nhttp://keccak.noekeon.org/\nhttp://keyak.noekeon.org/\nhttp://ketje.noekeon.org/\n\nTo the extent possible under law, the implementer has waived all copyright\nand related or neighboring rights to the source code in this file.\nhttp://creativecommons.org/publicdomain/zero/1.0/\n*/\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <smmintrin.h>\n#include <wmmintrin.h>\n#include <immintrin.h>\n#include <emmintrin.h>\n#include \"align.h\"\n#include \"KeccakP-1600-times4-SnP.h\"\n#include \"SIMD256-config.h\"\n\n#include \"brg_endian.h\"\n#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)\n#error Expecting a little-endian platform\n#endif\n\ntypedef unsigned char UINT8;\ntypedef unsigned long long int UINT64;\ntypedef __m128i V128;\ntypedef __m256i V256;\n\n#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + instanceIndex)\n\n#if defined(KeccakP1600times4_useAVX2)\n    #define ANDnu256(a, b)          _mm256_andnot_si256(a, b)\n    #define CONST256(a)             _mm256_load_si256((const V256 *)&(a))\n    #define CONST256_64(a)          (V256)_mm256_broadcast_sd((const double*)(&a))\n    #define LOAD256(a)              _mm256_load_si256((const V256 *)&(a))\n    #define LOAD256u(a)             _mm256_loadu_si256((const V256 *)&(a))\n    #define LOAD4_64(a, b, c, d)    _mm256_set_epi64x((UINT64)(a), (UINT64)(b), (UINT64)(c), (UINT64)(d))\n    #define ROL64in256(d, a, o)     d = _mm256_or_si256(_mm256_slli_epi64(a, o), _mm256_srli_epi64(a, 64-(o)))\n    #define ROL64in256_8(d, a)      d = _mm256_shuffle_epi8(a, CONST256(rho8))\n    #define ROL64in256_56(d, a)     d = _mm256_shuffle_epi8(a, CONST256(rho56))\nstatic const UINT64 rho8[4] = {0x0605040302010007, 
0x0E0D0C0B0A09080F, 0x1615141312111017, 0x1E1D1C1B1A19181F};\nstatic const UINT64 rho56[4] = {0x0007060504030201, 0x080F0E0D0C0B0A09, 0x1017161514131211, 0x181F1E1D1C1B1A19};\n    #define STORE256(a, b)          _mm256_store_si256((V256 *)&(a), b)\n    #define STORE256u(a, b)         _mm256_storeu_si256((V256 *)&(a), b)\n    #define STORE2_128(ah, al, v)   _mm256_storeu2_m128d((V128*)&(ah), (V128*)&(al), v)\n    #define XOR256(a, b)            _mm256_xor_si256(a, b)\n    #define XOReq256(a, b)          a = _mm256_xor_si256(a, b)\n    #define UNPACKL( a, b )         _mm256_unpacklo_epi64((a), (b))\n    #define UNPACKH( a, b )         _mm256_unpackhi_epi64((a), (b))\n    #define PERM128( a, b, c )      (V256)_mm256_permute2f128_ps((__m256)(a), (__m256)(b), c)\n    #define SHUFFLE64( a, b, c )    (V256)_mm256_shuffle_pd((__m256d)(a), (__m256d)(b), c)\n\n    #define UNINTLEAVE()            lanesL01 = UNPACKL( lanes0, lanes1 ),                   \\\n                                    lanesH01 = UNPACKH( lanes0, lanes1 ),                   \\\n                                    lanesL23 = UNPACKL( lanes2, lanes3 ),                   \\\n                                    lanesH23 = UNPACKH( lanes2, lanes3 ),                   \\\n                                    lanes0 = PERM128( lanesL01, lanesL23, 0x20 ),           \\\n                                    lanes2 = PERM128( lanesL01, lanesL23, 0x31 ),           \\\n                                    lanes1 = PERM128( lanesH01, lanesH23, 0x20 ),           \\\n                                    lanes3 = PERM128( lanesH01, lanesH23, 0x31 )\n\n    #define INTLEAVE()              lanesL01 = PERM128( lanes0, lanes2, 0x20 ),             \\\n                                    lanesH01 = PERM128( lanes1, lanes3, 0x20 ),             \\\n                                    lanesL23 = PERM128( lanes0, lanes2, 0x31 ),             \\\n                                    lanesH23 = PERM128( lanes1, lanes3, 0x31 ),             
\\\n                                    lanes0 = SHUFFLE64( lanesL01, lanesH01, 0x00 ),         \\\n                                    lanes1 = SHUFFLE64( lanesL01, lanesH01, 0x0F ),         \\\n                                    lanes2 = SHUFFLE64( lanesL23, lanesH23, 0x00 ),         \\\n                                    lanes3 = SHUFFLE64( lanesL23, lanesH23, 0x0F )\n\n#endif\n\n#define SnP_laneLengthInBytes 8\n\nvoid KeccakP1600times4_InitializeAll(void *states)\n{\n    memset(states, 0, KeccakP1600times4_statesSizeInBytes);\n}\n\nvoid KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)\n{\n    unsigned int sizeLeft = length;\n    unsigned int lanePosition = offset/SnP_laneLengthInBytes;\n    unsigned int offsetInLane = offset%SnP_laneLengthInBytes;\n    const unsigned char *curData = data;\n    UINT64 *statesAsLanes = (UINT64 *)states;\n\n    if ((sizeLeft > 0) && (offsetInLane != 0)) {\n        unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;\n        UINT64 lane = 0;\n        if (bytesInLane > sizeLeft)\n            bytesInLane = sizeLeft;\n        memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);\n        statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;\n        sizeLeft -= bytesInLane;\n        lanePosition++;\n        curData += bytesInLane;\n    }\n\n    while(sizeLeft >= SnP_laneLengthInBytes) {\n        UINT64 lane = *((const UINT64*)curData);\n        statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;\n        sizeLeft -= SnP_laneLengthInBytes;\n        lanePosition++;\n        curData += SnP_laneLengthInBytes;\n    }\n\n    if (sizeLeft > 0) {\n        UINT64 lane = 0;\n        memcpy(&lane, curData, sizeLeft);\n        statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;\n    }\n}\n\nvoid KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned 
int laneOffset)\n{\n    V256 *stateAsLanes = (V256 *)states;\n    unsigned int i;\n    const UINT64 *curData0 = (const UINT64 *)data;\n    const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);\n    const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes);\n    const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes);\n    V256    lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;\n\n    #define Xor_In( argIndex )  XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))\n\n    #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\\\n                                lanes1 = LOAD256u( curData1[argIndex]),\\\n                                lanes2 = LOAD256u( curData2[argIndex]),\\\n                                lanes3 = LOAD256u( curData3[argIndex]),\\\n                                INTLEAVE(),\\\n                                XOReq256( stateAsLanes[argIndex+0], lanes0 ),\\\n                                XOReq256( stateAsLanes[argIndex+1], lanes1 ),\\\n                                XOReq256( stateAsLanes[argIndex+2], lanes2 ),\\\n                                XOReq256( stateAsLanes[argIndex+3], lanes3 )\n\n    if ( laneCount >= 16 )  {\n        Xor_In4( 0 );\n        Xor_In4( 4 );\n        Xor_In4( 8 );\n        Xor_In4( 12 );\n        if ( laneCount >= 20 )  {\n            Xor_In4( 16 );\n            for(i=20; i<laneCount; i++)\n                Xor_In( i );\n        }\n        else {\n            for(i=16; i<laneCount; i++)\n                Xor_In( i );\n        }\n    }\n    else {\n        for(i=0; i<laneCount; i++)\n            Xor_In( i );\n    }\n    #undef  Xor_In\n    #undef  Xor_In4\n}\n\nvoid KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)\n{\n    unsigned int sizeLeft = 
length;\n    unsigned int lanePosition = offset/SnP_laneLengthInBytes;\n    unsigned int offsetInLane = offset%SnP_laneLengthInBytes;\n    const unsigned char *curData = data;\n    UINT64 *statesAsLanes = (UINT64 *)states;\n\n    if ((sizeLeft > 0) && (offsetInLane != 0)) {\n        unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;\n        if (bytesInLane > sizeLeft)\n            bytesInLane = sizeLeft;\n        memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);\n        sizeLeft -= bytesInLane;\n        lanePosition++;\n        curData += bytesInLane;\n    }\n\n    while(sizeLeft >= SnP_laneLengthInBytes) {\n        UINT64 lane = *((const UINT64*)curData);\n        statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;\n        sizeLeft -= SnP_laneLengthInBytes;\n        lanePosition++;\n        curData += SnP_laneLengthInBytes;\n    }\n\n    if (sizeLeft > 0) {\n        memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);\n    }\n}\n\nvoid KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)\n{\n    V256 *stateAsLanes = (V256 *)states;\n    unsigned int i;\n    const UINT64 *curData0 = (const UINT64 *)data;\n    const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);\n    const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes);\n    const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes);\n    V256    lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;\n\n    #define OverWr( argIndex )  STORE256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))\n\n    #define OverWr4( argIndex )     lanes0 = LOAD256u( curData0[argIndex]),\\\n                                    lanes1 = LOAD256u( curData1[argIndex]),\\\n        
                            lanes2 = LOAD256u( curData2[argIndex]),\\\n                                    lanes3 = LOAD256u( curData3[argIndex]),\\\n                                    INTLEAVE(),\\\n                                    STORE256( stateAsLanes[argIndex+0], lanes0 ),\\\n                                    STORE256( stateAsLanes[argIndex+1], lanes1 ),\\\n                                    STORE256( stateAsLanes[argIndex+2], lanes2 ),\\\n                                    STORE256( stateAsLanes[argIndex+3], lanes3 )\n\n    if ( laneCount >= 16 )  {\n        OverWr4( 0 );\n        OverWr4( 4 );\n        OverWr4( 8 );\n        OverWr4( 12 );\n        if ( laneCount >= 20 )  {\n            OverWr4( 16 );\n            for(i=20; i<laneCount; i++)\n                OverWr( i );\n        }\n        else {\n            for(i=16; i<laneCount; i++)\n                OverWr( i );\n        }\n    }\n    else {\n        for(i=0; i<laneCount; i++)\n            OverWr( i );\n    }\n    #undef  OverWr\n    #undef  OverWr4\n}\n\nvoid KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)\n{\n    unsigned int sizeLeft = byteCount;\n    unsigned int lanePosition = 0;\n    UINT64 *statesAsLanes = (UINT64 *)states;\n\n    while(sizeLeft >= SnP_laneLengthInBytes) {\n        statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;\n        sizeLeft -= SnP_laneLengthInBytes;\n        lanePosition++;\n    }\n\n    if (sizeLeft > 0) {\n        memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);\n    }\n}\n\nvoid KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)\n{\n    unsigned int sizeLeft = length;\n    unsigned int lanePosition = offset/SnP_laneLengthInBytes;\n    unsigned int offsetInLane = offset%SnP_laneLengthInBytes;\n    unsigned char *curData = data;\n    const UINT64 *statesAsLanes = (const UINT64 
*)states;\n\n    if ((sizeLeft > 0) && (offsetInLane != 0)) {\n        unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;\n        if (bytesInLane > sizeLeft)\n            bytesInLane = sizeLeft;\n        memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);\n        sizeLeft -= bytesInLane;\n        lanePosition++;\n        curData += bytesInLane;\n    }\n\n    while(sizeLeft >= SnP_laneLengthInBytes) {\n        *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];\n        sizeLeft -= SnP_laneLengthInBytes;\n        lanePosition++;\n        curData += SnP_laneLengthInBytes;\n    }\n\n    if (sizeLeft > 0) {\n        memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);\n    }\n}\n\nvoid KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)\n{\n    UINT64 *curData0 = (UINT64 *)data;\n    UINT64 *curData1 = (UINT64 *)(data+laneOffset*1*SnP_laneLengthInBytes);\n    UINT64 *curData2 = (UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes);\n    UINT64 *curData3 = (UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes);\n\n    const V256 *stateAsLanes = (const V256 *)states;\n    const UINT64 *stateAsLanes64 = (const UINT64*)states;\n    V256    lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;\n    unsigned int i;\n\n    #define Extr( argIndex )    curData0[argIndex] = stateAsLanes64[4*(argIndex)],      \\\n                                curData1[argIndex] = stateAsLanes64[4*(argIndex)+1],    \\\n                                curData2[argIndex] = stateAsLanes64[4*(argIndex)+2],    \\\n                                curData3[argIndex] = stateAsLanes64[4*(argIndex)+3]\n\n    #define Extr4( argIndex )   lanes0 = LOAD256( stateAsLanes[argIndex+0] ),           \\\n                                lanes1 = LOAD256( stateAsLanes[argIndex+1] ),           \\\n 
                               lanes2 = LOAD256( stateAsLanes[argIndex+2] ),           \\\n                                lanes3 = LOAD256( stateAsLanes[argIndex+3] ),           \\\n                                UNINTLEAVE(),                                           \\\n                                STORE256u( curData0[argIndex], lanes0 ),                \\\n                                STORE256u( curData1[argIndex], lanes1 ),                \\\n                                STORE256u( curData2[argIndex], lanes2 ),                \\\n                                STORE256u( curData3[argIndex], lanes3 )\n\n    if ( laneCount >= 16 )  {\n        Extr4( 0 );\n        Extr4( 4 );\n        Extr4( 8 );\n        Extr4( 12 );\n        if ( laneCount >= 20 )  {\n            Extr4( 16 );\n            for(i=20; i<laneCount; i++)\n                Extr( i );\n        }\n        else {\n            for(i=16; i<laneCount; i++)\n                Extr( i );\n        }\n    }\n    else {\n        for(i=0; i<laneCount; i++)\n            Extr( i );\n    }\n    #undef  Extr\n    #undef  Extr4\n}\n\nvoid KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)\n{\n    unsigned int sizeLeft = length;\n    unsigned int lanePosition = offset/SnP_laneLengthInBytes;\n    unsigned int offsetInLane = offset%SnP_laneLengthInBytes;\n    const unsigned char *curInput = input;\n    unsigned char *curOutput = output;\n    const UINT64 *statesAsLanes = (const UINT64 *)states;\n\n    if ((sizeLeft > 0) && (offsetInLane != 0)) {\n        unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;\n        UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);\n        if (bytesInLane > sizeLeft)\n            bytesInLane = sizeLeft;\n        sizeLeft -= bytesInLane;\n        do {\n            *(curOutput++) = *(curInput++) ^ 
(unsigned char)lane;\n            lane >>= 8;\n        } while ( --bytesInLane != 0);\n        lanePosition++;\n    }\n\n    while(sizeLeft >= SnP_laneLengthInBytes) {\n        *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];\n        sizeLeft -= SnP_laneLengthInBytes;\n        lanePosition++;\n        curInput += SnP_laneLengthInBytes;\n        curOutput += SnP_laneLengthInBytes;\n    }\n\n    if (sizeLeft != 0) {\n        UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];\n        do {\n            *(curOutput++) = *(curInput++) ^ (unsigned char)lane;\n            lane >>= 8;\n        } while ( --sizeLeft != 0);\n    }\n}\n\nvoid KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)\n{\n    const UINT64 *curInput0 = (UINT64 *)input;\n    const UINT64 *curInput1 = (UINT64 *)(input+laneOffset*1*SnP_laneLengthInBytes);\n    const UINT64 *curInput2 = (UINT64 *)(input+laneOffset*2*SnP_laneLengthInBytes);\n    const UINT64 *curInput3 = (UINT64 *)(input+laneOffset*3*SnP_laneLengthInBytes);\n    UINT64 *curOutput0 = (UINT64 *)output;\n    UINT64 *curOutput1 = (UINT64 *)(output+laneOffset*1*SnP_laneLengthInBytes);\n    UINT64 *curOutput2 = (UINT64 *)(output+laneOffset*2*SnP_laneLengthInBytes);\n    UINT64 *curOutput3 = (UINT64 *)(output+laneOffset*3*SnP_laneLengthInBytes);\n\n    const V256 *stateAsLanes = (const V256 *)states;\n    const UINT64 *stateAsLanes64 = (const UINT64*)states;\n    V256    lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;\n    unsigned int i;\n\n    #define ExtrXor( argIndex ) \\\n                                curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes64[4*(argIndex)],\\\n                                curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes64[4*(argIndex)+1],\\\n                                curOutput2[argIndex] 
= curInput2[argIndex] ^ stateAsLanes64[4*(argIndex)+2],\\\n                                curOutput3[argIndex] = curInput3[argIndex] ^ stateAsLanes64[4*(argIndex)+3]\n\n    #define ExtrXor4( argIndex ) \\\n                                    lanes0 = LOAD256( stateAsLanes[argIndex+0] ),\\\n                                    lanes1 = LOAD256( stateAsLanes[argIndex+1] ),\\\n                                    lanes2 = LOAD256( stateAsLanes[argIndex+2] ),\\\n                                    lanes3 = LOAD256( stateAsLanes[argIndex+3] ),\\\n                                    UNINTLEAVE(),\\\n                                    lanesL01 = LOAD256u( curInput0[argIndex]),\\\n                                    lanesH01 = LOAD256u( curInput1[argIndex]),\\\n                                    lanesL23 = LOAD256u( curInput2[argIndex]),\\\n                                    lanesH23 = LOAD256u( curInput3[argIndex]),\\\n                                    XOReq256( lanes0, lanesL01 ),\\\n                                    XOReq256( lanes1, lanesH01 ),\\\n                                    XOReq256( lanes2, lanesL23 ),\\\n                                    XOReq256( lanes3, lanesH23 ),\\\n                                    STORE256u( curOutput0[argIndex], lanes0 ),\\\n                                    STORE256u( curOutput1[argIndex], lanes1 ),\\\n                                    STORE256u( curOutput2[argIndex], lanes2 ),\\\n                                    STORE256u( curOutput3[argIndex], lanes3 )\n\n    if ( laneCount >= 16 )  {\n        ExtrXor4( 0 );\n        ExtrXor4( 4 );\n        ExtrXor4( 8 );\n        ExtrXor4( 12 );\n        if ( laneCount >= 20 )  {\n            ExtrXor4( 16 );\n            for(i=20; i<laneCount; i++)\n                ExtrXor( i );\n        }\n        else {\n            for(i=16; i<laneCount; i++)\n                ExtrXor( i );\n        }\n    }\n    else {\n        for(i=0; i<laneCount; i++)\n            ExtrXor( i );\n    }\n    
#undef  ExtrXor\n    #undef  ExtrXor4\n}\n\n#define declareABCDE \\\n    V256 Aba, Abe, Abi, Abo, Abu; \\\n    V256 Aga, Age, Agi, Ago, Agu; \\\n    V256 Aka, Ake, Aki, Ako, Aku; \\\n    V256 Ama, Ame, Ami, Amo, Amu; \\\n    V256 Asa, Ase, Asi, Aso, Asu; \\\n    V256 Bba, Bbe, Bbi, Bbo, Bbu; \\\n    V256 Bga, Bge, Bgi, Bgo, Bgu; \\\n    V256 Bka, Bke, Bki, Bko, Bku; \\\n    V256 Bma, Bme, Bmi, Bmo, Bmu; \\\n    V256 Bsa, Bse, Bsi, Bso, Bsu; \\\n    V256 Ca, Ce, Ci, Co, Cu; \\\n    V256 Ca1, Ce1, Ci1, Co1, Cu1; \\\n    V256 Da, De, Di, Do, Du; \\\n    V256 Eba, Ebe, Ebi, Ebo, Ebu; \\\n    V256 Ega, Ege, Egi, Ego, Egu; \\\n    V256 Eka, Eke, Eki, Eko, Eku; \\\n    V256 Ema, Eme, Emi, Emo, Emu; \\\n    V256 Esa, Ese, Esi, Eso, Esu; \\\n\n#define prepareTheta \\\n    Ca = XOR256(Aba, XOR256(Aga, XOR256(Aka, XOR256(Ama, Asa)))); \\\n    Ce = XOR256(Abe, XOR256(Age, XOR256(Ake, XOR256(Ame, Ase)))); \\\n    Ci = XOR256(Abi, XOR256(Agi, XOR256(Aki, XOR256(Ami, Asi)))); \\\n    Co = XOR256(Abo, XOR256(Ago, XOR256(Ako, XOR256(Amo, Aso)))); \\\n    Cu = XOR256(Abu, XOR256(Agu, XOR256(Aku, XOR256(Amu, Asu)))); \\\n\n/* --- Theta Rho Pi Chi Iota Prepare-theta */\n/* --- 64-bit lanes mapped to 64-bit words */\n#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \\\n    ROL64in256(Ce1, Ce, 1); \\\n    Da = XOR256(Cu, Ce1); \\\n    ROL64in256(Ci1, Ci, 1); \\\n    De = XOR256(Ca, Ci1); \\\n    ROL64in256(Co1, Co, 1); \\\n    Di = XOR256(Ce, Co1); \\\n    ROL64in256(Cu1, Cu, 1); \\\n    Do = XOR256(Ci, Cu1); \\\n    ROL64in256(Ca1, Ca, 1); \\\n    Du = XOR256(Co, Ca1); \\\n\\\n    XOReq256(A##ba, Da); \\\n    Bba = A##ba; \\\n    XOReq256(A##ge, De); \\\n    ROL64in256(Bbe, A##ge, 44); \\\n    XOReq256(A##ki, Di); \\\n    ROL64in256(Bbi, A##ki, 43); \\\n    E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); \\\n    XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); \\\n    Ca = E##ba; \\\n    XOReq256(A##mo, Do); \\\n    ROL64in256(Bbo, A##mo, 21); \\\n    E##be = XOR256(Bbe, 
ANDnu256(Bbi, Bbo)); \\\n    Ce = E##be; \\\n    XOReq256(A##su, Du); \\\n    ROL64in256(Bbu, A##su, 14); \\\n    E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); \\\n    Ci = E##bi; \\\n    E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); \\\n    Co = E##bo; \\\n    E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); \\\n    Cu = E##bu; \\\n\\\n    XOReq256(A##bo, Do); \\\n    ROL64in256(Bga, A##bo, 28); \\\n    XOReq256(A##gu, Du); \\\n    ROL64in256(Bge, A##gu, 20); \\\n    XOReq256(A##ka, Da); \\\n    ROL64in256(Bgi, A##ka, 3); \\\n    E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)); \\\n    XOReq256(Ca, E##ga); \\\n    XOReq256(A##me, De); \\\n    ROL64in256(Bgo, A##me, 45); \\\n    E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)); \\\n    XOReq256(Ce, E##ge); \\\n    XOReq256(A##si, Di); \\\n    ROL64in256(Bgu, A##si, 61); \\\n    E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)); \\\n    XOReq256(Ci, E##gi); \\\n    E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); \\\n    XOReq256(Co, E##go); \\\n    E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); \\\n    XOReq256(Cu, E##gu); \\\n\\\n    XOReq256(A##be, De); \\\n    ROL64in256(Bka, A##be, 1); \\\n    XOReq256(A##gi, Di); \\\n    ROL64in256(Bke, A##gi, 6); \\\n    XOReq256(A##ko, Do); \\\n    ROL64in256(Bki, A##ko, 25); \\\n    E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); \\\n    XOReq256(Ca, E##ka); \\\n    XOReq256(A##mu, Du); \\\n    ROL64in256_8(Bko, A##mu); \\\n    E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); \\\n    XOReq256(Ce, E##ke); \\\n    XOReq256(A##sa, Da); \\\n    ROL64in256(Bku, A##sa, 18); \\\n    E##ki = XOR256(Bki, ANDnu256(Bko, Bku)); \\\n    XOReq256(Ci, E##ki); \\\n    E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); \\\n    XOReq256(Co, E##ko); \\\n    E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); \\\n    XOReq256(Cu, E##ku); \\\n\\\n    XOReq256(A##bu, Du); \\\n    ROL64in256(Bma, A##bu, 27); \\\n    XOReq256(A##ga, Da); \\\n    ROL64in256(Bme, A##ga, 36); \\\n    XOReq256(A##ke, De); \\\n    ROL64in256(Bmi, A##ke, 10); \\\n    E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); \\\n  
  XOReq256(Ca, E##ma); \\\n    XOReq256(A##mi, Di); \\\n    ROL64in256(Bmo, A##mi, 15); \\\n    E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); \\\n    XOReq256(Ce, E##me); \\\n    XOReq256(A##so, Do); \\\n    ROL64in256_56(Bmu, A##so); \\\n    E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); \\\n    XOReq256(Ci, E##mi); \\\n    E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); \\\n    XOReq256(Co, E##mo); \\\n    E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); \\\n    XOReq256(Cu, E##mu); \\\n\\\n    XOReq256(A##bi, Di); \\\n    ROL64in256(Bsa, A##bi, 62); \\\n    XOReq256(A##go, Do); \\\n    ROL64in256(Bse, A##go, 55); \\\n    XOReq256(A##ku, Du); \\\n    ROL64in256(Bsi, A##ku, 39); \\\n    E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); \\\n    XOReq256(Ca, E##sa); \\\n    XOReq256(A##ma, Da); \\\n    ROL64in256(Bso, A##ma, 41); \\\n    E##se = XOR256(Bse, ANDnu256(Bsi, Bso)); \\\n    XOReq256(Ce, E##se); \\\n    XOReq256(A##se, De); \\\n    ROL64in256(Bsu, A##se, 2); \\\n    E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); \\\n    XOReq256(Ci, E##si); \\\n    E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); \\\n    XOReq256(Co, E##so); \\\n    E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); \\\n    XOReq256(Cu, E##su); \\\n\\\n\n/* --- Theta Rho Pi Chi Iota */\n/* --- 64-bit lanes mapped to 64-bit words */\n#define thetaRhoPiChiIota(i, A, E) \\\n    ROL64in256(Ce1, Ce, 1); \\\n    Da = XOR256(Cu, Ce1); \\\n    ROL64in256(Ci1, Ci, 1); \\\n    De = XOR256(Ca, Ci1); \\\n    ROL64in256(Co1, Co, 1); \\\n    Di = XOR256(Ce, Co1); \\\n    ROL64in256(Cu1, Cu, 1); \\\n    Do = XOR256(Ci, Cu1); \\\n    ROL64in256(Ca1, Ca, 1); \\\n    Du = XOR256(Co, Ca1); \\\n\\\n    XOReq256(A##ba, Da); \\\n    Bba = A##ba; \\\n    XOReq256(A##ge, De); \\\n    ROL64in256(Bbe, A##ge, 44); \\\n    XOReq256(A##ki, Di); \\\n    ROL64in256(Bbi, A##ki, 43); \\\n    E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); \\\n    XOReq256(E##ba, CONST256_64(KeccakF1600RoundConstants[i])); \\\n    XOReq256(A##mo, Do); \\\n    ROL64in256(Bbo, A##mo, 21); \\\n    E##be = 
XOR256(Bbe, ANDnu256(Bbi, Bbo)); \\\n    XOReq256(A##su, Du); \\\n    ROL64in256(Bbu, A##su, 14); \\\n    E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); \\\n    E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); \\\n    E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); \\\n\\\n    XOReq256(A##bo, Do); \\\n    ROL64in256(Bga, A##bo, 28); \\\n    XOReq256(A##gu, Du); \\\n    ROL64in256(Bge, A##gu, 20); \\\n    XOReq256(A##ka, Da); \\\n    ROL64in256(Bgi, A##ka, 3); \\\n    E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)); \\\n    XOReq256(A##me, De); \\\n    ROL64in256(Bgo, A##me, 45); \\\n    E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)); \\\n    XOReq256(A##si, Di); \\\n    ROL64in256(Bgu, A##si, 61); \\\n    E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)); \\\n    E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); \\\n    E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); \\\n\\\n    XOReq256(A##be, De); \\\n    ROL64in256(Bka, A##be, 1); \\\n    XOReq256(A##gi, Di); \\\n    ROL64in256(Bke, A##gi, 6); \\\n    XOReq256(A##ko, Do); \\\n    ROL64in256(Bki, A##ko, 25); \\\n    E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); \\\n    XOReq256(A##mu, Du); \\\n    ROL64in256_8(Bko, A##mu); \\\n    E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); \\\n    XOReq256(A##sa, Da); \\\n    ROL64in256(Bku, A##sa, 18); \\\n    E##ki = XOR256(Bki, ANDnu256(Bko, Bku)); \\\n    E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); \\\n    E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); \\\n\\\n    XOReq256(A##bu, Du); \\\n    ROL64in256(Bma, A##bu, 27); \\\n    XOReq256(A##ga, Da); \\\n    ROL64in256(Bme, A##ga, 36); \\\n    XOReq256(A##ke, De); \\\n    ROL64in256(Bmi, A##ke, 10); \\\n    E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); \\\n    XOReq256(A##mi, Di); \\\n    ROL64in256(Bmo, A##mi, 15); \\\n    E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); \\\n    XOReq256(A##so, Do); \\\n    ROL64in256_56(Bmu, A##so); \\\n    E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); \\\n    E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); \\\n    E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); \\\n\\\n    XOReq256(A##bi, Di); \\\n  
  ROL64in256(Bsa, A##bi, 62); \\\n    XOReq256(A##go, Do); \\\n    ROL64in256(Bse, A##go, 55); \\\n    XOReq256(A##ku, Du); \\\n    ROL64in256(Bsi, A##ku, 39); \\\n    E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); \\\n    XOReq256(A##ma, Da); \\\n    ROL64in256(Bso, A##ma, 41); \\\n    E##se = XOR256(Bse, ANDnu256(Bsi, Bso)); \\\n    XOReq256(A##se, De); \\\n    ROL64in256(Bsu, A##se, 2); \\\n    E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); \\\n    E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); \\\n    E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); \\\n\\\n\nstatic ALIGN(KeccakP1600times4_statesAlignment) const UINT64 KeccakF1600RoundConstants[24] = {\n    0x0000000000000001ULL,\n    0x0000000000008082ULL,\n    0x800000000000808aULL,\n    0x8000000080008000ULL,\n    0x000000000000808bULL,\n    0x0000000080000001ULL,\n    0x8000000080008081ULL,\n    0x8000000000008009ULL,\n    0x000000000000008aULL,\n    0x0000000000000088ULL,\n    0x0000000080008009ULL,\n    0x000000008000000aULL,\n    0x000000008000808bULL,\n    0x800000000000008bULL,\n    0x8000000000008089ULL,\n    0x8000000000008003ULL,\n    0x8000000000008002ULL,\n    0x8000000000000080ULL,\n    0x000000000000800aULL,\n    0x800000008000000aULL,\n    0x8000000080008081ULL,\n    0x8000000000008080ULL,\n    0x0000000080000001ULL,\n    0x8000000080008008ULL};\n\n#define copyFromState(X, state) \\\n    X##ba = LOAD256(state[ 0]); \\\n    X##be = LOAD256(state[ 1]); \\\n    X##bi = LOAD256(state[ 2]); \\\n    X##bo = LOAD256(state[ 3]); \\\n    X##bu = LOAD256(state[ 4]); \\\n    X##ga = LOAD256(state[ 5]); \\\n    X##ge = LOAD256(state[ 6]); \\\n    X##gi = LOAD256(state[ 7]); \\\n    X##go = LOAD256(state[ 8]); \\\n    X##gu = LOAD256(state[ 9]); \\\n    X##ka = LOAD256(state[10]); \\\n    X##ke = LOAD256(state[11]); \\\n    X##ki = LOAD256(state[12]); \\\n    X##ko = LOAD256(state[13]); \\\n    X##ku = LOAD256(state[14]); \\\n    X##ma = LOAD256(state[15]); \\\n    X##me = LOAD256(state[16]); \\\n    X##mi = LOAD256(state[17]); \\\n 
   X##mo = LOAD256(state[18]); \\\n    X##mu = LOAD256(state[19]); \\\n    X##sa = LOAD256(state[20]); \\\n    X##se = LOAD256(state[21]); \\\n    X##si = LOAD256(state[22]); \\\n    X##so = LOAD256(state[23]); \\\n    X##su = LOAD256(state[24]); \\\n\n#define copyToState(state, X) \\\n    STORE256(state[ 0], X##ba); \\\n    STORE256(state[ 1], X##be); \\\n    STORE256(state[ 2], X##bi); \\\n    STORE256(state[ 3], X##bo); \\\n    STORE256(state[ 4], X##bu); \\\n    STORE256(state[ 5], X##ga); \\\n    STORE256(state[ 6], X##ge); \\\n    STORE256(state[ 7], X##gi); \\\n    STORE256(state[ 8], X##go); \\\n    STORE256(state[ 9], X##gu); \\\n    STORE256(state[10], X##ka); \\\n    STORE256(state[11], X##ke); \\\n    STORE256(state[12], X##ki); \\\n    STORE256(state[13], X##ko); \\\n    STORE256(state[14], X##ku); \\\n    STORE256(state[15], X##ma); \\\n    STORE256(state[16], X##me); \\\n    STORE256(state[17], X##mi); \\\n    STORE256(state[18], X##mo); \\\n    STORE256(state[19], X##mu); \\\n    STORE256(state[20], X##sa); \\\n    STORE256(state[21], X##se); \\\n    STORE256(state[22], X##si); \\\n    STORE256(state[23], X##so); \\\n    STORE256(state[24], X##su); \\\n\n#define copyStateVariables(X, Y) \\\n    X##ba = Y##ba; \\\n    X##be = Y##be; \\\n    X##bi = Y##bi; \\\n    X##bo = Y##bo; \\\n    X##bu = Y##bu; \\\n    X##ga = Y##ga; \\\n    X##ge = Y##ge; \\\n    X##gi = Y##gi; \\\n    X##go = Y##go; \\\n    X##gu = Y##gu; \\\n    X##ka = Y##ka; \\\n    X##ke = Y##ke; \\\n    X##ki = Y##ki; \\\n    X##ko = Y##ko; \\\n    X##ku = Y##ku; \\\n    X##ma = Y##ma; \\\n    X##me = Y##me; \\\n    X##mi = Y##mi; \\\n    X##mo = Y##mo; \\\n    X##mu = Y##mu; \\\n    X##sa = Y##sa; \\\n    X##se = Y##se; \\\n    X##si = Y##si; \\\n    X##so = Y##so; \\\n    X##su = Y##su; \\\n\n #ifdef KeccakP1600times4_fullUnrolling\n#define FullUnrolling\n#else\n#define Unrolling KeccakP1600times4_unrolling\n#endif\n#include \"KeccakP-1600-unrolling.macros\"\n\nvoid 
KeccakP1600times4_PermuteAll_24rounds(void *states)\n{\n    V256 *statesAsLanes = (V256 *)states;\n    declareABCDE\n    #ifndef KeccakP1600times4_fullUnrolling\n    unsigned int i;\n    #endif\n\n    copyFromState(A, statesAsLanes)\n    rounds24\n    copyToState(statesAsLanes, A)\n}\n\nvoid KeccakP1600times4_PermuteAll_12rounds(void *states)\n{\n    V256 *statesAsLanes = (V256 *)states;\n    declareABCDE\n    #ifndef KeccakP1600times4_fullUnrolling\n    unsigned int i;\n    #endif\n\n    copyFromState(A, statesAsLanes)\n    rounds12\n    copyToState(statesAsLanes, A)\n}\n\nsize_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)\n{\n    if (laneCount == 21) {\n#if 0\n        const unsigned char *dataStart = data;\n        const UINT64 *curData0 = (const UINT64 *)data;\n        const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes);\n        const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes);\n        const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes);\n\n        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {\n            V256 *stateAsLanes = (V256 *)states;\n            V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;\n            #define Xor_In( argIndex ) \\\n                XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))\n            #define Xor_In4( argIndex ) \\\n                lanes0 = LOAD256u( curData0[argIndex]),\\\n                lanes1 = LOAD256u( curData1[argIndex]),\\\n                lanes2 = LOAD256u( curData2[argIndex]),\\\n                lanes3 = LOAD256u( curData3[argIndex]),\\\n                INTLEAVE(),\\\n                XOReq256( stateAsLanes[argIndex+0], lanes0 ),\\\n            
    XOReq256( stateAsLanes[argIndex+1], lanes1 ),\\\n                XOReq256( stateAsLanes[argIndex+2], lanes2 ),\\\n                XOReq256( stateAsLanes[argIndex+3], lanes3 )\n            Xor_In4( 0 );\n            Xor_In4( 4 );\n            Xor_In4( 8 );\n            Xor_In4( 12 );\n            Xor_In4( 16 );\n            Xor_In( 20 );\n            #undef  Xor_In\n            #undef  Xor_In4\n            KeccakP1600times4_PermuteAll_24rounds(states);\n            curData0 += laneOffsetSerial;\n            curData1 += laneOffsetSerial;\n            curData2 += laneOffsetSerial;\n            curData3 += laneOffsetSerial;\n            dataByteLen -= laneOffsetSerial*8;\n        }\n        return (const unsigned char *)curData0 - dataStart;\n#else\n//        unsigned int i;\n        const unsigned char *dataStart = data;\n        const UINT64 *curData0 = (const UINT64 *)data;\n        const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes);\n        const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes);\n        const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes);\n        V256 *statesAsLanes = (V256 *)states;\n        declareABCDE\n\n        copyFromState(A, statesAsLanes)\n        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {\n            #define XOR_In( Xxx, argIndex ) \\\n                XOReq256(Xxx, LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))\n            XOR_In( Aba, 0 );\n            XOR_In( Abe, 1 );\n            XOR_In( Abi, 2 );\n            XOR_In( Abo, 3 );\n            XOR_In( Abu, 4 );\n            XOR_In( Aga, 5 );\n            XOR_In( Age, 6 );\n            XOR_In( Agi, 7 );\n            XOR_In( Ago, 8 );\n            XOR_In( Agu, 9 );\n            XOR_In( Aka, 10 );\n            XOR_In( Ake, 11 );\n            XOR_In( Aki, 12 );\n            XOR_In( Ako, 13 );\n            
XOR_In( Aku, 14 );\n            XOR_In( Ama, 15 );\n            XOR_In( Ame, 16 );\n            XOR_In( Ami, 17 );\n            XOR_In( Amo, 18 );\n            XOR_In( Amu, 19 );\n            XOR_In( Asa, 20 );\n            #undef XOR_In\n            rounds24\n            curData0 += laneOffsetSerial;\n            curData1 += laneOffsetSerial;\n            curData2 += laneOffsetSerial;\n            curData3 += laneOffsetSerial;\n            dataByteLen -= laneOffsetSerial*8;\n        }\n        copyToState(statesAsLanes, A)\n        return (const unsigned char *)curData0 - dataStart;\n#endif\n    }\n    else {\n//        unsigned int i;\n        const unsigned char *dataStart = data;\n\n        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {\n            KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);\n            KeccakP1600times4_PermuteAll_24rounds(states);\n            data += laneOffsetSerial*8;\n            dataByteLen -= laneOffsetSerial*8;\n        }\n        return data - dataStart;\n    }\n}\n\nsize_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)\n{\n    if (laneCount == 21) {\n#if 0\n        const unsigned char *dataStart = data;\n        const UINT64 *curData0 = (const UINT64 *)data;\n        const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes);\n        const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes);\n        const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes);\n\n        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {\n            V256 *stateAsLanes = states;\n            V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;\n            #define Xor_In( argIndex ) \\\n                
XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))\n            #define Xor_In4( argIndex ) \\\n                lanes0 = LOAD256u( curData0[argIndex]),\\\n                lanes1 = LOAD256u( curData1[argIndex]),\\\n                lanes2 = LOAD256u( curData2[argIndex]),\\\n                lanes3 = LOAD256u( curData3[argIndex]),\\\n                INTLEAVE(),\\\n                XOReq256( stateAsLanes[argIndex+0], lanes0 ),\\\n                XOReq256( stateAsLanes[argIndex+1], lanes1 ),\\\n                XOReq256( stateAsLanes[argIndex+2], lanes2 ),\\\n                XOReq256( stateAsLanes[argIndex+3], lanes3 )\n            Xor_In4( 0 );\n            Xor_In4( 4 );\n            Xor_In4( 8 );\n            Xor_In4( 12 );\n            Xor_In4( 16 );\n            Xor_In( 20 );\n            #undef  Xor_In\n            #undef  Xor_In4\n            KeccakP1600times4_PermuteAll_12rounds(states);\n            curData0 += laneOffsetSerial;\n            curData1 += laneOffsetSerial;\n            curData2 += laneOffsetSerial;\n            curData3 += laneOffsetSerial;\n            dataByteLen -= laneOffsetSerial*8;\n        }\n        return (const unsigned char *)curData0 - dataStart;\n#else\n//        unsigned int i;\n        const unsigned char *dataStart = data;\n        const UINT64 *curData0 = (const UINT64 *)data;\n        const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes);\n        const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes);\n        const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes);\n        V256 *statesAsLanes = states;\n        declareABCDE\n\n        copyFromState(A, statesAsLanes)\n        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {\n            #define XOR_In( Xxx, argIndex ) \\\n                XOReq256(Xxx, LOAD4_64(curData3[argIndex], 
curData2[argIndex], curData1[argIndex], curData0[argIndex]))\n            XOR_In( Aba, 0 );\n            XOR_In( Abe, 1 );\n            XOR_In( Abi, 2 );\n            XOR_In( Abo, 3 );\n            XOR_In( Abu, 4 );\n            XOR_In( Aga, 5 );\n            XOR_In( Age, 6 );\n            XOR_In( Agi, 7 );\n            XOR_In( Ago, 8 );\n            XOR_In( Agu, 9 );\n            XOR_In( Aka, 10 );\n            XOR_In( Ake, 11 );\n            XOR_In( Aki, 12 );\n            XOR_In( Ako, 13 );\n            XOR_In( Aku, 14 );\n            XOR_In( Ama, 15 );\n            XOR_In( Ame, 16 );\n            XOR_In( Ami, 17 );\n            XOR_In( Amo, 18 );\n            XOR_In( Amu, 19 );\n            XOR_In( Asa, 20 );\n            #undef XOR_In\n            rounds12\n            curData0 += laneOffsetSerial;\n            curData1 += laneOffsetSerial;\n            curData2 += laneOffsetSerial;\n            curData3 += laneOffsetSerial;\n            dataByteLen -= laneOffsetSerial*8;\n        }\n        copyToState(statesAsLanes, A)\n        return (const unsigned char *)curData0 - dataStart;\n#endif\n    }\n    else {\n//        unsigned int i;\n        const unsigned char *dataStart = data;\n\n        while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) {\n            KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);\n            KeccakP1600times4_PermuteAll_12rounds(states);\n            data += laneOffsetSerial*8;\n            dataByteLen -= laneOffsetSerial*8;\n        }\n        return data - dataStart;\n    }\n}\n"
  },
  {
    "path": "shake-avx2/keccak4x/KeccakP-1600-times4-SnP.h",
    "content": "/*\nImplementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,\nJoan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby\ndenoted as \"the implementer\".\n\nFor more information, feedback or questions, please refer to our websites:\nhttp://keccak.noekeon.org/\nhttp://keyak.noekeon.org/\nhttp://ketje.noekeon.org/\n\nTo the extent possible under law, the implementer has waived all copyright\nand related or neighboring rights to the source code in this file.\nhttp://creativecommons.org/publicdomain/zero/1.0/\n*/\n\n#ifndef _KeccakP_1600_times4_SnP_h_\n#define _KeccakP_1600_times4_SnP_h_\n\n/** For the documentation, see PlSnP-documentation.h.\n */\n\n#include \"SIMD256-config.h\"\n\n#define KeccakP1600times4_implementation        \"256-bit SIMD implementation (\" KeccakP1600times4_implementation_config \")\"\n#define KeccakP1600times4_statesSizeInBytes     800\n#define KeccakP1600times4_statesAlignment       32\n#define KeccakF1600times4_FastLoop_supported\n#define KeccakP1600times4_12rounds_FastLoop_supported\n\n#include <stddef.h>\n\n#define KeccakP1600times4_StaticInitialize()\nvoid KeccakP1600times4_InitializeAll(void *states);\n#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \\\n    ((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte)\nvoid KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);\nvoid KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);\nvoid KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);\nvoid KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);\nvoid KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int 
instanceIndex, unsigned int byteCount);\nvoid KeccakP1600times4_PermuteAll_12rounds(void *states);\nvoid KeccakP1600times4_PermuteAll_24rounds(void *states);\nvoid KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);\nvoid KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);\nvoid KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex,  const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);\nvoid KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);\nsize_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);\nsize_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);\n\n#endif\n"
  },
  {
    "path": "shake-avx2/keccak4x/KeccakP-1600-unrolling.macros",
    "content": "/*\nImplementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,\nJoan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby\ndenoted as \"the implementer\".\n\nFor more information, feedback or questions, please refer to our websites:\nhttp://keccak.noekeon.org/\nhttp://keyak.noekeon.org/\nhttp://ketje.noekeon.org/\n\nTo the extent possible under law, the implementer has waived all copyright\nand related or neighboring rights to the source code in this file.\nhttp://creativecommons.org/publicdomain/zero/1.0/\n*/\n\n#if (defined(FullUnrolling))\n#define rounds24 \\\n    prepareTheta \\\n    thetaRhoPiChiIotaPrepareTheta( 0, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta( 1, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta( 2, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta( 3, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta( 4, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta( 5, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta( 6, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta( 7, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta( 8, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta( 9, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(10, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(11, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(12, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(13, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(14, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(15, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(16, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(17, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(18, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(19, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(20, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(21, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(22, A, E) \\\n    thetaRhoPiChiIota(23, E, A) \\\n\n#define rounds12 \\\n    prepareTheta \\\n    thetaRhoPiChiIotaPrepareTheta(12, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(13, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(14, A, E) \\\n    
thetaRhoPiChiIotaPrepareTheta(15, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(16, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(17, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(18, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(19, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(20, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(21, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(22, A, E) \\\n    thetaRhoPiChiIota(23, E, A) \\\n\n#elif (Unrolling == 12)\n#define rounds24 \\\n    prepareTheta \\\n    for(i=0; i<24; i+=12) { \\\n        thetaRhoPiChiIotaPrepareTheta(i   , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \\\n    } \\\n\n#define rounds12 \\\n    prepareTheta \\\n    thetaRhoPiChiIotaPrepareTheta(12, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(13, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(14, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(15, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(16, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(17, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(18, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(19, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(20, A, E) \\\n    thetaRhoPiChiIotaPrepareTheta(21, E, A) \\\n    thetaRhoPiChiIotaPrepareTheta(22, A, E) \\\n    thetaRhoPiChiIota(23, E, A) \\\n\n#elif (Unrolling == 6)\n#define rounds24 \\\n    prepareTheta \\\n    for(i=0; i<24; i+=6) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n        
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \\\n    } \\\n\n#define rounds12 \\\n    prepareTheta \\\n    for(i=12; i<24; i+=6) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \\\n    } \\\n\n#elif (Unrolling == 4)\n#define rounds24 \\\n    prepareTheta \\\n    for(i=0; i<24; i+=4) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \\\n    } \\\n\n#define rounds12 \\\n    prepareTheta \\\n    for(i=12; i<24; i+=4) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \\\n    } \\\n\n#elif (Unrolling == 3)\n#define rounds24 \\\n    prepareTheta \\\n    for(i=0; i<24; i+=3) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \\\n        copyStateVariables(A, E) \\\n    } \\\n\n#define rounds12 \\\n    prepareTheta \\\n    for(i=12; i<24; i+=3) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \\\n        copyStateVariables(A, E) \\\n    } \\\n\n#elif (Unrolling == 2)\n#define rounds24 \\\n    prepareTheta \\\n    for(i=0; i<24; i+=2) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n    } \\\n\n#define rounds12 \\\n    prepareTheta \\\n    for(i=12; i<24; i+=2) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n    } \\\n\n#elif (Unrolling == 1)\n#define rounds24 \\\n    prepareTheta \\\n    for(i=0; i<24; i++) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        copyStateVariables(A, E) \\\n    } \\\n\n#define rounds12 \\\n    prepareTheta \\\n    for(i=12; i<24; i++) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        copyStateVariables(A, E) \\\n    } \\\n\n#else\n#error \"Unrolling is not correctly specified!\"\n#endif\n\n#define roundsN(__nrounds) \\\n    prepareTheta \\\n    i = 24 - (__nrounds); \\\n    if ((i&1) != 0) { \\\n        thetaRhoPiChiIotaPrepareTheta(i, A, E) \\\n        copyStateVariables(A, E) \\\n        ++i; \\\n    } \\\n    for( /* empty */; i<24; i+=2) { \\\n        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \\\n        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \\\n    }\n"
  },
  {
    "path": "shake-avx2/keccak4x/SIMD256-config.h",
    "content": "#define KeccakP1600times4_implementation_config \"AVX2, all rounds unrolled\"\n#define KeccakP1600times4_fullUnrolling\n#define KeccakP1600times4_useAVX2\n"
  },
  {
    "path": "shake-avx2/keccak4x/align.h",
    "content": "/*\nImplementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,\nJoan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby\ndenoted as \"the implementer\".\n\nFor more information, feedback or questions, please refer to our websites:\nhttp://keccak.noekeon.org/\nhttp://keyak.noekeon.org/\nhttp://ketje.noekeon.org/\n\nTo the extent possible under law, the implementer has waived all copyright\nand related or neighboring rights to the source code in this file.\nhttp://creativecommons.org/publicdomain/zero/1.0/\n*/\n\n#ifndef _align_h_\n#define _align_h_\n\n/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */\n#ifdef ALIGN\n#undef ALIGN\n#endif\n\n#if defined(__GNUC__)\n#define ALIGN(x) __attribute__ ((aligned(x)))\n#elif defined(_MSC_VER)\n#define ALIGN(x) __declspec(align(x))\n#elif defined(__ARMCC_VERSION)\n#define ALIGN(x) __align(x)\n#else\n#define ALIGN(x)\n#endif\n\n#endif\n"
  },
  {
    "path": "shake-avx2/keccak4x/brg_endian.h",
    "content": "/*\n ---------------------------------------------------------------------------\n Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.\n\n LICENSE TERMS\n\n The redistribution and use of this software (with or without changes)\n is allowed without the payment of fees or royalties provided that:\n\n  1. source code distributions include the above copyright notice, this\n     list of conditions and the following disclaimer;\n\n  2. binary distributions include the above copyright notice, this list\n     of conditions and the following disclaimer in their documentation;\n\n  3. the name of the copyright holder is not used to endorse products\n     built using this software without specific written permission.\n\n DISCLAIMER\n\n This software is provided 'as is' with no explicit or implied warranties\n in respect of its properties, including, but not limited to, correctness\n and/or fitness for purpose.\n ---------------------------------------------------------------------------\n Issue Date: 20/12/2007\n Changes for ARM 9/9/2010\n*/\n\n#ifndef _BRG_ENDIAN_H\n#define _BRG_ENDIAN_H\n\n#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */\n#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */\n\n#if 0\n/* Include files where endian defines and byteswap functions may reside */\n#if defined( __sun )\n#  include <sys/isa_defs.h>\n#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )\n#  include <sys/endian.h>\n#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \\\n      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )\n#  include <machine/endian.h>\n#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )\n#  if !defined( __MINGW32__ ) && !defined( _AIX )\n#    include <endian.h>\n#    if !defined( __BEOS__ )\n#      include <byteswap.h>\n#    endif\n#  endif\n#endif\n#endif\n\n/* Now attempt to set the define 
for platform byte order using any  */\n/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */\n/* seem to encompass most endian symbol definitions                 */\n\n#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )\n#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN\n#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN\n#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#  endif\n#elif defined( BIG_ENDIAN )\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#elif defined( LITTLE_ENDIAN )\n#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#endif\n\n#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )\n#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN\n#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN\n#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#  endif\n#elif defined( _BIG_ENDIAN )\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#elif defined( _LITTLE_ENDIAN )\n#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#endif\n\n#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )\n#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN\n#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN\n#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#  endif\n#elif defined( __BIG_ENDIAN )\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#elif defined( __LITTLE_ENDIAN )\n#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#endif\n\n#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )\n#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__\n#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__\n#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#  endif\n#elif defined( __BIG_ENDIAN__ )\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#elif defined( __LITTLE_ENDIAN__ )\n#  define 
PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#endif\n\n/*  if the platform byte order could not be determined, then try to */\n/*  set this define using common machine defines                    */\n#if !defined(PLATFORM_BYTE_ORDER)\n\n#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \\\n      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \\\n      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \\\n      defined( vax )       || defined( vms )     || defined( VMS )        || \\\n      defined( __VMS )     || defined( _M_X64 )\n#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n\n#elif defined( AMIGA )   || defined( applec )    || defined( __AS400__ )  || \\\n      defined( _CRAY )   || defined( __hppa )    || defined( __hp9000 )   || \\\n      defined( ibm370 )  || defined( mc68000 )   || defined( m68k )       || \\\n      defined( __MRC__ ) || defined( __MVS__ )   || defined( __MWERKS__ ) || \\\n      defined( sparc )   || defined( __sparc)    || defined( SYMANTEC_C ) || \\\n      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM )   || \\\n      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n\n#elif defined(__arm__)\n# ifdef __BIG_ENDIAN\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n# else\n#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n# endif\n#elif 1     /* **** EDIT HERE IF NECESSARY **** */\n#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN\n#elif 0     /* **** EDIT HERE IF NECESSARY **** */\n#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN\n#else\n#  error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order\n#endif\n\n#endif\n\n#endif\n"
  },
  {
    "path": "shake-avx2/merkle.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx4.h\"\n#include \"wots.h\"\n#include \"wotsx4.h\"\n#include \"merkle.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n/*\n * This generates a Merkle signature (WOTS signature followed by the Merkle\n * authentication path).\n */ \nvoid merkle_sign(uint8_t *sig, unsigned char *root,\n                 const spx_ctx* ctx,\n                 uint32_t wots_addr[8], uint32_t tree_addr[8],\n                 uint32_t idx_leaf)\n{\n    unsigned char *auth_path = sig + SPX_WOTS_BYTES;\n    uint32_t tree_addrx4[4*8] = { 0 };\n    int j;\n    struct leaf_info_x4 info = { 0 };\n    unsigned steps[ SPX_WOTS_LEN ];\n\n    info.wots_sig = sig;\n    chain_lengths(steps, root);\n    info.wots_steps = steps;\n\n    for (j=0; j<4; j++) {\n        set_type(&tree_addrx4[8*j], SPX_ADDR_TYPE_HASHTREE);\n        set_type(&info.leaf_addr[8*j], SPX_ADDR_TYPE_WOTS);\n        set_type(&info.pk_addr[8*j], SPX_ADDR_TYPE_WOTSPK);\n        copy_subtree_addr(&tree_addrx4[8*j], tree_addr);\n        copy_subtree_addr(&info.leaf_addr[8*j], wots_addr);\n        copy_subtree_addr(&info.pk_addr[8*j], wots_addr);\n    }\n\n    info.wots_sign_leaf = idx_leaf;\n\n    treehashx4(root, auth_path, ctx,\n                idx_leaf, 0,\n                SPX_TREE_HEIGHT,\n                wots_gen_leafx4,\n                tree_addrx4, &info);\n}\n\n/* Compute root node of the top-most subtree. */\nvoid merkle_gen_root(unsigned char *root, const spx_ctx *ctx)\n{\n    /* We do not need the auth path in key generation, but it simplifies the\n       code to have just one treehash routine that computes both root and path\n       in one function. 
*/\n    unsigned char auth_path[SPX_TREE_HEIGHT * SPX_N + SPX_WOTS_BYTES];\n    uint32_t top_tree_addr[8] = {0};\n    uint32_t wots_addr[8] = {0};\n\n    set_layer_addr(top_tree_addr, SPX_D - 1);\n    set_layer_addr(wots_addr, SPX_D - 1);\n\n    merkle_sign(auth_path, root, ctx,\n                wots_addr, top_tree_addr,\n                ~0 /* ~0 means \"don't bother generating an auth path\" */ );\n}\n"
  },
  {
    "path": "shake-avx2/test/benchmark.c",
    "content": "#define _POSIX_C_SOURCE 199309L\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n\n#include \"../api.h\"\n#include \"../fors.h\"\n#include \"../wots.h\"\n#include \"../wotsx4.h\"\n#include \"../params.h\"\n#include \"../randombytes.h\"\n\n#define SPX_MLEN 32\n#define NTESTS 10\n\nstatic void wots_gen_pkx4(unsigned char *pk, const spx_ctx *ctx,\n        uint32_t addr[8]);\n\nstatic int cmp_llu(const void *a, const void*b)\n{\n  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;\n  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;\n  return 0;\n}\n\nstatic unsigned long long median(unsigned long long *l, size_t llen)\n{\n  qsort(l,llen,sizeof(unsigned long long),cmp_llu);\n\n  if(llen%2) return l[llen/2];\n  else return (l[llen/2-1]+l[llen/2])/2;\n}\n\nstatic void delta(unsigned long long *l, size_t llen)\n{\n    unsigned int i;\n    for(i = 0; i < llen - 1; i++) {\n        l[i] = l[i+1] - l[i];\n    }\n}\n\nstatic unsigned long long cpucycles(void)\n{\n  unsigned long long result;\n  __asm volatile(\".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax\"\n    : \"=a\" (result) ::  \"%rdx\");\n  return result;\n}\n\nstatic void printfcomma (unsigned long long n)\n{\n    if (n < 1000) {\n        printf(\"%llu\", n);\n        return;\n    }\n    printfcomma(n / 1000);\n    printf (\",%03llu\", n % 1000);\n}\n\nstatic void printfalignedcomma (unsigned long long n, int len)\n{\n    unsigned long long ncopy = n;\n    int i = 0;\n\n    while (ncopy > 9) {\n        len -= 1;\n        ncopy /= 10;\n        i += 1;  // to account for commas\n    }\n    i = i/3 - 1;  // to account for commas\n    for (; i < len; i++) {\n        printf(\" \");\n    }\n    printfcomma(n);\n}\n\nstatic void display_result(double result, unsigned long long *l, size_t llen, unsigned long long mul)\n{\n    unsigned long long med;\n\n    result /= NTESTS;\n    delta(l, NTESTS + 1);\n    med = median(l, llen);\n    printf(\"avg. 
%11.2lf us (%2.2lf sec); median \", result, result / 1e6);\n    printfalignedcomma(med, 12);\n    printf(\" cycles,  %5llux: \", mul);\n    printfalignedcomma(mul*med, 12);\n    printf(\" cycles\\n\");\n}\n\n#define MEASURE(TEXT, MUL, FNCALL)\\\n    printf(TEXT);\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);\\\n    for(i = 0; i < NTESTS; i++) {\\\n        t[i] = cpucycles();\\\n        FNCALL;\\\n    }\\\n    t[NTESTS] = cpucycles();\\\n    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);\\\n    result = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) / 1e3;\\\n    display_result(result, t, NTESTS, MUL);\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    spx_ctx ctx;\n    unsigned char pk[SPX_PK_BYTES];\n    unsigned char sk[SPX_SK_BYTES];\n    unsigned char *m = malloc(SPX_MLEN);\n    unsigned char *sm = malloc(SPX_BYTES + SPX_MLEN);\n    unsigned char *mout = malloc(SPX_BYTES + SPX_MLEN);\n\n    unsigned char fors_pk[SPX_FORS_PK_BYTES];\n    unsigned char fors_m[SPX_FORS_MSG_BYTES];\n    unsigned char fors_sig[SPX_FORS_BYTES];\n    unsigned char addr[SPX_ADDR_BYTES];\n    unsigned char wots_pk[4*SPX_WOTS_PK_BYTES];\n\n    unsigned long long smlen;\n    unsigned long long mlen;\n    unsigned long long t[NTESTS+1];\n    struct timespec start, stop;\n    double result;\n    int i;\n\n    randombytes(m, SPX_MLEN);\n    randombytes(addr, SPX_ADDR_BYTES);\n\n    printf(\"Parameters: n = %d, h = %d, d = %d, b = %d, k = %d, w = %d\\n\",\n           SPX_N, SPX_FULL_HEIGHT, SPX_D, SPX_FORS_HEIGHT, SPX_FORS_TREES,\n           SPX_WOTS_W);\n\n    printf(\"Running %d iterations.\\n\", NTESTS);\n\n    MEASURE(\"Generating keypair.. \", 1, crypto_sign_keypair(pk, sk));\n    MEASURE(\"  - WOTS pk gen 4x.. \", (1 << SPX_TREE_HEIGHT) / 4, wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Signing..            
\", 1, crypto_sign(sm, &smlen, m, SPX_MLEN, sk));\n    MEASURE(\"  - FORS signing..   \", 1, fors_sign(fors_sig, fors_pk, fors_m, &ctx, (uint32_t *) addr));\n    MEASURE(\"  - WOTS pk gen x4.. \", SPX_D * (1 << SPX_TREE_HEIGHT) / 4, wots_gen_pkx4(wots_pk, &ctx, (uint32_t *) addr));\n    MEASURE(\"Verifying..          \", 1, crypto_sign_open(mout, &mlen, sm, smlen, pk));\n\n    printf(\"Signature size: %d (%.2f KiB)\\n\", SPX_BYTES, SPX_BYTES / 1024.0);\n    printf(\"Public key size: %d (%.2f KiB)\\n\", SPX_PK_BYTES, SPX_PK_BYTES / 1024.0);\n    printf(\"Secret key size: %d (%.2f KiB)\\n\", SPX_SK_BYTES, SPX_SK_BYTES / 1024.0);\n\n    free(m);\n    free(sm);\n    free(mout);\n\n    return 0;\n}\n\nstatic void wots_gen_pkx4(unsigned char *pk, const spx_ctx *ctx, uint32_t addr[8]) {\n    struct leaf_info_x4 leaf;\n    unsigned steps[ SPX_WOTS_LEN ] = { 0 };\n    INITIALIZE_LEAF_INFO_X4(leaf, addr, steps);\n    wots_gen_leafx4(pk, ctx, 0, &leaf);\n}\n"
  },
  {
    "path": "shake-avx2/test/thashx4.c",
    "content": "#include <stdio.h>\n#include <string.h>\n\n#include \"../thashx4.h\"\n#include \"../thash.h\"\n#include \"../randombytes.h\"\n#include \"../params.h\"\n\nint main(void)\n{\n    /* Make stdout buffer more responsive. */\n    setbuf(stdout, NULL);\n\n    unsigned char input[4*SPX_N];\n    unsigned char output[4*SPX_N];\n    unsigned char out4[4*SPX_N];\n    uint32_t addr[4*8] = {0};\n    unsigned int j;\n    spx_ctx ctx;\n\n    randombytes(ctx.pub_seed, SPX_N);\n    randombytes(input, 4*SPX_N);\n    randombytes((unsigned char *)addr, 4 * 8 * sizeof(uint32_t));\n\n    printf(\"Testing if thash matches thashx4.. \");\n\n    for (j = 0; j < 4; j++) {\n        thash(out4 + j * SPX_N, input + j * SPX_N, 1, &ctx, addr + j*8);\n    }\n\n    thashx4(output + 0*SPX_N,\n            output + 1*SPX_N,\n            output + 2*SPX_N,\n            output + 3*SPX_N,\n            input + 0*SPX_N,\n            input + 1*SPX_N,\n            input + 2*SPX_N,\n            input + 3*SPX_N,\n            1, &ctx, addr);\n\n    if (memcmp(out4, output, 4 * SPX_N)) {\n        printf(\"failed!\\n\");\n        return -1;\n    }\n    printf(\"successful.\\n\");\n    return 0;\n}\n"
  },
  {
    "path": "shake-avx2/thash_shake_robustx4.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thashx4.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"fips202x4.h\"\n\nextern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s);\n\n/**\n * 4-way parallel version of thash; takes 4x as much input and output\n */\nvoid thashx4(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx4[4*8])\n{\n    if (inblocks == 1 || inblocks == 2) {\n        /* As we write and read only a few quadwords, it is more efficient to\n         * build and extract from the fourway SHAKE256 state by hand. */\n        __m256i state[25];\n        for (int i = 0; i < SPX_N/8; i++) {\n            state[i] = _mm256_set1_epi64x(((int64_t*)ctx->pub_seed)[i]);\n        }\n        for (int i = 0; i < 4; i++) {\n            state[SPX_N/8+i] = _mm256_set_epi32(\n                addrx4[3*8+1+2*i],\n                addrx4[3*8+2*i],\n                addrx4[2*8+1+2*i],\n                addrx4[2*8+2*i],\n                addrx4[8+1+2*i],\n                addrx4[8+2*i],\n                addrx4[1+2*i],\n                addrx4[2*i]\n            );\n        }\n\n        /* SHAKE domain separator and padding */\n        state[SPX_N/8+4] = _mm256_set1_epi64x(0x1f);\n        for (int i = SPX_N/8+5; i < 16; i++) {\n            state[i] = _mm256_set1_epi64x(0);\n        }\n        state[16] = _mm256_set1_epi64x((long long)(0x80ULL << 56));\n\n        for (int i = 17; i < 25; i++) {\n            state[i] = _mm256_set1_epi64x(0);\n        }\n\n        /* We will permutate state2 with f1600x4 to compute the bitmask,\n         * but first we'll copy it to state2 which will be used to compute\n         * the 
final output, as its input is almost identical. */\n        __m256i state2[25];\n        memcpy(state2, state, 800);\n\n        KeccakP1600times4_PermuteAll_24rounds(&state[0]);\n\n        /* By copying from state, state2 already contains the pub_seed\n         * and address.  We just need to copy in the input blocks XORed with\n         * the bitmask we just computed. */\n        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {\n            state2[SPX_N/8+4+i] = _mm256_xor_si256(\n                    state[i],\n                    _mm256_set_epi64x(\n                        ((int64_t*)in3)[i],\n                        ((int64_t*)in2)[i],\n                        ((int64_t*)in1)[i],\n                        ((int64_t*)in0)[i]\n                    )\n                );\n        }\n\n        /* Domain separator and start of padding.  Note that the quadwords\n         * around are already zeroed for state from which we copied.\n         * We do a XOR instead of a set as this might be the 16th quadword\n         * when N=32 and inblocks=2, which already contains the end\n         * of the padding. 
*/\n        state2[(SPX_N/8)*(1+inblocks)+4] = _mm256_xor_si256(\n            state2[(SPX_N/8)*(1+inblocks)+4],\n            _mm256_set1_epi64x(0x1f)\n        );\n\n        KeccakP1600times4_PermuteAll_24rounds(&state2[0]);\n\n        for (int i = 0; i < SPX_N/8; i++) {\n            ((int64_t*)out0)[i] = _mm256_extract_epi64(state2[i], 0);\n            ((int64_t*)out1)[i] = _mm256_extract_epi64(state2[i], 1);\n            ((int64_t*)out2)[i] = _mm256_extract_epi64(state2[i], 2);\n            ((int64_t*)out3)[i] = _mm256_extract_epi64(state2[i], 3);\n        }\n    } else {\n        SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, buf2, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, bitmask0, inblocks * SPX_N);\n        SPX_VLA(unsigned char, bitmask1, inblocks * SPX_N);\n        SPX_VLA(unsigned char, bitmask2, inblocks * SPX_N);\n        SPX_VLA(unsigned char, bitmask3, inblocks * SPX_N);\n        unsigned int i;\n\n        memcpy(buf0, ctx->pub_seed, SPX_N);\n        memcpy(buf1, ctx->pub_seed, SPX_N);\n        memcpy(buf2, ctx->pub_seed, SPX_N);\n        memcpy(buf3, ctx->pub_seed, SPX_N);\n        memcpy(buf0 + SPX_N, addrx4 + 0*8, SPX_ADDR_BYTES);\n        memcpy(buf1 + SPX_N, addrx4 + 1*8, SPX_ADDR_BYTES);\n        memcpy(buf2 + SPX_N, addrx4 + 2*8, SPX_ADDR_BYTES);\n        memcpy(buf3 + SPX_N, addrx4 + 3*8, SPX_ADDR_BYTES);\n\n        shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, inblocks * SPX_N,\n                   buf0, buf1, buf2, buf3, SPX_N + SPX_ADDR_BYTES);\n\n        for (i = 0; i < inblocks * SPX_N; i++) {\n            buf0[SPX_N + SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i];\n            buf1[SPX_N + SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i];\n            buf2[SPX_N + SPX_ADDR_BYTES + i] = in2[i] ^ 
bitmask2[i];\n            buf3[SPX_N + SPX_ADDR_BYTES + i] = in3[i] ^ bitmask3[i];\n        }\n\n        shake256x4(out0, out1, out2, out3, SPX_N,\n                   buf0, buf1, buf2, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n    }\n}\n"
  },
  {
    "path": "shake-avx2/thash_shake_simplex4.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"thashx4.h\"\n#include \"address.h\"\n#include \"params.h\"\n#include \"utils.h\"\n\n#include \"fips202x4.h\"\n\nextern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s);\n\n/**\n * 4-way parallel version of thash; takes 4x as much input and output\n */\nvoid thashx4(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx4[4*8])\n{\n    if (inblocks == 1 || inblocks == 2) {\n        /* As we write and read only a few quadwords, it is more efficient to\n         * build and extract from the fourway SHAKE256 state by hand. */\n        __m256i state[25];\n        for (int i = 0; i < SPX_N/8; i++) {\n            state[i] = _mm256_set1_epi64x(((int64_t*)ctx->pub_seed)[i]);\n        }\n        for (int i = 0; i < 4; i++) {\n            state[SPX_N/8+i] = _mm256_set_epi32(\n                addrx4[3*8+1+2*i],\n                addrx4[3*8+2*i],\n                addrx4[2*8+1+2*i],\n                addrx4[2*8+2*i],\n                addrx4[8+1+2*i],\n                addrx4[8+2*i],\n                addrx4[1+2*i],\n                addrx4[2*i]\n            );\n        }\n\n        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {\n            state[SPX_N/8+4+i] = _mm256_set_epi64x(\n                        ((int64_t*)in3)[i],\n                        ((int64_t*)in2)[i],\n                        ((int64_t*)in1)[i],\n                        ((int64_t*)in0)[i]\n                    );\n        }\n\n        /* Domain separator and padding. 
*/\n        for (int i = (SPX_N/8)*(1+inblocks)+4; i < 16; i++) {\n            state[i] = _mm256_set1_epi64x(0);\n        }\n        state[16] = _mm256_set1_epi64x((long long)(0x80ULL << 56));\n        state[(SPX_N/8)*(1+inblocks)+4] = _mm256_xor_si256(\n            state[(SPX_N/8)*(1+inblocks)+4],\n            _mm256_set1_epi64x(0x1f)\n        );\n        for (int i = 17; i < 25; i++) {\n            state[i] = _mm256_set1_epi64x(0);\n        }\n\n        KeccakP1600times4_PermuteAll_24rounds(&state[0]);\n\n        for (int i = 0; i < SPX_N/8; i++) {\n            ((int64_t*)out0)[i] = _mm256_extract_epi64(state[i], 0);\n            ((int64_t*)out1)[i] = _mm256_extract_epi64(state[i], 1);\n            ((int64_t*)out2)[i] = _mm256_extract_epi64(state[i], 2);\n            ((int64_t*)out3)[i] = _mm256_extract_epi64(state[i], 3);\n        }\n    } else {\n        SPX_VLA(unsigned char, buf0, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, buf2, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n        SPX_VLA(unsigned char, buf3, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);\n\n        memcpy(buf0, ctx->pub_seed, SPX_N);\n        memcpy(buf1, ctx->pub_seed, SPX_N);\n        memcpy(buf2, ctx->pub_seed, SPX_N);\n        memcpy(buf3, ctx->pub_seed, SPX_N);\n        memcpy(buf0 + SPX_N, addrx4 + 0*8, SPX_ADDR_BYTES);\n        memcpy(buf1 + SPX_N, addrx4 + 1*8, SPX_ADDR_BYTES);\n        memcpy(buf2 + SPX_N, addrx4 + 2*8, SPX_ADDR_BYTES);\n        memcpy(buf3 + SPX_N, addrx4 + 3*8, SPX_ADDR_BYTES);\n        memcpy(buf0 + SPX_N + SPX_ADDR_BYTES, in0, inblocks * SPX_N);\n        memcpy(buf1 + SPX_N + SPX_ADDR_BYTES, in1, inblocks * SPX_N);\n        memcpy(buf2 + SPX_N + SPX_ADDR_BYTES, in2, inblocks * SPX_N);\n        memcpy(buf3 + SPX_N + SPX_ADDR_BYTES, in3, inblocks * SPX_N);\n\n        shake256x4(out0, out1, out2, out3, SPX_N,\n                   buf0, buf1, buf2, buf3, SPX_N + 
SPX_ADDR_BYTES + inblocks*SPX_N);\n    }\n}\n"
  },
  {
    "path": "shake-avx2/thashx4.h",
    "content": "#ifndef SPX_THASHX4_H\n#define SPX_THASHX4_H\n\n#include <stdint.h>\n#include \"context.h\"\n#include \"params.h\"\n\n#define thashx4 SPX_NAMESPACE(thashx4)\nvoid thashx4(unsigned char *out0,\n             unsigned char *out1,\n             unsigned char *out2,\n             unsigned char *out3,\n             const unsigned char *in0,\n             const unsigned char *in1,\n             const unsigned char *in2,\n             const unsigned char *in3, unsigned int inblocks,\n             const spx_ctx *ctx, uint32_t addrx4[4*8]);\n\n#endif\n"
  },
  {
    "path": "shake-avx2/utilsx4.c",
    "content": "#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx4.h\"\n#include \"params.h\"\n#include \"thashx4.h\"\n#include \"address.h\"\n\n/*\n * Generate the entire Merkle tree, computing the authentication path for leaf_idx,\n * and the resulting root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE)\n *\n * This expects tree_addrx4 to be initialized to 4 parallel addr structures for\n * the Merkle tree nodes\n *\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This works by using the standard Merkle tree building algorithm, except\n * that each 'node' tracked is actually 4 consecutive nodes in the real tree.\n * When we combine two logical nodes ABCD and WXYZ, we perform the H\n * operation on adjacent real nodes, forming the parent logical node\n * (AB)(CD)(WX)(YZ)\n *\n * When we get to the top two levels of the real tree (where there is only\n * one logical node), we continue this operation two more times; the right\n * most real node will by the actual root (and the other 3 nodes will be\n * garbage).  
We follow the same thashx4 logic so that the 'extract\n * authentication path components' part of the loop is still executed (and\n * to simplify the code somewhat)\n *\n * This currently assumes tree_height >= 2; I suspect that doing an adjusting\n * idx, addr_idx on the gen_leafx4 call if tree_height < 2 would fix it; since\n * we don't actually use such short trees, I haven't bothered\n */\nvoid treehashx4(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx *ctx,\n                uint32_t leaf_idx, uint32_t idx_offset,\n                uint32_t tree_height,\n                void (*gen_leafx4)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx*,\n                   uint32_t idx, void *info),\n                uint32_t tree_addrx4[4*8],\n                void *info)\n{\n    /* This is where we keep the intermediate nodes */\n    SPX_VLA(unsigned char, stackx4, tree_height * 4 * SPX_N);\n    uint32_t left_adj = 0, prev_left_adj = 0; /* When we're doing the top 3 */\n        /* levels, the left-most part of the tree isn't at the beginning */\n        /* of current[].  
These give the offset of the actual start */\n\n    uint32_t idx;\n    uint32_t max_idx = (1 << (tree_height-2)) - 1;\n    for (idx = 0;; idx++) {\n        unsigned char current[4*SPX_N];   /* Current logical node */\n        gen_leafx4( current, ctx, 4*idx + idx_offset,\n                    info );\n\n        /* Now combine the freshly generated right node with previously */\n        /* generated left ones */\n        uint32_t internal_idx_offset = idx_offset;\n        uint32_t internal_idx = idx;\n        uint32_t internal_leaf = leaf_idx;\n        uint32_t h;     /* The height we are in the Merkle tree */\n        for (h=0;; h++, internal_idx >>= 1, internal_leaf >>= 1) {\n\n            /* Special processing if we're at the top of the tree */\n            if (h >= tree_height - 2) {\n                if (h == tree_height) {\n                    /* We hit the root; return it */\n                    memcpy( root, &current[3*SPX_N], SPX_N );\n                    return;\n                }\n                /* The tree indexing logic is a bit off in this case */\n                /* Adjust it so that the left-most node of the part of */\n                /* the tree that we're processing has index 0 */\n                prev_left_adj = left_adj;\n                left_adj = 4 - (1 << (tree_height - h - 1));\n            }\n\n            /* Check if we hit the top of the tree */\n            if (h == tree_height) {\n                /* We hit the root; return it */\n                memcpy( root, &current[3*SPX_N], SPX_N );\n                return;\n            }\n            \n            /*\n             * Check if one of the nodes we have is a part of the\n             * authentication path; if it is, write it out\n             */\n            if ((((internal_idx << 2) ^ internal_leaf) & ~0x3) == 0) {\n                memcpy( &auth_path[ h * SPX_N ],\n                        &current[(((internal_leaf&3)^1) + prev_left_adj) * SPX_N],\n                        SPX_N );\n     
       }\n\n            /*\n             * Check if we're at a left child; if so, stop going up the stack\n             * Exception: if we've reached the end of the tree, keep on going\n             * (so we combine the last 4 nodes into the one root node in two\n             * more iterations)\n             */\n            if ((internal_idx & 1) == 0 && idx < max_idx) {\n                break;\n            }\n\n            /* Ok, we're at a right node (or doing the top 3 levels) */\n            /* Now combine the left and right logical nodes together */\n\n            /* Set the address of the node we're creating. */\n            int j;\n            internal_idx_offset >>= 1;\n            for (j = 0; j < 4; j++) {\n                set_tree_height(tree_addrx4 + j*8, h + 1);\n                set_tree_index(tree_addrx4 + j*8,\n                     (4/2) * (internal_idx&~1) + j - left_adj + internal_idx_offset );\n            }\n            unsigned char *left = &stackx4[h * 4 * SPX_N];\n            thashx4( &current[0 * SPX_N],\n                     &current[1 * SPX_N],\n                     &current[2 * SPX_N],\n                     &current[3 * SPX_N],\n                     &left   [0 * SPX_N],\n                     &left   [2 * SPX_N],\n                     &current[0 * SPX_N],\n                     &current[2 * SPX_N],\n                     2, ctx, tree_addrx4);\n        }\n\n        /* We've hit a left child; save the current for when we get the */\n        /* corresponding right node */\n        memcpy( &stackx4[h * 4 * SPX_N], current, 4 * SPX_N);\n    }\n}\n"
  },
  {
    "path": "shake-avx2/utilsx4.h",
    "content": "#ifndef SPX_UTILSX4_H\n#define SPX_UTILSX4_H\n\n#include <stdint.h>\n#include \"params.h\"\n\n/**\n * For a given leaf index, computes the authentication path and the resulting\n * root node using Merkle's TreeHash algorithm.\n * Expects the layer and tree parts of the tree_addr to be set, as well as the\n * tree type (i.e. SPX_ADDR_TYPE_HASHTREE or SPX_ADDR_TYPE_FORSTREE).\n * Applies the offset idx_offset to indices before building addresses, so that\n * it is possible to continue counting indices across trees.\n *\n * This implementation uses AVX to compute internal nodes 4 at a time (in\n * parallel)\n */\n#define treehashx4 SPX_NAMESPACE(treehashx4)\nvoid treehashx4(unsigned char *root, unsigned char *auth_path,\n                const spx_ctx *ctx,\n                uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,\n                void (*gen_leafx4)(\n                   unsigned char* /* Where to write the leaves */,\n                   const spx_ctx* /* ctx */,\n                   uint32_t addr_idx, void *info),\n                uint32_t tree_addrx4[4*8], void *info);\n\n#endif\n"
  },
  {
    "path": "shake-avx2/wots.c",
    "content": "#include <stdint.h>\n#include <string.h>\n\n#include \"utils.h\"\n#include \"utilsx4.h\"\n#include \"hash.h\"\n#include \"hashx4.h\"\n#include \"thash.h\"\n#include \"thashx4.h\"\n#include \"wots.h\"\n#include \"wotsx4.h\"\n#include \"address.h\"\n#include \"params.h\"\n\n// TODO clarify address expectations, and make them more uniform.\n// TODO i.e. do we expect types to be set already?\n// TODO and do we expect modifications or copies?\n\n/**\n * Computes up the chains\n */\nstatic void gen_chains(\n        unsigned char *out,\n        const unsigned char *in,\n        unsigned int start[SPX_WOTS_LEN],\n        unsigned int steps[SPX_WOTS_LEN],\n        const spx_ctx *ctx,\n        uint32_t addr[8])\n{\n    uint32_t i, j, k, idx, watching;\n    int done;\n    unsigned char empty[SPX_N];\n    unsigned char *bufs[4];\n    uint32_t addrs[8*4];\n\n    int l;\n    uint16_t counts[SPX_WOTS_W] = { 0 };\n    uint16_t idxs[SPX_WOTS_LEN];\n    uint16_t total, newTotal;\n\n    /* set addrs = {addr, addr, addr, addr} */\n    for (j = 0; j < 4; j++) {\n        memcpy(addrs+j*8, addr, sizeof(uint32_t) * 8);\n    }\n\n    /* Initialize out with the value at position 'start'. */\n    memcpy(out, in, SPX_WOTS_LEN*SPX_N);\n\n    /* Sort the chains in reverse order by steps using counting sort. */\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        counts[steps[i]]++;\n    }\n    total = 0;\n    for (l = SPX_WOTS_W - 1; l >= 0; l--) {\n        newTotal = counts[l] + total;\n        counts[l] = total;\n        total = newTotal;\n    }\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        idxs[counts[steps[i]]] = i;\n        counts[steps[i]]++;\n    }\n\n    /* We got our work cut out for us: do it! 
*/\n    for (i = 0; i < SPX_WOTS_LEN; i += 4) {\n        for (j = 0; j < 4 && i+j < SPX_WOTS_LEN; j++) {\n            idx = idxs[i+j];\n            set_chain_addr(addrs+j*8, idx);\n            bufs[j] = out + SPX_N * idx;\n        }\n\n        /* As the chains are sorted in reverse order, we know that the first\n         * chain is the longest and the last one is the shortest.  We keep\n         * an eye on whether the last chain is done and then on the one before,\n         * et cetera. */\n        watching = 3;\n        done = 0;\n        while (i + watching >= SPX_WOTS_LEN) {\n            bufs[watching] = &empty[0];\n            watching--;\n        }\n\n        for (k = 0;; k++) {\n            while (k == steps[idxs[i+watching]]) {\n                bufs[watching] = &empty[0];\n                if (watching == 0) {\n                    done = 1;\n                    break;\n                }\n                watching--;\n            }\n            if (done) {\n                break;\n            }\n            for (j = 0; j < watching + 1; j++) {\n                set_hash_addr(addrs+j*8, k + start[idxs[i+j]]);\n            }\n\n            thashx4(bufs[0], bufs[1], bufs[2], bufs[3],\n                    bufs[0], bufs[1], bufs[2], bufs[3], 1, ctx, addrs);\n        }\n    }\n}\n\n/**\n * base_w algorithm as described in draft.\n * Interprets an array of bytes as integers in base w.\n * This only works when log_w is a divisor of 8.\n */\nstatic void base_w(unsigned int *output, const int out_len,\n                   const unsigned char *input)\n{\n    int in = 0;\n    int out = 0;\n    unsigned char total;\n    int bits = 0;\n    int consumed;\n\n    for (consumed = 0; consumed < out_len; consumed++) {\n        if (bits == 0) {\n            total = input[in];\n            in++;\n            bits += 8;\n        }\n        bits -= SPX_WOTS_LOGW;\n        output[out] = (total >> bits) & (SPX_WOTS_W - 1);\n        out++;\n    }\n}\n\n/* Computes the WOTS+ checksum over 
a message (in base_w). */\nstatic void wots_checksum(unsigned int *csum_base_w,\n                          const unsigned int *msg_base_w)\n{\n    unsigned int csum = 0;\n    unsigned char csum_bytes[(SPX_WOTS_LEN2 * SPX_WOTS_LOGW + 7) / 8];\n    unsigned int i;\n\n    /* Compute checksum. */\n    for (i = 0; i < SPX_WOTS_LEN1; i++) {\n        csum += SPX_WOTS_W - 1 - msg_base_w[i];\n    }\n\n    /* Convert checksum to base_w. */\n    /* Make sure expected empty zero bits are the least significant bits. */\n    csum = csum << ((8 - ((SPX_WOTS_LEN2 * SPX_WOTS_LOGW) % 8)) % 8);\n    ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum);\n    base_w(csum_base_w, SPX_WOTS_LEN2, csum_bytes);\n}\n\n/* Takes a message and derives the matching chain lengths. */\nvoid chain_lengths(unsigned int *lengths, const unsigned char *msg)\n{\n    base_w(lengths, SPX_WOTS_LEN1, msg);\n    wots_checksum(lengths + SPX_WOTS_LEN1, lengths);\n}\n\n/**\n * Takes a WOTS signature and an n-byte message, computes a WOTS public key.\n *\n * Writes the computed public key to 'pk'.\n */\nvoid wots_pk_from_sig(unsigned char *pk,\n                      const unsigned char *sig, const unsigned char *msg,\n                      const spx_ctx *ctx, uint32_t addr[8])\n{\n    unsigned int steps[SPX_WOTS_LEN];\n    unsigned int start[SPX_WOTS_LEN];\n    uint32_t i;\n\n    chain_lengths(start, msg);\n\n    for (i = 0; i < SPX_WOTS_LEN; i++) {\n        steps[i] = SPX_WOTS_W - 1 - start[i];\n    }\n\n    gen_chains(pk, sig, start, steps, ctx, addr);\n}\n\n/*\n * This generates 4 sequential WOTS public keys\n * It also generates the WOTS signature if leaf_info indicates\n * that we're signing with one of these WOTS keys\n */\nvoid wots_gen_leafx4(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info) {\n    struct leaf_info_x4 *info = v_info;\n    uint32_t *leaf_addr = info->leaf_addr;\n    uint32_t *pk_addr = info->pk_addr;\n    unsigned int i, j, 
k;\n    unsigned char pk_buffer[ 4 * SPX_WOTS_BYTES ];\n    unsigned wots_offset = SPX_WOTS_BYTES;\n    unsigned char *buffer;\n    uint32_t wots_k_mask;\n    unsigned wots_sign_index;\n\n    if (((leaf_idx ^ info->wots_sign_leaf) & ~3) == 0) {\n        /* We're traversing the leaf that's signing; generate the WOTS */\n        /* signature */\n        wots_k_mask = 0;\n        wots_sign_index = info->wots_sign_leaf & 3; /* Which of of the 4 */\n                                  /* 4 slots do the signatures come from */\n    } else {\n        /* Nope, we're just generating pk's; turn off the signature logic */\n        wots_k_mask = (uint32_t)~0;\n\t    wots_sign_index = 0;\n    }\n\n    for (j = 0; j < 4; j++) {\n        set_keypair_addr( leaf_addr + j*8, leaf_idx + j );\n        set_keypair_addr( pk_addr + j*8, leaf_idx + j );\n    }\n\n    for (i = 0, buffer = pk_buffer; i < SPX_WOTS_LEN; i++, buffer += SPX_N) {\n        uint32_t wots_k = info->wots_steps[i] | wots_k_mask; /* Set wots_k to */\n            /* the step if we're generating a signature, ~0 if we're not */\n\n        /* Start with the secret seed */\n        for (j = 0; j < 4; j++) {\n            set_chain_addr(leaf_addr + j*8, i);\n            set_hash_addr(leaf_addr + j*8, 0);\n            set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTSPRF);\n        }\n        prf_addrx4(buffer + 0*wots_offset,\n                   buffer + 1*wots_offset,\n                   buffer + 2*wots_offset,\n                   buffer + 3*wots_offset,\n                   ctx, leaf_addr);\n\n        for (j = 0; j < 4; j++) {\n            set_type(leaf_addr + j*8, SPX_ADDR_TYPE_WOTS);\n        }\n\n        /* Iterate down the WOTS chain */\n        for (k=0;; k++) {\n            /* Check if one of the values we have needs to be saved as a */\n            /* part of the WOTS signature */\n            if (k == wots_k) {\n                memcpy( info->wots_sig + i * SPX_N,\n                        buffer + 
wots_sign_index*wots_offset, SPX_N );\n            }\n\n            /* Check if we hit the top of the chain */\n            if (k == SPX_WOTS_W - 1) break;\n\n            /* Iterate one step on all 4 chains */\n            for (j = 0; j < 4; j++) {\n                set_hash_addr(leaf_addr + j*8, k);\n            }\n            thashx4(buffer + 0*wots_offset,\n                    buffer + 1*wots_offset,\n                    buffer + 2*wots_offset,\n                    buffer + 3*wots_offset,\n                    buffer + 0*wots_offset,\n                    buffer + 1*wots_offset,\n                    buffer + 2*wots_offset,\n                    buffer + 3*wots_offset, 1, ctx, leaf_addr);\n        }\n    }\n\n    /* Do the final thash to generate the public keys */\n    thashx4(dest + 0*SPX_N,\n            dest + 1*SPX_N,\n            dest + 2*SPX_N,\n            dest + 3*SPX_N,\n            pk_buffer + 0*wots_offset,\n            pk_buffer + 1*wots_offset,\n            pk_buffer + 2*wots_offset,\n            pk_buffer + 3*wots_offset, SPX_WOTS_LEN, ctx, pk_addr);\n}\n"
  },
  {
    "path": "shake-avx2/wotsx4.h",
    "content": "#if !defined( WOTSX4_H_ )\n#define WOTSX4_H_\n\n#include <string.h>\n#include \"params.h\"\n\n/*\n * This is here to provide an interface to the internal wots_gen_leafx4\n * routine.  While this routine is not referenced in the package outside of\n * wots.c, it is called from the stand-alone benchmark code to characterize\n * the performance\n */\nstruct leaf_info_x4 {\n    unsigned char *wots_sig;\n    uint32_t wots_sign_leaf; /* The index of the WOTS we're using to sign */\n    uint32_t *wots_steps;\n    uint32_t leaf_addr[4*8];\n    uint32_t pk_addr[4*8];\n};\n\n/* Macro to set the leaf_info to something 'benign', that is, it would */\n/* run with the same time as it does during the real signing process */\n/* Used only by the benchmark code */\n#define INITIALIZE_LEAF_INFO_X4(info, addr, step_buffer) { \\\n    info.wots_sig = 0;             \\\n    info.wots_sign_leaf = ~0;      \\\n    info.wots_steps = step_buffer; \\\n    int i;                         \\\n    for (i=0; i<4; i++) {          \\\n        memcpy( &info.leaf_addr[8*i], addr, 32 ); \\\n        memcpy( &info.pk_addr[8*i], addr, 32 ); \\\n    } \\\n}\n\n#define wots_gen_leafx4 SPX_NAMESPACE(wots_gen_leafx4)\nvoid wots_gen_leafx4(unsigned char *dest,\n                   const spx_ctx *ctx,\n                   uint32_t leaf_idx, void *v_info);\n\n#endif /* WOTSX4_H_ */\n"
  },
  {
    "path": "vectors.py",
    "content": "#! /usr/bin/env python3\n\n# Without arguments, generates sha256 sums of NIST KAT response files\n# for each of the instances (which should match SHA256SUMS.)\n#\n# With two arguments, checks whether the sha256 sum of the given\n# generated NIST KAT response file is correct, e.g.:\n#\n#       ./vectors.py sphincs-shake-128s-simple shake-avx2\n\nimport multiprocessing\nimport subprocess\nimport itertools\nimport tempfile\nimport hashlib\nimport shutil\nimport os\nimport sys\n\nfns = ['shake', 'sha2', 'haraka']\noptions = [\"f\", \"s\"]\nsizes = [128, 192, 256]\nthashes = ['robust', 'simple']\n\ndef nameFor(fn, opt, size, thash):\n    return f\"sphincs-{fn}-{size}{opt}-{thash}\"\n\ndef make(fn, opt, size, thash, bindir, impl):\n    name = nameFor(fn, opt, size, thash)\n    overrides = [f'PARAMS=sphincs-{fn}-{size}{opt}', 'THASH='+thash]\n\n    sys.stderr.write(f\"Compiling {name} …\\n\")\n    sys.stderr.flush()\n\n    subprocess.run([\"make\", \"-C\", impl, \"clean\"] + overrides,\n        stdout=subprocess.DEVNULL, stderr=sys.stderr, check=True)\n    subprocess.run([\"make\", '-j', \"-C\", impl, \"PQCgenKAT_sign\"] + overrides,\n        stdout=subprocess.DEVNULL, stderr=sys.stderr, check=True)\n\n    shutil.move(\n        os.path.join(impl, 'PQCgenKAT_sign'),\n        os.path.join(bindir, name),\n    )\n\n    return (name, size)\n\ndef run(name_size, bindir):\n    name, size = name_size\n    rsp = f'PQCsignKAT_{size//2}.rsp'\n    req = f'PQCsignKAT_{size//2}.req'\n\n    with tempfile.TemporaryDirectory() as rundir:\n        sys.stderr.write(f\"Running {name} …\\n\")\n        sys.stderr.flush()\n\n        subprocess.run([os.path.join(bindir, name)],\n            stdout=subprocess.DEVNULL, stderr=sys.stderr, cwd=rundir, check=True)\n        with open(os.path.join(rundir, rsp), 'rb') as f:\n            h = hashlib.sha256(f.read()).hexdigest()\n            return f\"{h} {name}\"\n\ndef generate_sums():\n    with tempfile.TemporaryDirectory() as bindir:\n 
       with multiprocessing.Pool() as pool:\n            name_sizes = []\n            for fn in fns:\n                for opt, size, thash in itertools.product(options, sizes, thashes):\n                    name_sizes.append(make(fn, opt, size, thash, bindir, 'ref'))\n\n            res = pool.starmap(run, zip(name_sizes, [bindir]*len(name_sizes)))\n            res.sort()\n            print('\\n'.join(res))\n\ndef check_sum(name, impl):\n    line = None\n    with tempfile.TemporaryDirectory() as bindir:\n        for fn in fns:\n            for opt, size, thash in itertools.product(\n                    options, sizes, thashes):\n                if nameFor(fn, opt, size, thash) != name:\n                    continue\n                name_size = make(fn, opt, size, thash, bindir, impl)\n                line = run(name_size, bindir)\n                break\n    if not line:\n        sys.stderr.write(\"No such instance\\n\")\n        sys.exit(1)\n    with open('SHA256SUMS', 'r') as f:\n        if f.read().find(line + '\\n') == -1:\n            sys.stderr.write(f\"Test vector mismatch: {line}\\n\")\n            sys.exit(2)\n        sys.stderr.write(\"ok\\n\")\n\nif __name__ == '__main__':\n    if len(sys.argv) <= 1:\n        generate_sums()\n    elif len(sys.argv) == 3:\n        check_sum(sys.argv[1], sys.argv[2])\n    else:\n        sys.stderr.write(\"Expect two or no arguments\\n\")\n        sys.exit(3)\n"
  }
]